<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2020 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Cache;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Factory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;

use function app;
use function assert;
use function date;
use function explode;
use function fwrite;
use function implode;
use function is_string;
use function mb_convert_encoding;
use function mb_strlen;
use function mb_strpos;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function str_contains;
use function str_starts_with;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function utf8_decode;

use const PATHINFO_EXTENSION;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * Each record is optionally privacy-filtered, has its media path rewritten,
     * is wrapped to the maximum GEDCOM line length, converted to the requested
     * encoding and then written to the stream, followed by a line terminator.
     *
     * @param Tree                    $tree         - Export data from this tree
     * @param resource                $stream       - Write to this stream
     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                  $encoding     - Convert from UTF-8 to other encoding
     * @param int                     $access_level - Apply privacy filtering
     * @param string                  $media_path   - Prepend path to media filenames
     * @param Collection<string>|null $records      - Just export these records
     */
    public function export(
        Tree $tree,
        $stream,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ): void {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // The raw database rows are written as-is (no call to privatizeGedcom()),
            // so we can stream them with cursors instead of building record objects.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            $cache = app('cache.array');
            assert($cache instanceof Cache);
            $cache->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Factory::individual()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Factory::family()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Factory::source()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Factory::gedcomRecord()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Factory::media()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Header, trailer, or a pre-rendered record.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A database row: exactly one of these columns was selected by the query.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                fwrite($stream, $gedcom);
            }
        }
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is rebuilt from scratch; only the COPR, LANG, PLAC and NOTE facts
     * (and, optionally, SUBM and SUBN) are carried over from the tree's existing header.
     *
     * @param Tree   $tree
     * @param string $encoding    - Written verbatim as the CHAR value
     * @param bool   $include_sub - Also copy the SUBM/SUBN facts from the existing header
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM Lineage-Linked";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Factory::header()->make('HEAD', $tree) ?? Factory::header()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object records (those starting "0 @xref@ OBJE") are modified.
     * Filenames that already start with the media path are left unchanged.
     *
     * @param string $gedcom
     * @param string $media_path
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Convert separators to match new path.
                if (str_contains($media_path, '\\')) {
                    $filename = strtr($filename, ['/' => '\\']);
                }

                if (!str_starts_with($filename, $media_path)) {
                    $filename = $media_path . $filename;
                }

                // The callback's result replaces the WHOLE match, so the tag must be re-emitted.
                return "\n1 FILE " . $filename;
            }, $gedcom);
        }

        return $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM data to the requested output encoding.
     *
     * Unrecognised encodings (including UTF-8) are returned unchanged.
     *
     * @param string $encoding
     * @param string $gedcom
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1
                return mb_convert_encoding($gedcom, 'ISO-8859-1', 'UTF-8');

            case 'ANSEL':
                // coming soon...? Until then, ANSEL is exported as ASCII.
            case 'ASCII':
                // Might be needed by really old software?
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * @param string $gedcom          - One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length - Maximum line length, in characters
     *
     * @return string - The wrapped lines, joined with Gedcom::EOL
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);
                // Continuation lines are one level deeper than the line they extend,
                // except when extending a CONT line, where they stay at the same level.
                if ($tag !== 'CONT') {
                    $level++;
                }
                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    // $pos is a character offset, so compare it with a character offset.
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Query the family records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Query the individual records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Query the source records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Query the media records of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Query the remaining records of a tree (everything in the "other" table
     * except the header and trailer, which are generated by the export itself).
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}