<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;

use function date;
use function explode;
use function fwrite;
use function implode;
use function is_string;
use function mb_convert_encoding;
use function mb_strlen;
use function mb_strpos;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function str_contains;
use function str_starts_with;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function utf8_decode;

use const PATHINFO_EXTENSION;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * Each record is optionally privacy-filtered, has its media paths rewritten,
     * is wrapped to the maximum GEDCOM line length, converted to the requested
     * encoding and then written to the stream.
     *
     * @param Tree                    $tree         - Export data from this tree
     * @param resource                $stream       - Write to this stream
     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                  $encoding     - Convert from UTF-8 to other encoding
     * @param int                     $access_level - Apply privacy filtering
     * @param string                  $media_path   - Prepend path to media filenames
     * @param Collection<string>|null $records      - Just export these records
     */
    public function export(
        Tree $tree,
        $stream,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ): void {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            // The submitter/submission links are left out of the header.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // At this access level the rows are written exactly as stored (no call to
            // privatizeGedcom() below), so we do not need GEDCOM record objects and can
            // stream the raw database rows with a cursor.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            // (Primes the per-tree array cache entry with an empty collection.)
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Header, trailer, or a caller-supplied raw record.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A raw database row: exactly one of these columns is selected,
                    // depending on which table the row came from.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                fwrite($stream, $gedcom);
            }
        }
    }

    /**
     * Create a header record for a gedcom file.
     *
     * @param Tree   $tree
     * @param string $encoding    - Written verbatim as the "1 CHAR" value
     * @param bool   $include_sub - Also copy the SUBM/SUBN facts from the stored header
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header.
        // If the tree has no stored header, use an empty one so that facts() yields nothing.
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object records ("0 @xref@ OBJE") are modified; every other
     * record is returned unchanged.
     *
     * @param string $gedcom
     * @param string $media_path
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Don’t modify external links
                if (!str_contains($filename, '://')) {
                    // Convert separators to match new path.
                    if (str_contains($media_path, '\\')) {
                        $filename = strtr($filename, ['/' => '\\']);
                    }

                    // Don’t prepend the path twice.
                    if (!str_starts_with($filename, $media_path)) {
                        $filename = $media_path . $filename;
                    }
                }

                return "\n1 FILE " . $filename;
            }, $gedcom);
        }

        return $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM data to the requested output encoding.
     *
     * Characters that cannot be represented in the target encoding are
     * replaced by mbstring's substitute character ("?" by default).
     *
     * @param string $encoding - 'ANSI', 'ANSEL', 'ASCII'; anything else leaves the data unchanged
     * @param string $gedcom   - UTF-8 encoded GEDCOM data
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1.
                // (utf8_decode() did this conversion, but is deprecated since PHP 8.2.)
                return mb_convert_encoding($gedcom, 'ISO-8859-1', 'UTF-8');

            case 'ANSEL':
                // coming soon...?
                // Not implemented - deliberately falls through to ASCII.
            case 'ASCII':
                // Might be needed by really old software?
                // Note the argument order: (string, to-encoding, from-encoding).
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * @param string $gedcom          - One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length - Maximum number of characters (not bytes) per line
     *
     * @return string - The wrapped lines, joined with Gedcom::EOL
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                $parts = explode(' ', $line, 3);
                $level = (int) $parts[0];
                $tag   = $parts[1] ?? '';

                // CONC records are subordinate to the line they continue, except that a
                // continued CONT line gets its CONC records at the same level.
                if ($tag !== 'CONT') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    // $pos is a character offset, so compare it with a character offset.
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Select the GEDCOM data and XREF of every family in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so that F2 sorts before F10)
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every individual in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so that I2 sorts before I10)
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every source in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so that S2 sorts before S10)
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every media object in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (so that M2 sorts before M10)
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every other record in a tree,
     * excluding the header and trailer (which export() generates itself).
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}