<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Cache;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Factory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;

use function app;
use function assert;
use function date;
use function explode;
use function fwrite;
use function implode;
use function in_array;
use function is_string;
use function mb_convert_encoding;
use function mb_strlen;
use function mb_strpos;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;
use function utf8_decode;

use const PATHINFO_EXTENSION;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * The output is a header record, the selected records and a "0 TRLR" trailer.
     * Each record is optionally privacy-filtered, has its media paths rewritten,
     * is wrapped to the maximum GEDCOM line length, and is converted to the
     * requested character encoding before being written.
     *
     * @param Tree                    $tree         - Export data from this tree
     * @param resource                $stream       - Write to this stream
     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                  $encoding     - Convert from UTF-8 to other encoding
     * @param int                     $access_level - Apply privacy filtering
     * @param string                  $media_path   - Prepend path to media filenames
     * @param Collection<string>|null $records      - Just export these records
     */
    public function export(
        Tree $tree,
        $stream,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ): void {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // This branch does no privacy filtering, so the raw GEDCOM text is streamed
            // straight from the database rows, without building record objects.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            // NOTE(review): presumably the record factories read pending changes from
            // this cache key, so seeding it with an empty collection hides them - confirm.
            $cache = app('cache.array');
            assert($cache instanceof Cache);
            $cache->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Factory::individual()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Factory::family()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Factory::source()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Factory::gedcomRecord()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Factory::media()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Already GEDCOM text (header, trailer, or caller-supplied records).
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A database row: exactly one of these columns is selected by each query.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                fwrite($stream, $gedcom);
            }
        }
    }

    /**
     * Create a header record for a gedcom file.
     *
     * A fresh header is generated (source, date, GEDCOM version, encoding and
     * filename), then selected facts from the tree's existing header are appended.
     *
     * @param Tree   $tree
     * @param string $encoding    - Written verbatim as the "1 CHAR" value
     * @param bool   $include_sub - Also copy the SUBM/SUBN facts from the existing header
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM Lineage-Linked";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Factory::header()->make('HEAD', $tree) ?? Factory::header()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object records ("0 @XREF@ OBJE") are modified; every other
     * record is returned unchanged.
     *
     * @param string $gedcom
     * @param string $media_path
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom) !== 1) {
            return $gedcom;
        }

        $converted = preg_replace_callback(
            '/\n1 FILE (.+)/',
            static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Don't modify external links.
                if (strpos($filename, '://') !== false) {
                    return $match[0];
                }

                // Convert separators to match new path.
                if (strpos($media_path, '\\') !== false) {
                    $filename = strtr($filename, ['/' => '\\']);
                }

                // Don't prepend the path twice.
                if (strpos($filename, $media_path) !== 0) {
                    $filename = $media_path . $filename;
                }

                // The callback's result replaces the WHOLE match, so the tag must be re-emitted.
                return "\n1 FILE " . $filename;
            },
            $gedcom
        );

        // preg_replace_callback() returns null on a PCRE error; keep the original text in that case.
        return $converted ?? $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM text to the requested export encoding.
     *
     * Unrecognised encodings (including 'UTF-8') are returned unchanged.
     *
     * @param string $encoding
     * @param string $gedcom
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1
                // (utf8_decode() did the same conversion, but is deprecated since PHP 8.2.)
                return mb_convert_encoding($gedcom, 'ISO-8859-1', 'UTF-8');

            case 'ANSEL':
                // coming soon...?  Until then, fall through to ASCII.
            case 'ASCII':
                // Might be needed by really old software?
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * Lines longer than $max_line_length characters are split into the original
     * line followed by one or more "CONC" lines. The result is joined with
     * Gedcom::EOL (the input is split on "\n").
     *
     * @param string $gedcom
     * @param int    $max_line_length
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                $parts = explode(' ', $line, 3);
                $level = (int) $parts[0];
                $tag   = $parts[1] ?? '';

                // Continuation lines are subordinate to the tag they continue; a CONT/CONC
                // line is itself a continuation, so its own continuation stays at the same level.
                if (!in_array($tag, ['CONT', 'CONC'], true)) {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    // Character positions throughout, so compare against mb_strpos().
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can't split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Select the GEDCOM text and XREF of every family in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $this->orderByXref($query, 'f_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM text and XREF of every individual in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $this->orderByXref($query, 'i_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM text and XREF of every source in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $this->orderByXref($query, 's_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM text and XREF of every media object in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $this->orderByXref($query, 'm_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM text and XREF of every other record in a tree,
     * excluding the header and trailer (export() writes its own).
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            // Group by record type first, then by XREF within each type.
            $query->orderBy('o_type');
            $this->orderByXref($query, 'o_id');
        }

        return $query;
    }

    /**
     * Order a query by XREF: shorter XREFs first, then alphabetically,
     * so that (for example) X2 sorts before X10.
     *
     * @param Builder $query  - Modified in place
     * @param string  $column - Name of the XREF column; must be a trusted identifier, not user input
     *
     * @return Builder
     */
    private function orderByXref(Builder $query, string $column): Builder
    {
        return $query
            ->orderBy(new Expression('LENGTH(' . $column . ')'))
            ->orderBy($column);
    }
}