. */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Encodings\UTF16BE;
use Fisharebest\Webtrees\Encodings\UTF16LE;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Encodings\Windows1252;
use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Webtrees;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use RuntimeException;

use function date;
use function explode;
use function fopen;
use function fwrite;
use function implode;
use function is_string;
use function mb_strlen;
use function mb_strpos;
use function mb_substr;
use function pathinfo;
use function preg_match;
use function preg_replace_callback;
use function rewind;
use function str_contains;
use function str_starts_with;
use function stream_filter_append;
use function strlen;
use function strpos;
use function strtolower;
use function strtoupper;
use function strtr;

use const PATHINFO_EXTENSION;
use const STREAM_FILTER_WRITE;

/**
 * Export data in GEDCOM format
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * The data is written to an in-memory stream ("php://memory") through an
     * encoding filter, and the stream is rewound before it is returned.
     *
     * @param Tree                                            $tree         - Export data from this tree
     * @param bool                                            $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                                          $encoding     - Convert from UTF-8 to other encoding
     * @param int                                             $access_level - Apply privacy filtering
     * @param string                                          $media_path   - Prepend path to media filenames
     * @param string                                          $line_endings - CRLF or LF
     * @param Collection<int,string|object|GedcomRecord>|null $records      - Just export these records
     *
     * @return resource
     *
     * @throws RuntimeException If the stream cannot be created, written to, or rewound.
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = UTF8::NAME,
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        string $line_endings = 'CRLF',
        ?Collection $records = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        // Everything written to the stream is converted from UTF-8 to the requested encoding.
        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // The raw database rows are streamed and their stored GEDCOM is written as-is;
            // privatizeGedcom() is never called for them, so no record objects are needed.
            // NOTE(review): presumably Auth::PRIV_HIDE means "no privacy filtering" here - confirm.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.
            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Header, trailer, or a record supplied as raw GEDCOM.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A database row: exactly one of these columns is selected by each query.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";

                if ($line_endings === 'CRLF') {
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
                }

                $bytes_written = fwrite($stream, $gedcom);

                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream. Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Create a header record for a gedcom file.
     *
     * @param Tree   $tree
     * @param string $encoding    - Encoding name; some names are mapped to GEDCOM names (UNICODE, ANSI)
     * @param bool   $include_sub - Also copy the SUBM/SUBN facts from the existing header
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Encodings whose GEDCOM name differs from our own name for them.
        $gedcom_encodings = [
            UTF16BE::NAME     => 'UNICODE',
            UTF16LE::NAME     => 'UNICODE',
            Windows1252::NAME => 'ANSI',
        ];

        $encoding = $gedcom_encodings[$encoding] ?? $encoding;

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only records that start with "0 @...@ OBJE" are modified; all other
     * records are returned unchanged.
     *
     * @param string $gedcom
     * @param string $media_path
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Don't modify external links
                if (!str_contains($filename, '://')) {
                    // Convert separators to match new path.
                    if (str_contains($media_path, '\\')) {
                        $filename = strtr($filename, ['/' => '\\']);
                    }

                    // Don't prepend the path twice.
                    if (!str_starts_with($filename, $media_path)) {
                        $filename = $media_path . $filename;
                    }
                }

                return "\n1 FILE " . $filename;
            }, $gedcom);
        }

        return $gedcom;
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * Lengths and split positions are measured in characters, not bytes.
     *
     * @param string $gedcom          - One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length - Maximum number of characters in a line
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);

                // Concatenation lines go one level below the line being split - unless that line
                // is itself a continuation (CONT/CONC), in which case they stay at the same level.
                if ($tag !== 'CONT' && $tag !== 'CONC') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;

                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }

                    // $pos is a character index, so search by character as well.
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can't split it :-(
                        break;
                    }

                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }

            $lines[] = $line;
        }

        return implode("\n", $lines);
    }

    /**
     * Select the XREF and GEDCOM of every family in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Select the XREF and GEDCOM of every individual in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Select the XREF and GEDCOM of every source in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Select the XREF and GEDCOM of every media object in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Select the XREF and GEDCOM of every other record in a tree, excluding
     * the header and trailer, which export() writes itself.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}