xref: /webtrees/app/Services/GedcomExportService.php (revision 16ecfcafdc113f0b4074e256aa90a4c870e7b2f0)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Auth;
23use Fisharebest\Webtrees\Encodings\UTF16BE;
24use Fisharebest\Webtrees\Encodings\UTF16LE;
25use Fisharebest\Webtrees\Encodings\UTF8;
26use Fisharebest\Webtrees\Encodings\Windows1252;
27use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
28use Fisharebest\Webtrees\Gedcom;
29use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
30use Fisharebest\Webtrees\GedcomRecord;
31use Fisharebest\Webtrees\Header;
32use Fisharebest\Webtrees\Registry;
33use Fisharebest\Webtrees\Tree;
34use Fisharebest\Webtrees\Webtrees;
35use Illuminate\Database\Capsule\Manager as DB;
36use Illuminate\Database\Query\Builder;
37use Illuminate\Database\Query\Expression;
38use Illuminate\Support\Collection;
39use League\Flysystem\Filesystem;
40use League\Flysystem\FilesystemOperator;
41use League\Flysystem\ZipArchive\FilesystemZipArchiveProvider;
42use League\Flysystem\ZipArchive\ZipArchiveAdapter;
43use Psr\Http\Message\ResponseFactoryInterface;
44use Psr\Http\Message\ResponseInterface;
45use Psr\Http\Message\StreamFactoryInterface;
46use RuntimeException;
47
48use function addcslashes;
49use function date;
50use function explode;
51use function fclose;
52use function fopen;
53use function fwrite;
54use function pathinfo;
55use function preg_match_all;
56use function rewind;
57use function str_contains;
58use function stream_filter_append;
59use function stream_get_meta_data;
60use function strlen;
61use function strpos;
62use function strtolower;
63use function strtoupper;
64use function tmpfile;
65
66use const PATHINFO_EXTENSION;
67use const PREG_SET_ORDER;
68use const STREAM_FILTER_WRITE;
69
70/**
71 * Export data in GEDCOM format
72 */
73class GedcomExportService
74{
    // Maps the privacy role names accepted by downloadResponse() to the
    // Auth::PRIV_* access levels that are passed on to export().
    // In export(), Auth::PRIV_HIDE selects the branch that writes the raw
    // database rows without calling privatizeGedcom().
    private const ACCESS_LEVELS = [
        'gedadmin' => Auth::PRIV_NONE,
        'user'     => Auth::PRIV_USER,
        'visitor'  => Auth::PRIV_PRIVATE,
        'none'     => Auth::PRIV_HIDE,
    ];

    // Creates the HTTP response returned by downloadResponse().
    private ResponseFactoryInterface $response_factory;

    // Wraps the exported resource (or the temporary zip file) in a stream for the response body.
    private StreamFactoryInterface $stream_factory;
85
86    /**
87     * @param ResponseFactoryInterface $response_factory
88     * @param StreamFactoryInterface   $stream_factory
89     */
90    public function __construct(ResponseFactoryInterface $response_factory, StreamFactoryInterface $stream_factory)
91    {
92        $this->response_factory = $response_factory;
93        $this->stream_factory   = $stream_factory;
94    }
95
96    /**
97     * @param Tree                        $tree           - Export data from this tree
98     * @param bool                        $sort_by_xref   - Write GEDCOM records in XREF order
99     * @param string                      $encoding       - Convert from UTF-8 to other encoding
100     * @param string                      $privacy        - Filter records by role
101     * @param string                      $filename       - Name of download file, without an extension
102     * @param string                      $format         - One of: gedcom, zip, zipmedia, gedzip
103     *
104     * @return ResponseInterface
105     */
106    public function downloadResponse(
107        Tree $tree,
108        bool $sort_by_xref,
109        string $encoding,
110        string $privacy,
111        string $line_endings,
112        string $filename,
113        string $format,
114        Collection $records = null
115    ): ResponseInterface {
116        $access_level = self::ACCESS_LEVELS[$privacy];
117
118        if ($format === 'gedcom') {
119            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
120            $stream   = $this->stream_factory->createStreamFromResource($resource);
121
122            return $this->response_factory->createResponse()
123                ->withBody($stream)
124                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
125                ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . '.ged"');
126        }
127
128        // Create a new/empty .ZIP file
129        $temp_zip_file  = stream_get_meta_data(tmpfile())['uri'];
130        $zip_provider   = new FilesystemZipArchiveProvider($temp_zip_file, 0755);
131        $zip_adapter    = new ZipArchiveAdapter($zip_provider);
132        $zip_filesystem = new Filesystem($zip_adapter);
133
134        if ($format === 'zipmedia') {
135            $media_path = $tree->getPreference('MEDIA_DIRECTORY');
136        } elseif ($format === 'gedzip') {
137            $media_path = '';
138        } else {
139            // Don't add media
140            $media_path = null;
141        }
142
143        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);
144
145        if ($format === 'gedzip') {
146            $zip_filesystem->writeStream('gedcom.ged', $resource);
147            $extension = '.gdz';
148        } else {
149            $zip_filesystem->writeStream($filename . '.ged', $resource);
150            $extension = '.zip';
151        }
152
153        fclose($resource);
154
155        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);
156
157        return $this->response_factory->createResponse()
158            ->withBody($stream)
159            ->withHeader('content-type', 'application/zip')
160            ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"')  . $extension . '"');
161    }
162
163    /**
164     * Write GEDCOM data to a stream.
165     *
166     * @param Tree                        $tree           - Export data from this tree
167     * @param bool                        $sort_by_xref   - Write GEDCOM records in XREF order
168     * @param string                      $encoding       - Convert from UTF-8 to other encoding
169     * @param int                         $access_level   - Apply privacy filtering
170     * @param string                      $line_endings   - CRLF or LF
171     * @param Collection<int,string>|null $records        - Just export these records
172     * @param FilesystemOperator|null     $zip_filesystem - Write media files to this filesystem
173     * @param string|null                 $media_path     - Location within the zip filesystem
174     *
175     * @return resource
176     */
177    public function export(
178        Tree $tree,
179        bool $sort_by_xref = false,
180        string $encoding = UTF8::NAME,
181        int $access_level = Auth::PRIV_HIDE,
182        string $line_endings = 'CRLF',
183        Collection $records = null,
184        FilesystemOperator $zip_filesystem = null,
185        string $media_path = null
186    ) {
187        $stream = fopen('php://memory', 'wb+');
188
189        if ($stream === false) {
190            throw new RuntimeException('Failed to create temporary stream');
191        }
192
193        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);
194
195        if ($records instanceof Collection) {
196            // Export just these records - e.g. from clippings cart.
197            $data = [
198                new Collection([$this->createHeader($tree, $encoding, false)]),
199                $records,
200                new Collection(['0 TRLR']),
201            ];
202        } elseif ($access_level === Auth::PRIV_HIDE) {
203            // If we will be applying privacy filters, then we will need the GEDCOM record objects.
204            $data = [
205                new Collection([$this->createHeader($tree, $encoding, true)]),
206                $this->individualQuery($tree, $sort_by_xref)->cursor(),
207                $this->familyQuery($tree, $sort_by_xref)->cursor(),
208                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
209                $this->otherQuery($tree, $sort_by_xref)->cursor(),
210                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
211                new Collection(['0 TRLR']),
212            ];
213        } else {
214            // Disable the pending changes before creating GEDCOM records.
215            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
216                return new Collection();
217            });
218
219            $data = [
220                new Collection([$this->createHeader($tree, $encoding, true)]),
221                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
222                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
223                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
224                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
225                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
226                new Collection(['0 TRLR']),
227            ];
228        }
229
230        $media_filesystem = Registry::filesystem()->media($tree);
231
232        foreach ($data as $rows) {
233            foreach ($rows as $datum) {
234                if (is_string($datum)) {
235                    $gedcom = $datum;
236                } elseif ($datum instanceof GedcomRecord) {
237                    $gedcom = $datum->privatizeGedcom($access_level);
238                } else {
239                    $gedcom =
240                        $datum->i_gedcom ??
241                        $datum->f_gedcom ??
242                        $datum->s_gedcom ??
243                        $datum->m_gedcom ??
244                        $datum->o_gedcom;
245                }
246
247                if ($media_path !== null && $zip_filesystem !== null && preg_match('/0 @' . Gedcom::REGEX_XREF . '@ OBJE/', $gedcom) === 1) {
248                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);
249
250                    foreach ($matches as $match) {
251                        $media_file = $match[1];
252
253                        if ($media_filesystem->fileExists($media_file)) {
254                            $zip_filesystem->writeStream($media_path . $media_file, $media_filesystem->readStream($media_file));
255                        }
256                    }
257                }
258
259                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";
260
261                if ($line_endings === 'CRLF') {
262                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
263                }
264
265                $bytes_written = fwrite($stream, $gedcom);
266
267                if ($bytes_written !== strlen($gedcom)) {
268                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
269                }
270            }
271        }
272
273        if (rewind($stream) === false) {
274            throw new RuntimeException('Cannot rewind temporary stream');
275        }
276
277        return $stream;
278    }
279
280    /**
281     * Create a header record for a gedcom file.
282     *
283     * @param Tree   $tree
284     * @param string $encoding
285     * @param bool   $include_sub
286     *
287     * @return string
288     */
289    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
290    {
291        // Force a ".ged" suffix
292        $filename = $tree->name();
293
294        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
295            $filename .= '.ged';
296        }
297
298        $gedcom_encodings = [
299            UTF16BE::NAME     => 'UNICODE',
300            UTF16LE::NAME     => 'UNICODE',
301            Windows1252::NAME => 'ANSI',
302        ];
303
304        $encoding = $gedcom_encodings[$encoding] ?? $encoding;
305
306        // Build a new header record
307        $gedcom = '0 HEAD';
308        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
309        $gedcom .= "\n2 NAME " . Webtrees::NAME;
310        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
311        $gedcom .= "\n1 DEST DISKETTE";
312        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
313        $gedcom .= "\n2 TIME " . date('H:i:s');
314        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
315        $gedcom .= "\n1 CHAR " . $encoding;
316        $gedcom .= "\n1 FILE " . $filename;
317
318        // Preserve some values from the original header
319        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);
320
321        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
322            $gedcom .= "\n" . $fact->gedcom();
323        }
324
325        if ($include_sub) {
326            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
327                $gedcom .= "\n" . $fact->gedcom();
328            }
329        }
330
331        return $gedcom;
332    }
333
334    /**
335     * Prepend a media path, such as might have been removed during import.
336     *
337     * @param string $gedcom
338     * @param string $media_path
339     *
340     * @return string
341     */
342    private function convertMediaPath(string $gedcom, string $media_path): string
343    {
344        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
345            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
346                $filename = $match[1];
347
348                // Don’t modify external links
349                if (!str_contains($filename, '://')) {
350                    $filename = $media_path . $filename;
351                }
352
353                return "\n1 FILE " . $filename;
354            }, $gedcom);
355        }
356
357        return $gedcom;
358    }
359
360    /**
361     * Wrap long lines using concatenation records.
362     *
363     * @param string $gedcom
364     * @param int    $max_line_length
365     *
366     * @return string
367     */
368    public function wrapLongLines(string $gedcom, int $max_line_length): string
369    {
370        $lines = [];
371
372        foreach (explode("\n", $gedcom) as $line) {
373            // Split long lines
374            // The total length of a GEDCOM line, including level number, cross-reference number,
375            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
376            if (mb_strlen($line) > $max_line_length) {
377                [$level, $tag] = explode(' ', $line, 3);
378                if ($tag !== 'CONT') {
379                    $level++;
380                }
381                do {
382                    // Split after $pos chars
383                    $pos = $max_line_length;
384                    // Split on a non-space (standard gedcom behavior)
385                    while (mb_substr($line, $pos - 1, 1) === ' ') {
386                        --$pos;
387                    }
388                    if ($pos === strpos($line, ' ', 3)) {
389                        // No non-spaces in the data! Can’t split it :-(
390                        break;
391                    }
392                    $lines[] = mb_substr($line, 0, $pos);
393                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
394                } while (mb_strlen($line) > $max_line_length);
395            }
396            $lines[] = $line;
397        }
398
399        return implode("\n", $lines);
400    }
401
402    /**
403     * @param Tree $tree
404     * @param bool $sort_by_xref
405     *
406     * @return Builder
407     */
408    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
409    {
410        $query = DB::table('families')
411            ->where('f_file', '=', $tree->id())
412            ->select(['f_gedcom', 'f_id']);
413
414
415        if ($sort_by_xref) {
416            $query
417                ->orderBy(new Expression('LENGTH(f_id)'))
418                ->orderBy('f_id');
419        }
420
421        return $query;
422    }
423
424    /**
425     * @param Tree $tree
426     * @param bool $sort_by_xref
427     *
428     * @return Builder
429     */
430    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
431    {
432        $query = DB::table('individuals')
433            ->where('i_file', '=', $tree->id())
434            ->select(['i_gedcom', 'i_id']);
435
436        if ($sort_by_xref) {
437            $query
438                ->orderBy(new Expression('LENGTH(i_id)'))
439                ->orderBy('i_id');
440        }
441
442        return $query;
443    }
444
445    /**
446     * @param Tree $tree
447     * @param bool $sort_by_xref
448     *
449     * @return Builder
450     */
451    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
452    {
453        $query = DB::table('sources')
454            ->where('s_file', '=', $tree->id())
455            ->select(['s_gedcom', 's_id']);
456
457        if ($sort_by_xref) {
458            $query
459                ->orderBy(new Expression('LENGTH(s_id)'))
460                ->orderBy('s_id');
461        }
462
463        return $query;
464    }
465
466    /**
467     * @param Tree $tree
468     * @param bool $sort_by_xref
469     *
470     * @return Builder
471     */
472    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
473    {
474        $query = DB::table('media')
475            ->where('m_file', '=', $tree->id())
476            ->select(['m_gedcom', 'm_id']);
477
478        if ($sort_by_xref) {
479            $query
480                ->orderBy(new Expression('LENGTH(m_id)'))
481                ->orderBy('m_id');
482        }
483
484        return $query;
485    }
486
487    /**
488     * @param Tree $tree
489     * @param bool $sort_by_xref
490     *
491     * @return Builder
492     */
493    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
494    {
495        $query = DB::table('other')
496            ->where('o_file', '=', $tree->id())
497            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
498            ->select(['o_gedcom', 'o_id']);
499
500        if ($sort_by_xref) {
501            $query
502                ->orderBy('o_type')
503                ->orderBy(new Expression('LENGTH(o_id)'))
504                ->orderBy('o_id');
505        }
506
507        return $query;
508    }
509}
510