xref: /webtrees/app/Services/GedcomExportService.php (revision 1821c9e596679be8717c51394b448df204b1f577)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Auth;
23use Fisharebest\Webtrees\Encodings\UTF16BE;
24use Fisharebest\Webtrees\Encodings\UTF16LE;
25use Fisharebest\Webtrees\Encodings\UTF8;
26use Fisharebest\Webtrees\Encodings\Windows1252;
27use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
28use Fisharebest\Webtrees\Gedcom;
29use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
30use Fisharebest\Webtrees\GedcomRecord;
31use Fisharebest\Webtrees\Header;
32use Fisharebest\Webtrees\Registry;
33use Fisharebest\Webtrees\Tree;
34use Fisharebest\Webtrees\Webtrees;
35use Illuminate\Database\Capsule\Manager as DB;
36use Illuminate\Database\Query\Builder;
37use Illuminate\Database\Query\Expression;
38use Illuminate\Support\Collection;
39use League\Flysystem\Filesystem;
40use League\Flysystem\FilesystemOperator;
41use League\Flysystem\ZipArchive\FilesystemZipArchiveProvider;
42use League\Flysystem\ZipArchive\ZipArchiveAdapter;
43use Psr\Http\Message\ResponseFactoryInterface;
44use Psr\Http\Message\ResponseInterface;
45use Psr\Http\Message\StreamFactoryInterface;
46use RuntimeException;
47
48use function addcslashes;
49use function date;
50use function explode;
51use function fclose;
52use function fopen;
53use function fwrite;
54use function is_string;
55use function pathinfo;
56use function preg_match_all;
57use function rewind;
58use function str_contains;
59use function stream_filter_append;
60use function stream_get_meta_data;
61use function strlen;
62use function strpos;
63use function strtolower;
64use function strtoupper;
65use function tmpfile;
66
67use const PATHINFO_EXTENSION;
68use const PREG_SET_ORDER;
69use const STREAM_FILTER_WRITE;
70
71/**
72 * Export data in GEDCOM format
73 */
class GedcomExportService
{
    // Maps the "privacy" option of a download request to the access level used when exporting.
    private const ACCESS_LEVELS = [
        'gedadmin' => Auth::PRIV_NONE,
        'user'     => Auth::PRIV_USER,
        'visitor'  => Auth::PRIV_PRIVATE,
        'none'     => Auth::PRIV_HIDE,
    ];

    private ResponseFactoryInterface $response_factory;

    private StreamFactoryInterface $stream_factory;

    /**
     * @param ResponseFactoryInterface $response_factory
     * @param StreamFactoryInterface   $stream_factory
     */
    public function __construct(ResponseFactoryInterface $response_factory, StreamFactoryInterface $stream_factory)
    {
        $this->response_factory = $response_factory;
        $this->stream_factory   = $stream_factory;
    }

    /**
     * Create an HTTP download response containing a GEDCOM export.
     *
     * For the "gedcom" format, the response body is the GEDCOM data itself.
     * For all other formats, the GEDCOM data (and optionally the media files) are
     * written to a temporary ZIP archive, which becomes the response body.
     *
     * @param Tree                        $tree           - Export data from this tree
     * @param bool                        $sort_by_xref   - Write GEDCOM records in XREF order
     * @param string                      $encoding       - Convert from UTF-8 to other encoding
     * @param string                      $privacy        - Filter records by role (a key of self::ACCESS_LEVELS)
     * @param string                      $line_endings   - CRLF or LF
     * @param string                      $filename       - Name of download file, without an extension
     * @param string                      $format         - One of: gedcom, zip, zipmedia, gedzip
     * @param Collection<int,mixed>|null  $records        - Just export these records
     *
     * @return ResponseInterface
     *
     * @throws RuntimeException - if a temporary file or stream cannot be created or written
     */
    public function downloadResponse(
        Tree $tree,
        bool $sort_by_xref,
        string $encoding,
        string $privacy,
        string $line_endings,
        string $filename,
        string $format,
        ?Collection $records = null
    ): ResponseInterface {
        $access_level = self::ACCESS_LEVELS[$privacy];

        if ($format === 'gedcom') {
            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
            $stream   = $this->stream_factory->createStreamFromResource($resource);

            // NOTE(review): the charset is always reported as UTF-8, even when $encoding
            // selects a different output encoding - confirm that this is intended.
            return $this->response_factory->createResponse()
                ->withBody($stream)
                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
                ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . '.ged"');
        }

        // Use tmpfile() only to obtain a unique filename for a new/empty .ZIP file.
        $temp_handle = tmpfile();

        if ($temp_handle === false) {
            throw new RuntimeException('Failed to create temporary file');
        }

        $temp_zip_file = stream_get_meta_data($temp_handle)['uri'];

        // Closing the handle removes the placeholder file, so that the zip archive is created from scratch.
        fclose($temp_handle);

        $zip_provider   = new FilesystemZipArchiveProvider($temp_zip_file, 0755);
        $zip_adapter    = new ZipArchiveAdapter($zip_provider);
        $zip_filesystem = new Filesystem($zip_adapter);

        if ($format === 'zipmedia') {
            $media_path = $tree->getPreference('MEDIA_DIRECTORY');
        } elseif ($format === 'gedzip') {
            $media_path = '';
        } else {
            // Don't add media
            $media_path = null;
        }

        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);

        if ($format === 'gedzip') {
            $zip_filesystem->writeStream('gedcom.ged', $resource);
            $extension = '.gdz';
        } else {
            $zip_filesystem->writeStream($filename . '.ged', $resource);
            $extension = '.zip';
        }

        fclose($resource);

        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);

        return $this->response_factory->createResponse()
            ->withBody($stream)
            ->withHeader('content-type', 'application/zip')
            ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . $extension . '"');
    }

    /**
     * Write GEDCOM data to a stream.
     *
     * The returned stream is an in-memory stream, rewound to the start.
     * When both $zip_filesystem and $media_path are provided, the files referenced
     * by media objects are also copied into the zip filesystem.
     *
     * @param Tree                        $tree           - Export data from this tree
     * @param bool                        $sort_by_xref   - Write GEDCOM records in XREF order
     * @param string                      $encoding       - Convert from UTF-8 to other encoding
     * @param int                         $access_level   - Apply privacy filtering
     * @param string                      $line_endings   - CRLF or LF
     * @param Collection<int,mixed>|null  $records        - Just export these records (GEDCOM strings, GedcomRecord objects or database rows)
     * @param FilesystemOperator|null     $zip_filesystem - Write media files to this filesystem
     * @param string|null                 $media_path     - Location within the zip filesystem
     *
     * @return resource
     *
     * @throws RuntimeException - if the temporary stream cannot be created, written or rewound
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = UTF8::NAME,
        int $access_level = Auth::PRIV_HIDE,
        string $line_endings = 'CRLF',
        ?Collection $records = null,
        ?FilesystemOperator $zip_filesystem = null,
        ?string $media_path = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        // Everything written to the stream is converted from UTF-8 to the requested encoding.
        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // The raw GEDCOM is written unmodified, so we can stream the rows directly from the database.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.
            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        $media_filesystem = Registry::filesystem()->media($tree);

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Already GEDCOM text - e.g. the header, the trailer or a pre-rendered record.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A database row: only one of these columns exists, depending on the source table.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                // Copy the files of media objects into the zip filesystem.
                if ($media_path !== null && $zip_filesystem !== null && preg_match('/0 @' . Gedcom::REGEX_XREF . '@ OBJE/', $gedcom) === 1) {
                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);

                    foreach ($matches as $match) {
                        $media_file = $match[1];

                        if ($media_filesystem->fileExists($media_file)) {
                            $media_stream = $media_filesystem->readStream($media_file);

                            try {
                                $zip_filesystem->writeStream($media_path . $media_file, $media_stream);
                            } finally {
                                // Do not leak one open file handle per media file.
                                if (is_resource($media_stream)) {
                                    fclose($media_stream);
                                }
                            }
                        }
                    }
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";

                if ($line_endings === 'CRLF') {
                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
                }

                $bytes_written = fwrite($stream, $gedcom);

                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Create a header record for a gedcom file.
     *
     * @param Tree   $tree
     * @param string $encoding    - Name of the output encoding, used for the CHAR tag
     * @param bool   $include_sub - Copy the SUBM/SUBN facts from the existing header
     *
     * @return string
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Some encodings are written to the CHAR tag under a different name.
        $gedcom_encodings = [
            UTF16BE::NAME     => 'UNICODE',
            UTF16LE::NAME     => 'UNICODE',
            Windows1252::NAME => 'ANSI',
        ];

        $encoding = $gedcom_encodings[$encoding] ?? $encoding;

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * Lines are measured in characters, not bytes.  Lines are never split
     * immediately after a space, as trailing spaces could be lost by other applications.
     *
     * @param string $gedcom          - One or more GEDCOM lines, separated by "\n"
     * @param int    $max_line_length - Maximum number of characters in a line
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);

                // CONT and CONC lines are themselves continuations, so any further continuation
                // stays at the same level.  Everything else is continued one level below.
                if ($tag !== 'CONT' && $tag !== 'CONC') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;

                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }

                    // $pos counts characters, so compare it with a character offset.
                    if ($pos === mb_strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }

                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }

            $lines[] = $line;
        }

        return implode("\n", $lines);
    }

    /**
     * Select the GEDCOM data of all families in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        return $sort_by_xref ? $this->orderByXref($query, 'f_id') : $query;
    }

    /**
     * Select the GEDCOM data of all individuals in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        return $sort_by_xref ? $this->orderByXref($query, 'i_id') : $query;
    }

    /**
     * Select the GEDCOM data of all sources in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        return $sort_by_xref ? $this->orderByXref($query, 's_id') : $query;
    }

    /**
     * Select the GEDCOM data of all media objects in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        return $sort_by_xref ? $this->orderByXref($query, 'm_id') : $query;
    }

    /**
     * Select the GEDCOM data of all other records in a tree, except for the header and trailer.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            // Group the records by type, before sorting by XREF.
            $query->orderBy('o_type');
            $this->orderByXref($query, 'o_id');
        }

        return $query;
    }

    /**
     * Sort a query by XREF: shorter XREFs first, then by the XREF itself.
     *
     * @param Builder $query       - The query to sort (modified in place)
     * @param string  $xref_column - Name of the column containing the XREF
     *
     * @return Builder
     */
    private function orderByXref(Builder $query, string $xref_column): Builder
    {
        return $query
            ->orderBy(new Expression('LENGTH(' . $xref_column . ')'))
            ->orderBy($xref_column);
    }
}
485