xref: /webtrees/app/Services/GedcomExportService.php (revision cd7208d453330ddd37c842f0504708dc8dbcbb09)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Auth;
23use Fisharebest\Webtrees\DB;
24use Fisharebest\Webtrees\Encodings\UTF16BE;
25use Fisharebest\Webtrees\Encodings\UTF16LE;
26use Fisharebest\Webtrees\Encodings\UTF8;
27use Fisharebest\Webtrees\Encodings\Windows1252;
28use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
29use Fisharebest\Webtrees\Gedcom;
30use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
31use Fisharebest\Webtrees\GedcomRecord;
32use Fisharebest\Webtrees\Header;
33use Fisharebest\Webtrees\Registry;
34use Fisharebest\Webtrees\Tree;
35use Fisharebest\Webtrees\Webtrees;
36use Illuminate\Database\Query\Builder;
37use Illuminate\Database\Query\Expression;
38use Illuminate\Support\Collection;
39use League\Flysystem\Filesystem;
40use League\Flysystem\FilesystemOperator;
41use League\Flysystem\ZipArchive\FilesystemZipArchiveProvider;
42use League\Flysystem\ZipArchive\ZipArchiveAdapter;
43use Psr\Http\Message\ResponseFactoryInterface;
44use Psr\Http\Message\ResponseInterface;
45use Psr\Http\Message\StreamFactoryInterface;
46use RuntimeException;
47
48use function addcslashes;
49use function date;
50use function explode;
51use function fclose;
52use function fopen;
53use function fwrite;
54use function is_string;
55use function pathinfo;
56use function preg_match_all;
57use function rewind;
58use function stream_filter_append;
59use function stream_get_meta_data;
60use function strlen;
61use function strpos;
62use function strtolower;
63use function strtoupper;
64use function tmpfile;
65
66use const PATHINFO_EXTENSION;
67use const PREG_SET_ORDER;
68use const STREAM_FILTER_WRITE;
69
70/**
71 * Export data in GEDCOM format
72 */
73class GedcomExportService
74{
75    private const ACCESS_LEVELS = [
76        'gedadmin' => Auth::PRIV_NONE,
77        'user'     => Auth::PRIV_USER,
78        'visitor'  => Auth::PRIV_PRIVATE,
79        'none'     => Auth::PRIV_HIDE,
80    ];
81
82    private ResponseFactoryInterface $response_factory;
83
84    private StreamFactoryInterface $stream_factory;
85
86    public function __construct(ResponseFactoryInterface $response_factory, StreamFactoryInterface $stream_factory)
87    {
88        $this->response_factory = $response_factory;
89        $this->stream_factory   = $stream_factory;
90    }
91
92    /**
93     * @param Tree                                            $tree         Export data from this tree
94     * @param bool                                            $sort_by_xref Write GEDCOM records in XREF order
95     * @param string                                          $encoding     Convert from UTF-8 to other encoding
96     * @param string                                          $privacy      Filter records by role
97     * @param string                                          $line_endings CRLF or LF
98     * @param string                                          $filename     Name of download file, without an extension
99     * @param string                                          $format       One of: gedcom, zip, zipmedia, gedzip
100     * @param Collection<int,string|object|GedcomRecord>|null $records
101     */
102    public function downloadResponse(
103        Tree $tree,
104        bool $sort_by_xref,
105        string $encoding,
106        string $privacy,
107        string $line_endings,
108        string $filename,
109        string $format,
110        Collection $records = null
111    ): ResponseInterface {
112        $access_level = self::ACCESS_LEVELS[$privacy];
113
114        if ($format === 'gedcom') {
115            $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records);
116            $stream   = $this->stream_factory->createStreamFromResource($resource);
117
118            return $this->response_factory->createResponse()
119                ->withBody($stream)
120                ->withHeader('content-type', 'text/x-gedcom; charset=' . UTF8::NAME)
121                ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . '.ged"');
122        }
123
124        // Create a new/empty .ZIP file
125        $temp_zip_file  = stream_get_meta_data(tmpfile())['uri'];
126        $zip_provider   = new FilesystemZipArchiveProvider($temp_zip_file, 0755);
127        $zip_adapter    = new ZipArchiveAdapter($zip_provider);
128        $zip_filesystem = new Filesystem($zip_adapter);
129
130        if ($format === 'zipmedia') {
131            $media_path = $tree->getPreference('MEDIA_DIRECTORY');
132        } elseif ($format === 'gedzip') {
133            $media_path = '';
134        } else {
135            // Don't add media
136            $media_path = null;
137        }
138
139        $resource = $this->export($tree, $sort_by_xref, $encoding, $access_level, $line_endings, $records, $zip_filesystem, $media_path);
140
141        if ($format === 'gedzip') {
142            $zip_filesystem->writeStream('gedcom.ged', $resource);
143            $extension = '.gdz';
144        } else {
145            $zip_filesystem->writeStream($filename . '.ged', $resource);
146            $extension = '.zip';
147        }
148
149        fclose($resource);
150
151        $stream = $this->stream_factory->createStreamFromFile($temp_zip_file);
152
153        return $this->response_factory->createResponse()
154            ->withBody($stream)
155            ->withHeader('content-type', 'application/zip')
156            ->withHeader('content-disposition', 'attachment; filename="' . addcslashes($filename, '"') . $extension . '"');
157    }
158
159    /**
160     * Write GEDCOM data to a stream.
161     *
162     * @param Tree                                            $tree           Export data from this tree
163     * @param bool                                            $sort_by_xref   Write GEDCOM records in XREF order
164     * @param string                                          $encoding       Convert from UTF-8 to other encoding
165     * @param int                                             $access_level   Apply privacy filtering
166     * @param string                                          $line_endings   CRLF or LF
167     * @param Collection<int,string|object|GedcomRecord>|null $records        Just export these records
168     * @param FilesystemOperator|null                         $zip_filesystem Write media files to this filesystem
169     * @param string|null                                     $media_path     Location within the zip filesystem
170     *
171     * @return resource
172     */
173    public function export(
174        Tree $tree,
175        bool $sort_by_xref = false,
176        string $encoding = UTF8::NAME,
177        int $access_level = Auth::PRIV_HIDE,
178        string $line_endings = 'CRLF',
179        Collection|null $records = null,
180        FilesystemOperator|null $zip_filesystem = null,
181        string $media_path = null
182    ) {
183        $stream = fopen('php://memory', 'wb+');
184
185        if ($stream === false) {
186            throw new RuntimeException('Failed to create temporary stream');
187        }
188
189        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);
190
191        if ($records instanceof Collection) {
192            // Export just these records - e.g. from clippings cart.
193            $data = [
194                new Collection([$this->createHeader($tree, $encoding, false)]),
195                $records,
196                new Collection(['0 TRLR']),
197            ];
198        } elseif ($access_level === Auth::PRIV_HIDE) {
199            // If we will be applying privacy filters, then we will need the GEDCOM record objects.
200            $data = [
201                new Collection([$this->createHeader($tree, $encoding, true)]),
202                $this->individualQuery($tree, $sort_by_xref)->cursor(),
203                $this->familyQuery($tree, $sort_by_xref)->cursor(),
204                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
205                $this->otherQuery($tree, $sort_by_xref)->cursor(),
206                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
207                new Collection(['0 TRLR']),
208            ];
209        } else {
210            // Disable the pending changes before creating GEDCOM records.
211            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static fn (): Collection => new Collection());
212
213            $data = [
214                new Collection([$this->createHeader($tree, $encoding, true)]),
215                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
216                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
217                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
218                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
219                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
220                new Collection(['0 TRLR']),
221            ];
222        }
223
224        $media_filesystem = $tree->mediaFilesystem();
225
226        foreach ($data as $rows) {
227            foreach ($rows as $datum) {
228                if (is_string($datum)) {
229                    $gedcom = $datum;
230                } elseif ($datum instanceof GedcomRecord) {
231                    $gedcom = $datum->privatizeGedcom($access_level);
232
233                    if ($gedcom === '') {
234                        continue;
235                    }
236                } else {
237                    $gedcom =
238                        $datum->i_gedcom ??
239                        $datum->f_gedcom ??
240                        $datum->s_gedcom ??
241                        $datum->m_gedcom ??
242                        $datum->o_gedcom;
243                }
244
245                if ($media_path !== null && $zip_filesystem !== null && preg_match('/0 @' . Gedcom::REGEX_XREF . '@ OBJE/', $gedcom) === 1) {
246                    preg_match_all('/\n1 FILE (.+)/', $gedcom, $matches, PREG_SET_ORDER);
247
248                    foreach ($matches as $match) {
249                        $media_file = $match[1];
250
251                        if ($media_filesystem->fileExists($media_file)) {
252                            $zip_filesystem->writeStream($media_path . $media_file, $media_filesystem->readStream($media_file));
253                        }
254                    }
255                }
256
257                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";
258
259                if ($line_endings === 'CRLF') {
260                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
261                }
262
263                $bytes_written = fwrite($stream, $gedcom);
264
265                if ($bytes_written !== strlen($gedcom)) {
266                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
267                }
268            }
269        }
270
271        if (rewind($stream) === false) {
272            throw new RuntimeException('Cannot rewind temporary stream');
273        }
274
275        return $stream;
276    }
277
278    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
279    {
280        // Force a ".ged" suffix
281        $filename = $tree->name();
282
283        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
284            $filename .= '.ged';
285        }
286
287        $gedcom_encodings = [
288            UTF16BE::NAME     => 'UNICODE',
289            UTF16LE::NAME     => 'UNICODE',
290            Windows1252::NAME => 'ANSI',
291        ];
292
293        $encoding = $gedcom_encodings[$encoding] ?? $encoding;
294
295        // Build a new header record
296        $gedcom = '0 HEAD';
297        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
298        $gedcom .= "\n2 NAME " . Webtrees::NAME;
299        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
300        $gedcom .= "\n1 DEST DISKETTE";
301        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
302        $gedcom .= "\n2 TIME " . date('H:i:s');
303        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
304        $gedcom .= "\n1 CHAR " . $encoding;
305        $gedcom .= "\n1 FILE " . $filename;
306
307        // Preserve some values from the original header
308        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);
309
310        // There should always be a header record.
311        if ($header instanceof Header) {
312            foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
313                $gedcom .= "\n" . $fact->gedcom();
314            }
315
316            if ($include_sub) {
317                foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
318                    $gedcom .= "\n" . $fact->gedcom();
319                }
320            }
321        }
322
323        return $gedcom;
324    }
325
326    public function wrapLongLines(string $gedcom, int $max_line_length): string
327    {
328        $lines = [];
329
330        foreach (explode("\n", $gedcom) as $line) {
331            // Split long lines
332            // The total length of a GEDCOM line, including level number, cross-reference number,
333            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
334            if (mb_strlen($line) > $max_line_length) {
335                [$level, $tag] = explode(' ', $line, 3);
336                if ($tag !== 'CONT') {
337                    $level++;
338                }
339                do {
340                    // Split after $pos chars
341                    $pos = $max_line_length;
342                    // Split on a non-space (standard gedcom behavior)
343                    while (mb_substr($line, $pos - 1, 1) === ' ') {
344                        --$pos;
345                    }
346                    if ($pos === strpos($line, ' ', 3)) {
347                        // No non-spaces in the data! Can’t split it :-(
348                        break;
349                    }
350                    $lines[] = mb_substr($line, 0, $pos);
351                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
352                } while (mb_strlen($line) > $max_line_length);
353            }
354            $lines[] = $line;
355        }
356
357        return implode("\n", $lines);
358    }
359
360    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
361    {
362        $query = DB::table('families')
363            ->where('f_file', '=', $tree->id())
364            ->select(['f_gedcom', 'f_id']);
365
366        if ($sort_by_xref) {
367            $query
368                ->orderBy(new Expression('LENGTH(f_id)'))
369                ->orderBy('f_id');
370        }
371
372        return $query;
373    }
374
375    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
376    {
377        $query = DB::table('individuals')
378            ->where('i_file', '=', $tree->id())
379            ->select(['i_gedcom', 'i_id']);
380
381        if ($sort_by_xref) {
382            $query
383                ->orderBy(new Expression('LENGTH(i_id)'))
384                ->orderBy('i_id');
385        }
386
387        return $query;
388    }
389
390    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
391    {
392        $query = DB::table('sources')
393            ->where('s_file', '=', $tree->id())
394            ->select(['s_gedcom', 's_id']);
395
396        if ($sort_by_xref) {
397            $query
398                ->orderBy(new Expression('LENGTH(s_id)'))
399                ->orderBy('s_id');
400        }
401
402        return $query;
403    }
404
405    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
406    {
407        $query = DB::table('media')
408            ->where('m_file', '=', $tree->id())
409            ->select(['m_gedcom', 'm_id']);
410
411        if ($sort_by_xref) {
412            $query
413                ->orderBy(new Expression('LENGTH(m_id)'))
414                ->orderBy('m_id');
415        }
416
417        return $query;
418    }
419
420    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
421    {
422        $query = DB::table('other')
423            ->where('o_file', '=', $tree->id())
424            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
425            ->select(['o_gedcom', 'o_id']);
426
427        if ($sort_by_xref) {
428            $query
429                ->orderBy('o_type')
430                ->orderBy(new Expression('LENGTH(o_id)'))
431                ->orderBy('o_id');
432        }
433
434        return $query;
435    }
436}
437