xref: /webtrees/app/Services/GedcomExportService.php (revision d1a8402e9ca2ba8e58bd30cfd5a35f2d81ec931b)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Auth;
23use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
24use Fisharebest\Webtrees\Registry;
25use Fisharebest\Webtrees\Gedcom;
26use Fisharebest\Webtrees\GedcomRecord;
27use Fisharebest\Webtrees\Header;
28use Fisharebest\Webtrees\Tree;
29use Fisharebest\Webtrees\Webtrees;
30use Illuminate\Database\Capsule\Manager as DB;
31use Illuminate\Database\Query\Builder;
32use Illuminate\Database\Query\Expression;
33use Illuminate\Support\Collection;
34use RuntimeException;
35
36use function date;
37use function explode;
38use function fopen;
39use function fwrite;
40use function mb_convert_encoding;
41use function pathinfo;
42use function rewind;
43use function str_contains;
44use function str_starts_with;
45use function strlen;
46use function strpos;
47use function strtolower;
48use function strtoupper;
49use function utf8_decode;
50
51use const PATHINFO_EXTENSION;
52
/**
 * Export data in GEDCOM format
 *
 * Builds a GEDCOM file (header, records, trailer) for a tree, optionally
 * applying privacy filtering, media-path rewriting, long-line wrapping and
 * character-set conversion, and writes the result to a temporary stream.
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * The data is written to a "php://memory" stream, which is rewound before
     * it is returned so that the caller can read it from the beginning.
     *
     * Each item in $records (and in the internally generated data) may be a
     * string of GEDCOM text, a GedcomRecord object, or a database row holding
     * one of the i_gedcom/f_gedcom/s_gedcom/m_gedcom/o_gedcom columns.
     *
     * @param Tree                    $tree         - Export data from this tree
     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                  $encoding     - Convert from UTF-8 to other encoding
     * @param int                     $access_level - Apply privacy filtering
     * @param string                  $media_path   - Prepend path to media filenames
     * @param Collection<string>|null $records      - Just export these records
     *
     * @return resource
     *
     * @throws RuntimeException If the stream cannot be created, written to, or rewound.
     */
    public function export(
        Tree $tree,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ) {
        $stream = fopen('php://memory', 'wb+');

        if ($stream === false) {
            throw new RuntimeException('Failed to create temporary stream');
        }

        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // No GedcomRecord objects are created here: the raw GEDCOM text of each
            // database row is written unchanged, so the rows can be streamed with a cursor.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            // (This seeds the array cache entry for this tree with an empty collection.)
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Ready-made GEDCOM text, such as the header and the trailer.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A database row. Each record query selects a single "*_gedcom" column.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                $bytes_written = fwrite($stream, $gedcom);

                // fwrite() reports bytes, so compare against the byte length of the converted data.
                if ($bytes_written !== strlen($gedcom)) {
                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
                }
            }
        }

        if (rewind($stream) === false) {
            throw new RuntimeException('Cannot rewind temporary stream');
        }

        return $stream;
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is generated afresh (source, date, GEDCOM version, character set,
     * filename), and then selected facts are copied from the tree's existing header.
     *
     * @param Tree   $tree
     * @param string $encoding    - Written verbatim as the "1 CHAR" value
     * @param bool   $include_sub - Also copy the SUBM and SUBN facts from the existing header
     *
     * @return string Lines are separated by "\n", with no trailing newline.
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header.
        // If the tree has no header record, use an empty one (which has no facts to copy).
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object records (those starting "0 @...@ OBJE") are modified.
     * Filenames containing "://" and filenames that already start with the
     * media path are left alone.
     *
     * @param string $gedcom     - A single GEDCOM record, with "\n" line separators
     * @param string $media_path - The prefix to add to each level 1 FILE value
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Don’t modify external links
                if (!str_contains($filename, '://')) {
                    // Convert separators to match new path.
                    if (str_contains($media_path, '\\')) {
                        $filename = strtr($filename, ['/' => '\\']);
                    }

                    if (!str_starts_with($filename, $media_path)) {
                        $filename = $media_path . $filename;
                    }
                }

                return "\n1 FILE " . $filename;
            }, $gedcom);
        }

        return $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM data to the requested character set.
     *
     * Characters that cannot be represented in the target character set are
     * replaced by the mbstring substitution character ("?" by default).
     * Unrecognised encodings, including UTF-8, are returned unchanged.
     *
     * @param string $encoding - 'ANSI', 'ANSEL', 'ASCII', or anything else for no conversion
     * @param string $gedcom   - UTF-8 encoded GEDCOM data
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1.
                // (utf8_decode() did the same job, but is deprecated since PHP 8.2.)
                return mb_convert_encoding($gedcom, 'ISO-8859-1', 'UTF-8');

            case 'ANSEL':
                // coming soon...? Until then, ANSEL is exported as ASCII.
            case 'ASCII':
                // Might be needed by really old software?
                // Argument order is (string, to_encoding, from_encoding).
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * Input lines are separated by "\n"; output lines are joined with Gedcom::EOL.
     * Line lengths are measured in characters, not bytes.
     *
     * $max_line_length is expected to be comfortably larger than the "N CONC "
     * prefix of a continuation line. With very small values (less than 6) each
     * continuation line is longer than the previous one and the loop does not end.
     *
     * @param string $gedcom
     * @param int    $max_line_length
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                [$level, $tag] = explode(' ', $line, 3);
                // CONC lines are subordinate to the line they continue, except when that
                // line is a CONT, where the CONC is written at the same level as the CONT.
                if ($tag !== 'CONT') {
                    $level++;
                }
                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    // We have backed up as far as the space that follows the tag.
                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Can’t split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Select the GEDCOM data and XREF of every family in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $this->orderByXref($query, 'f_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every individual in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $this->orderByXref($query, 'i_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every source in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $this->orderByXref($query, 's_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every media object in a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $this->orderByXref($query, 'm_id');
        }

        return $query;
    }

    /**
     * Select the GEDCOM data and XREF of every other record in a tree.
     *
     * Header and trailer records are excluded, as export() writes its own.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            // Group the records by type, then sort each group by XREF.
            $query->orderBy('o_type');
            $this->orderByXref($query, 'o_id');
        }

        return $query;
    }

    /**
     * Sort a query by XREF: by length first and then by value, so that
     * (for example) X2 is listed before X10.
     *
     * @param Builder $query       - The query to modify
     * @param string  $xref_column - Name of the XREF column. Must be a trusted,
     *                               hard-coded identifier, as it is used in a raw expression.
     *
     * @return Builder The same query object, for chaining.
     */
    private function orderByXref(Builder $query, string $xref_column): Builder
    {
        return $query
            ->orderBy(new Expression('LENGTH(' . $xref_column . ')'))
            ->orderBy($xref_column);
    }
}
409