xref: /webtrees/app/Services/GedcomExportService.php (revision bbf511c3d1d7c8ffcf60c229ed2890633fc80235)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Auth;
23use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
24use Fisharebest\Webtrees\Registry;
25use Fisharebest\Webtrees\Gedcom;
26use Fisharebest\Webtrees\GedcomRecord;
27use Fisharebest\Webtrees\Header;
28use Fisharebest\Webtrees\Tree;
29use Fisharebest\Webtrees\Webtrees;
30use Illuminate\Database\Capsule\Manager as DB;
31use Illuminate\Database\Query\Builder;
32use Illuminate\Database\Query\Expression;
33use Illuminate\Support\Collection;
34use RuntimeException;
35
36use function date;
37use function explode;
38use function fwrite;
39use function mb_convert_encoding;
40use function pathinfo;
41use function str_contains;
42use function str_starts_with;
43use function strlen;
44use function strpos;
45use function strtolower;
46use function strtoupper;
47use function utf8_decode;
48
49use const PATHINFO_EXTENSION;
50
51/**
52 * Export data in GEDCOM format
53 */
54class GedcomExportService
55{
56    /**
57     * Write GEDCOM data to a stream.
58     *
59     * @param Tree                    $tree         - Export data from this tree
60     * @param resource                $stream       - Write to this stream
61     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
62     * @param string                  $encoding     - Convert from UTF-8 to other encoding
63     * @param int                     $access_level - Apply privacy filtering
64     * @param string                  $media_path   - Prepend path to media filenames
65     * @param Collection<string>|null $records      - Just export these records
66     */
67    public function export(
68        Tree $tree,
69        $stream,
70        bool $sort_by_xref = false,
71        string $encoding = 'UTF-8',
72        int $access_level = Auth::PRIV_HIDE,
73        string $media_path = '',
74        Collection $records = null
75    ): void {
76        if ($records instanceof Collection) {
77            // Export just these records - e.g. from clippings cart.
78            $data = [
79                new Collection([$this->createHeader($tree, $encoding, false)]),
80                $records,
81                new Collection(['0 TRLR']),
82            ];
83        } elseif ($access_level === Auth::PRIV_HIDE) {
84            // If we will be applying privacy filters, then we will need the GEDCOM record objects.
85            $data = [
86                new Collection([$this->createHeader($tree, $encoding, true)]),
87                $this->individualQuery($tree, $sort_by_xref)->cursor(),
88                $this->familyQuery($tree, $sort_by_xref)->cursor(),
89                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
90                $this->otherQuery($tree, $sort_by_xref)->cursor(),
91                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
92                new Collection(['0 TRLR']),
93            ];
94        } else {
95            // Disable the pending changes before creating GEDCOM records.
96            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
97                return new Collection();
98            });
99
100            $data = [
101                new Collection([$this->createHeader($tree, $encoding, true)]),
102                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
103                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
104                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
105                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
106                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
107                new Collection(['0 TRLR']),
108            ];
109        }
110
111        foreach ($data as $rows) {
112            foreach ($rows as $datum) {
113                if (is_string($datum)) {
114                    $gedcom = $datum;
115                } elseif ($datum instanceof GedcomRecord) {
116                    $gedcom = $datum->privatizeGedcom($access_level);
117                } else {
118                    $gedcom =
119                        $datum->i_gedcom ??
120                        $datum->f_gedcom ??
121                        $datum->s_gedcom ??
122                        $datum->m_gedcom ??
123                        $datum->o_gedcom;
124                }
125
126                if ($media_path !== '') {
127                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
128                }
129
130                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
131                $gedcom = $this->convertEncoding($encoding, $gedcom);
132
133                $bytes_written = fwrite($stream, $gedcom);
134
135                if ($bytes_written !== strlen($gedcom)) {
136                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
137                }
138            }
139        }
140    }
141
142    /**
143     * Create a header record for a gedcom file.
144     *
145     * @param Tree   $tree
146     * @param string $encoding
147     * @param bool   $include_sub
148     *
149     * @return string
150     */
151    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
152    {
153        // Force a ".ged" suffix
154        $filename = $tree->name();
155
156        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
157            $filename .= '.ged';
158        }
159
160        // Build a new header record
161        $gedcom = '0 HEAD';
162        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
163        $gedcom .= "\n2 NAME " . Webtrees::NAME;
164        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
165        $gedcom .= "\n1 DEST DISKETTE";
166        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
167        $gedcom .= "\n2 TIME " . date('H:i:s');
168        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
169        $gedcom .= "\n1 CHAR " . $encoding;
170        $gedcom .= "\n1 FILE " . $filename;
171
172        // Preserve some values from the original header
173        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);
174
175        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
176            $gedcom .= "\n" . $fact->gedcom();
177        }
178
179        if ($include_sub) {
180            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
181                $gedcom .= "\n" . $fact->gedcom();
182            }
183        }
184
185        return $gedcom;
186    }
187
188    /**
189     * Prepend a media path, such as might have been removed during import.
190     *
191     * @param string $gedcom
192     * @param string $media_path
193     *
194     * @return string
195     */
196    private function convertMediaPath(string $gedcom, string $media_path): string
197    {
198        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
199            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
200                $filename = $match[1];
201
202                // Don’t modify external links
203                if (!str_contains($filename, '://')) {
204                    // Convert separators to match new path.
205                    if (str_contains($media_path, '\\')) {
206                        $filename = strtr($filename, ['/' => '\\']);
207                    }
208
209                    if (!str_starts_with($filename, $media_path)) {
210                        $filename = $media_path . $filename;
211                    }
212                }
213
214                return "\n1 FILE " . $filename;
215            }, $gedcom);
216        }
217
218        return $gedcom;
219    }
220
221    /**
222     * @param string $encoding
223     * @param string $gedcom
224     *
225     * @return string
226     */
227    private function convertEncoding(string $encoding, string $gedcom): string
228    {
229        switch ($encoding) {
230            case 'ANSI':
231                // Many desktop applications interpret ANSI as ISO-8859-1
232                return utf8_decode($gedcom);
233
234            case 'ANSEL':
235                // coming soon...?
236            case 'ASCII':
237                // Might be needed by really old software?
238                return mb_convert_encoding($gedcom, 'UTF-8', 'ASCII');
239
240            default:
241                return $gedcom;
242        }
243    }
244
245    /**
246     * Wrap long lines using concatenation records.
247     *
248     * @param string $gedcom
249     * @param int    $max_line_length
250     *
251     * @return string
252     */
253    public function wrapLongLines(string $gedcom, int $max_line_length): string
254    {
255        $lines = [];
256
257        foreach (explode("\n", $gedcom) as $line) {
258            // Split long lines
259            // The total length of a GEDCOM line, including level number, cross-reference number,
260            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
261            if (mb_strlen($line) > $max_line_length) {
262                [$level, $tag] = explode(' ', $line, 3);
263                if ($tag !== 'CONT') {
264                    $level++;
265                }
266                do {
267                    // Split after $pos chars
268                    $pos = $max_line_length;
269                    // Split on a non-space (standard gedcom behavior)
270                    while (mb_substr($line, $pos - 1, 1) === ' ') {
271                        --$pos;
272                    }
273                    if ($pos === strpos($line, ' ', 3)) {
274                        // No non-spaces in the data! Can’t split it :-(
275                        break;
276                    }
277                    $lines[] = mb_substr($line, 0, $pos);
278                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
279                } while (mb_strlen($line) > $max_line_length);
280            }
281            $lines[] = $line;
282        }
283
284        return implode(Gedcom::EOL, $lines);
285    }
286
287    /**
288     * @param Tree $tree
289     * @param bool $sort_by_xref
290     *
291     * @return Builder
292     */
293    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
294    {
295        $query = DB::table('families')
296            ->where('f_file', '=', $tree->id())
297            ->select(['f_gedcom', 'f_id']);
298
299
300        if ($sort_by_xref) {
301            $query
302                ->orderBy(new Expression('LENGTH(f_id)'))
303                ->orderBy('f_id');
304        }
305
306        return $query;
307    }
308
309    /**
310     * @param Tree $tree
311     * @param bool $sort_by_xref
312     *
313     * @return Builder
314     */
315    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
316    {
317        $query = DB::table('individuals')
318            ->where('i_file', '=', $tree->id())
319            ->select(['i_gedcom', 'i_id']);
320
321        if ($sort_by_xref) {
322            $query
323                ->orderBy(new Expression('LENGTH(i_id)'))
324                ->orderBy('i_id');
325        }
326
327        return $query;
328    }
329
330    /**
331     * @param Tree $tree
332     * @param bool $sort_by_xref
333     *
334     * @return Builder
335     */
336    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
337    {
338        $query = DB::table('sources')
339            ->where('s_file', '=', $tree->id())
340            ->select(['s_gedcom', 's_id']);
341
342        if ($sort_by_xref) {
343            $query
344                ->orderBy(new Expression('LENGTH(s_id)'))
345                ->orderBy('s_id');
346        }
347
348        return $query;
349    }
350
351    /**
352     * @param Tree $tree
353     * @param bool $sort_by_xref
354     *
355     * @return Builder
356     */
357    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
358    {
359        $query = DB::table('media')
360            ->where('m_file', '=', $tree->id())
361            ->select(['m_gedcom', 'm_id']);
362
363        if ($sort_by_xref) {
364            $query
365                ->orderBy(new Expression('LENGTH(m_id)'))
366                ->orderBy('m_id');
367        }
368
369        return $query;
370    }
371
372    /**
373     * @param Tree $tree
374     * @param bool $sort_by_xref
375     *
376     * @return Builder
377     */
378    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
379    {
380        $query = DB::table('other')
381            ->where('o_file', '=', $tree->id())
382            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
383            ->select(['o_gedcom', 'o_id']);
384
385        if ($sort_by_xref) {
386            $query
387                ->orderBy('o_type')
388                ->orderBy(new Expression('LENGTH(o_id)'))
389                ->orderBy('o_id');
390        }
391
392        return $query;
393    }
394}
395