xref: /webtrees/app/Services/GedcomExportService.php (revision c43c166ecb395e5c9f57dc8e20d9f6f7a27bf410)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2020 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Auth;
23use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
24use Fisharebest\Webtrees\Registry;
25use Fisharebest\Webtrees\Gedcom;
26use Fisharebest\Webtrees\GedcomRecord;
27use Fisharebest\Webtrees\Header;
28use Fisharebest\Webtrees\Tree;
29use Fisharebest\Webtrees\Webtrees;
30use Illuminate\Database\Capsule\Manager as DB;
31use Illuminate\Database\Query\Builder;
32use Illuminate\Database\Query\Expression;
33use Illuminate\Support\Collection;
34
35use function date;
36use function explode;
37use function fwrite;
38use function mb_convert_encoding;
39use function pathinfo;
40use function str_contains;
41use function str_starts_with;
42use function strpos;
43use function strtolower;
44use function strtoupper;
45use function utf8_decode;
46
47use const PATHINFO_EXTENSION;
48
/**
 * Export data in GEDCOM format.
 *
 * The service writes a freshly built header, the tree's records (or an explicit list of
 * records) and a trailer to a stream, optionally applying privacy filtering, media-path
 * rewriting, long-line wrapping and character-set conversion on the way.
 */
class GedcomExportService
{
    /**
     * Write GEDCOM data to a stream.
     *
     * Every record is written as: (optional) media-path rewrite -> long-line wrapping ->
     * end-of-line terminator -> encoding conversion -> fwrite().
     *
     * @param Tree                                $tree         - Export data from this tree
     * @param resource                            $stream       - Write to this stream
     * @param bool                                $sort_by_xref - Write GEDCOM records in XREF order
     * @param string                              $encoding     - Convert from UTF-8 to other encoding
     * @param int                                 $access_level - Apply privacy filtering
     * @param string                              $media_path   - Prepend path to media filenames
     * @param Collection<int,string|object>|null  $records      - Just export these records
     *
     * @return void
     */
    public function export(
        Tree $tree,
        $stream,
        bool $sort_by_xref = false,
        string $encoding = 'UTF-8',
        int $access_level = Auth::PRIV_HIDE,
        string $media_path = '',
        ?Collection $records = null
    ): void {
        if ($records instanceof Collection) {
            // Export just these records - e.g. from clippings cart.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, false)]),
                $records,
                new Collection(['0 TRLR']),
            ];
        } elseif ($access_level === Auth::PRIV_HIDE) {
            // The raw database rows are written as they are - privatizeGedcom() is never
            // called for them - so no record objects are needed and the rows can be
            // streamed with a cursor instead of being loaded all at once.
            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->cursor(),
                $this->familyQuery($tree, $sort_by_xref)->cursor(),
                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
                $this->otherQuery($tree, $sort_by_xref)->cursor(),
                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
                new Collection(['0 TRLR']),
            ];
        } else {
            // We will be applying privacy filters, so we need the GEDCOM record objects.

            // Disable the pending changes before creating GEDCOM records.
            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
                return new Collection();
            });

            $data = [
                new Collection([$this->createHeader($tree, $encoding, true)]),
                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
                new Collection(['0 TRLR']),
            ];
        }

        foreach ($data as $rows) {
            foreach ($rows as $datum) {
                if (is_string($datum)) {
                    // Header, trailer, or a record supplied as raw GEDCOM text.
                    $gedcom = $datum;
                } elseif ($datum instanceof GedcomRecord) {
                    $gedcom = $datum->privatizeGedcom($access_level);
                } else {
                    // A raw database row: exactly one of these columns is present,
                    // depending on which table the row was read from.
                    $gedcom =
                        $datum->i_gedcom ??
                        $datum->f_gedcom ??
                        $datum->s_gedcom ??
                        $datum->m_gedcom ??
                        $datum->o_gedcom;
                }

                if ($media_path !== '') {
                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
                }

                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
                $gedcom = $this->convertEncoding($encoding, $gedcom);

                fwrite($stream, $gedcom);
            }
        }
    }

    /**
     * Create a header record for a gedcom file.
     *
     * The header is rebuilt from scratch (source, destination, date, GEDCOM version,
     * character set, file name). COPR, LANG, PLAC and NOTE facts of the tree's existing
     * header are copied over; SUBM and SUBN facts are copied only when requested.
     *
     * @param Tree   $tree        - The tree whose name and existing header are used
     * @param string $encoding    - Written verbatim as the "1 CHAR" value
     * @param bool   $include_sub - Also copy the SUBM/SUBN facts from the existing header
     *
     * @return string Header record, lines separated by "\n", without a trailing newline
     */
    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
    {
        // Force a ".ged" suffix
        $filename = $tree->name();

        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
            $filename .= '.ged';
        }

        // Build a new header record
        $gedcom = '0 HEAD';
        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
        $gedcom .= "\n2 NAME " . Webtrees::NAME;
        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
        $gedcom .= "\n1 DEST DISKETTE";
        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
        $gedcom .= "\n2 TIME " . date('H:i:s');
        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM Lineage-Linked";
        $gedcom .= "\n1 CHAR " . $encoding;
        $gedcom .= "\n1 FILE " . $filename;

        // Preserve some values from the original header. If the tree has no header
        // record, use an empty one so that the loops below simply find no facts.
        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);

        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
            $gedcom .= "\n" . $fact->gedcom();
        }

        if ($include_sub) {
            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
                $gedcom .= "\n" . $fact->gedcom();
            }
        }

        return $gedcom;
    }

    /**
     * Prepend a media path, such as might have been removed during import.
     *
     * Only media object records ("0 @xref@ OBJE ...") are changed; any other record is
     * returned untouched. Within a media object, the value of every "1 FILE" line gets
     * the media path prepended unless it already starts with it.
     *
     * @param string $gedcom     - One GEDCOM record, lines separated by "\n"
     * @param string $media_path - Path prefix to prepend to the file names
     *
     * @return string
     */
    private function convertMediaPath(string $gedcom, string $media_path): string
    {
        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
            $converted = preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
                $filename = $match[1];

                // Convert separators to match new path.
                if (str_contains($media_path, '\\')) {
                    $filename = strtr($filename, ['/' => '\\']);
                }

                if (!str_starts_with($filename, $media_path)) {
                    $filename = $media_path . $filename;
                }

                // The callback replaces the WHOLE match, so the line break and the
                // "1 FILE" tag must be emitted again - not just the file name.
                return "\n1 FILE " . $filename;
            }, $gedcom);

            // preg_replace_callback() returns null on a PCRE error; keep the record as it was.
            return $converted ?? $gedcom;
        }

        return $gedcom;
    }

    /**
     * Convert UTF-8 GEDCOM text to the requested export encoding.
     *
     * "ANSI" is written as ISO-8859-1. "ANSEL" is not implemented and is handled like
     * "ASCII". Any other value (including "UTF-8") returns the text unchanged.
     *
     * @param string $encoding - Target encoding name
     * @param string $gedcom   - UTF-8 text
     *
     * @return string
     */
    private function convertEncoding(string $encoding, string $gedcom): string
    {
        switch ($encoding) {
            case 'ANSI':
                // Many desktop applications interpret ANSI as ISO-8859-1.
                // (Same result as the deprecated utf8_decode().)
                return mb_convert_encoding($gedcom, 'ISO-8859-1', 'UTF-8');

            case 'ANSEL':
                // coming soon...? Until then, deliberately fall through to ASCII.
            case 'ASCII':
                // Might be needed by really old software?
                // Argument order is (string, TO, FROM): convert from UTF-8 to ASCII.
                return mb_convert_encoding($gedcom, 'ASCII', 'UTF-8');

            default:
                return $gedcom;
        }
    }

    /**
     * Wrap long lines using concatenation records.
     *
     * Input lines are separated by "\n"; output lines are joined with Gedcom::EOL.
     * A line longer than $max_line_length characters is cut into pieces, and every
     * piece after the first becomes a "CONC" line.
     *
     * @param string $gedcom          - One GEDCOM record, lines separated by "\n"
     * @param int    $max_line_length - Maximum number of characters per line
     *
     * @return string
     */
    public function wrapLongLines(string $gedcom, int $max_line_length): string
    {
        $lines = [];

        foreach (explode("\n", $gedcom) as $line) {
            // Split long lines
            // The total length of a GEDCOM line, including level number, cross-reference number,
            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
            if (mb_strlen($line) > $max_line_length) {
                // A line without any space has no second element; pad so the destructuring is safe.
                [$level, $tag] = array_pad(explode(' ', $line, 3), 2, '');

                // A continuation goes one level below the line it continues - unless that
                // line is itself a continuation, in which case the level stays the same.
                if ($tag !== 'CONT' && $tag !== 'CONC') {
                    $level++;
                }

                do {
                    // Split after $pos chars
                    $pos = $max_line_length;
                    // Split on a non-space (standard gedcom behavior)
                    while (mb_substr($line, $pos - 1, 1) === ' ') {
                        --$pos;
                    }
                    // We walked back to the space that follows the tag. The level and tag
                    // are ASCII, so this byte offset equals the character offset.
                    if ($pos === strpos($line, ' ', 3)) {
                        // No non-spaces in the data! Cannot split it :-(
                        break;
                    }
                    $lines[] = mb_substr($line, 0, $pos);
                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
                } while (mb_strlen($line) > $max_line_length);
            }
            $lines[] = $line;
        }

        return implode(Gedcom::EOL, $lines);
    }

    /**
     * Query the family records (f_gedcom, f_id) of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (e.g. F2 before F10)
     *
     * @return Builder
     */
    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_gedcom', 'f_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(f_id)'))
                ->orderBy('f_id');
        }

        return $query;
    }

    /**
     * Query the individual records (i_gedcom, i_id) of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (e.g. I2 before I10)
     *
     * @return Builder
     */
    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_gedcom', 'i_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(i_id)'))
                ->orderBy('i_id');
        }

        return $query;
    }

    /**
     * Query the source records (s_gedcom, s_id) of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (e.g. S2 before S10)
     *
     * @return Builder
     */
    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_gedcom', 's_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(s_id)'))
                ->orderBy('s_id');
        }

        return $query;
    }

    /**
     * Query the media records (m_gedcom, m_id) of a tree.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by XREF length, then XREF (e.g. M2 before M10)
     *
     * @return Builder
     */
    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_gedcom', 'm_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy(new Expression('LENGTH(m_id)'))
                ->orderBy('m_id');
        }

        return $query;
    }

    /**
     * Query all remaining records (o_gedcom, o_id) of a tree.
     *
     * Header and trailer records are excluded, because export() writes its own.
     *
     * @param Tree $tree
     * @param bool $sort_by_xref - Order by record type, then XREF length, then XREF
     *
     * @return Builder
     */
    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
    {
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_gedcom', 'o_id']);

        if ($sort_by_xref) {
            $query
                ->orderBy('o_type')
                ->orderBy(new Expression('LENGTH(o_id)'))
                ->orderBy('o_id');
        }

        return $query;
    }
}
386