xref: /webtrees/app/Services/GedcomExportService.php (revision c344974e96e2ea1576815a443b99a00ffc322086)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2020 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Auth;
23use Fisharebest\Webtrees\Cache;
24use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
25use Fisharebest\Webtrees\Factory;
26use Fisharebest\Webtrees\Gedcom;
27use Fisharebest\Webtrees\GedcomRecord;
28use Fisharebest\Webtrees\Header;
29use Fisharebest\Webtrees\Tree;
30use Fisharebest\Webtrees\Webtrees;
31use Illuminate\Database\Capsule\Manager as DB;
32use Illuminate\Database\Query\Builder;
33use Illuminate\Database\Query\Expression;
34use Illuminate\Support\Collection;
35
36use function app;
37use function assert;
38use function date;
39use function explode;
40use function fwrite;
41use function mb_convert_encoding;
42use function pathinfo;
43use function str_contains;
44use function str_starts_with;
45use function strpos;
46use function strtolower;
47use function strtoupper;
48use function utf8_decode;
49
50use const PATHINFO_EXTENSION;
51
52/**
53 * Export data in GEDCOM format
54 */
55class GedcomExportService
56{
57    /**
58     * Write GEDCOM data to a stream.
59     *
60     * @param Tree                    $tree         - Export data from this tree
61     * @param resource                $stream       - Write to this stream
62     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
63     * @param string                  $encoding     - Convert from UTF-8 to other encoding
64     * @param int                     $access_level - Apply privacy filtering
65     * @param string                  $media_path   - Prepend path to media filenames
66     * @param Collection<string>|null $records      - Just export these records
67     */
68    public function export(
69        Tree $tree,
70        $stream,
71        bool $sort_by_xref = false,
72        string $encoding = 'UTF-8',
73        int $access_level = Auth::PRIV_HIDE,
74        string $media_path = '',
75        Collection $records = null
76    ): void {
77        if ($records instanceof Collection) {
78            // Export just these records - e.g. from clippings cart.
79            $data = [
80                new Collection([$this->createHeader($tree, $encoding, false)]),
81                $records,
82                new Collection(['0 TRLR']),
83            ];
84        } elseif ($access_level === Auth::PRIV_HIDE) {
85            // If we will be applying privacy filters, then we will need the GEDCOM record objects.
86            $data = [
87                new Collection([$this->createHeader($tree, $encoding, true)]),
88                $this->individualQuery($tree, $sort_by_xref)->cursor(),
89                $this->familyQuery($tree, $sort_by_xref)->cursor(),
90                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
91                $this->otherQuery($tree, $sort_by_xref)->cursor(),
92                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
93                new Collection(['0 TRLR']),
94            ];
95        } else {
96            // Disable the pending changes before creating GEDCOM records.
97            $cache = app('cache.array');
98            assert($cache instanceof Cache);
99            $cache->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
100                return new Collection();
101            });
102
103            $data = [
104                new Collection([$this->createHeader($tree, $encoding, true)]),
105                $this->individualQuery($tree, $sort_by_xref)->get()->map(Factory::individual()->mapper($tree)),
106                $this->familyQuery($tree, $sort_by_xref)->get()->map(Factory::family()->mapper($tree)),
107                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Factory::source()->mapper($tree)),
108                $this->otherQuery($tree, $sort_by_xref)->get()->map(Factory::gedcomRecord()->mapper($tree)),
109                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Factory::media()->mapper($tree)),
110                new Collection(['0 TRLR']),
111            ];
112        }
113
114        foreach ($data as $rows) {
115            foreach ($rows as $datum) {
116                if (is_string($datum)) {
117                    $gedcom = $datum;
118                } elseif ($datum instanceof GedcomRecord) {
119                    $gedcom = $datum->privatizeGedcom($access_level);
120                } else {
121                    $gedcom =
122                        $datum->i_gedcom ??
123                        $datum->f_gedcom ??
124                        $datum->s_gedcom ??
125                        $datum->m_gedcom ??
126                        $datum->o_gedcom;
127                }
128
129                if ($media_path !== '') {
130                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
131                }
132
133                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
134                $gedcom = $this->convertEncoding($encoding, $gedcom);
135
136                fwrite($stream, $gedcom);
137            }
138        }
139    }
140
141    /**
142     * Create a header record for a gedcom file.
143     *
144     * @param Tree   $tree
145     * @param string $encoding
146     * @param bool   $include_sub
147     *
148     * @return string
149     */
150    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
151    {
152        // Force a ".ged" suffix
153        $filename = $tree->name();
154
155        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
156            $filename .= '.ged';
157        }
158
159        // Build a new header record
160        $gedcom = '0 HEAD';
161        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
162        $gedcom .= "\n2 NAME " . Webtrees::NAME;
163        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
164        $gedcom .= "\n1 DEST DISKETTE";
165        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
166        $gedcom .= "\n2 TIME " . date('H:i:s');
167        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM Lineage-Linked";
168        $gedcom .= "\n1 CHAR " . $encoding;
169        $gedcom .= "\n1 FILE " . $filename;
170
171        // Preserve some values from the original header
172        $header = Factory::header()->make('HEAD', $tree) ?? Factory::header()->new('HEAD', '0 HEAD', null, $tree);
173
174        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
175            $gedcom .= "\n" . $fact->gedcom();
176        }
177
178        if ($include_sub) {
179            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
180                $gedcom .= "\n" . $fact->gedcom();
181            }
182        }
183
184        return $gedcom;
185    }
186
187    /**
188     * Prepend a media path, such as might have been removed during import.
189     *
190     * @param string $gedcom
191     * @param string $media_path
192     *
193     * @return string
194     */
195    private function convertMediaPath(string $gedcom, string $media_path): string
196    {
197        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
198            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
199                $filename = $match[1];
200
201                // Convert separators to match new path.
202                if (str_contains($media_path, '\\')) {
203                    $filename = strtr($filename, ['/' => '\\']);
204                }
205
206                if (!str_starts_with($filename, $media_path)) {
207                    return $media_path . $filename;
208                }
209
210                return $filename;
211            }, $gedcom);
212        }
213
214        return $gedcom;
215    }
216
217    /**
218     * @param string $encoding
219     * @param string $gedcom
220     *
221     * @return string
222     */
223    private function convertEncoding(string $encoding, string $gedcom): string
224    {
225        switch ($encoding) {
226            case 'ANSI':
227                // Many desktop applications interpret ANSI as ISO-8859-1
228                return utf8_decode($gedcom);
229
230            case 'ANSEL':
231                // coming soon...?
232            case 'ASCII':
233                // Might be needed by really old software?
234                return mb_convert_encoding($gedcom, 'UTF-8', 'ASCII');
235
236            default:
237                return $gedcom;
238        }
239    }
240
241    /**
242     * Wrap long lines using concatenation records.
243     *
244     * @param string $gedcom
245     * @param int    $max_line_length
246     *
247     * @return string
248     */
249    public function wrapLongLines(string $gedcom, int $max_line_length): string
250    {
251        $lines = [];
252
253        foreach (explode("\n", $gedcom) as $line) {
254            // Split long lines
255            // The total length of a GEDCOM line, including level number, cross-reference number,
256            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
257            if (mb_strlen($line) > $max_line_length) {
258                [$level, $tag] = explode(' ', $line, 3);
259                if ($tag !== 'CONT') {
260                    $level++;
261                }
262                do {
263                    // Split after $pos chars
264                    $pos = $max_line_length;
265                    // Split on a non-space (standard gedcom behavior)
266                    while (mb_substr($line, $pos - 1, 1) === ' ') {
267                        --$pos;
268                    }
269                    if ($pos === strpos($line, ' ', 3)) {
270                        // No non-spaces in the data! Can’t split it :-(
271                        break;
272                    }
273                    $lines[] = mb_substr($line, 0, $pos);
274                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
275                } while (mb_strlen($line) > $max_line_length);
276            }
277            $lines[] = $line;
278        }
279
280        return implode(Gedcom::EOL, $lines);
281    }
282
283    /**
284     * @param Tree $tree
285     * @param bool $sort_by_xref
286     *
287     * @return Builder
288     */
289    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
290    {
291        $query = DB::table('families')
292            ->where('f_file', '=', $tree->id())
293            ->select(['f_gedcom', 'f_id']);
294
295
296        if ($sort_by_xref) {
297            $query
298                ->orderBy(new Expression('LENGTH(f_id)'))
299                ->orderBy('f_id');
300        }
301
302        return $query;
303    }
304
305    /**
306     * @param Tree $tree
307     * @param bool $sort_by_xref
308     *
309     * @return Builder
310     */
311    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
312    {
313        $query = DB::table('individuals')
314            ->where('i_file', '=', $tree->id())
315            ->select(['i_gedcom', 'i_id']);
316
317        if ($sort_by_xref) {
318            $query
319                ->orderBy(new Expression('LENGTH(i_id)'))
320                ->orderBy('i_id');
321        }
322
323        return $query;
324    }
325
326    /**
327     * @param Tree $tree
328     * @param bool $sort_by_xref
329     *
330     * @return Builder
331     */
332    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
333    {
334        $query = DB::table('sources')
335            ->where('s_file', '=', $tree->id())
336            ->select(['s_gedcom', 's_id']);
337
338        if ($sort_by_xref) {
339            $query
340                ->orderBy(new Expression('LENGTH(s_id)'))
341                ->orderBy('s_id');
342        }
343
344        return $query;
345    }
346
347    /**
348     * @param Tree $tree
349     * @param bool $sort_by_xref
350     *
351     * @return Builder
352     */
353    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
354    {
355        $query = DB::table('media')
356            ->where('m_file', '=', $tree->id())
357            ->select(['m_gedcom', 'm_id']);
358
359        if ($sort_by_xref) {
360            $query
361                ->orderBy(new Expression('LENGTH(m_id)'))
362                ->orderBy('m_id');
363        }
364
365        return $query;
366    }
367
368    /**
369     * @param Tree $tree
370     * @param bool $sort_by_xref
371     *
372     * @return Builder
373     */
374    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
375    {
376        $query = DB::table('other')
377            ->where('o_file', '=', $tree->id())
378            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
379            ->select(['o_gedcom', 'o_id']);
380
381        if ($sort_by_xref) {
382            $query
383                ->orderBy('o_type')
384                ->orderBy(new Expression('LENGTH(o_id)'))
385                ->orderBy('o_id');
386        }
387
388        return $query;
389    }
390}
391