xref: /webtrees/app/Services/GedcomExportService.php (revision c5b48766684db09f7f8372612300a247ec6929e4)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Auth;
23use Fisharebest\Webtrees\Encodings\UTF16BE;
24use Fisharebest\Webtrees\Encodings\UTF16LE;
25use Fisharebest\Webtrees\Encodings\UTF8;
26use Fisharebest\Webtrees\Encodings\Windows1252;
27use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
28use Fisharebest\Webtrees\Gedcom;
29use Fisharebest\Webtrees\GedcomFilters\GedcomEncodingFilter;
30use Fisharebest\Webtrees\GedcomRecord;
31use Fisharebest\Webtrees\Header;
32use Fisharebest\Webtrees\Registry;
33use Fisharebest\Webtrees\Tree;
34use Fisharebest\Webtrees\Webtrees;
35use Illuminate\Database\Capsule\Manager as DB;
36use Illuminate\Database\Query\Builder;
37use Illuminate\Database\Query\Expression;
38use Illuminate\Support\Collection;
39use RuntimeException;
40
41use function date;
42use function explode;
43use function fopen;
44use function fwrite;
45use function pathinfo;
46use function rewind;
47use function str_contains;
48use function str_starts_with;
49use function stream_filter_append;
50use function strlen;
51use function strpos;
52use function strtolower;
53use function strtoupper;
54
55use const PATHINFO_EXTENSION;
56use const STREAM_FILTER_WRITE;
57
58/**
59 * Export data in GEDCOM format
60 */
61class GedcomExportService
62{
63    /**
64     * Write GEDCOM data to a stream.
65     *
66     * @param Tree                        $tree         - Export data from this tree
67     * @param bool                        $sort_by_xref - Write GEDCOM records in XREF order
68     * @param string                      $encoding     - Convert from UTF-8 to other encoding
69     * @param int                         $access_level - Apply privacy filtering
70     * @param string                      $media_path   - Prepend path to media filenames
71     * @param string                      $line_endings - CRLF or LF
72     * @param Collection<int,string>|null $records      - Just export these records
73     *
74     * @return resource
75     */
76    public function export(
77        Tree $tree,
78        bool $sort_by_xref = false,
79        string $encoding = UTF8::NAME,
80        int $access_level = Auth::PRIV_HIDE,
81        string $media_path = '',
82        string $line_endings = 'CRLF',
83        Collection $records = null
84    ) {
85        $stream = fopen('php://memory', 'wb+');
86
87        if ($stream === false) {
88            throw new RuntimeException('Failed to create temporary stream');
89        }
90
91        stream_filter_append($stream, GedcomEncodingFilter::class, STREAM_FILTER_WRITE, ['src_encoding' => UTF8::NAME, 'dst_encoding' => $encoding]);
92
93        if ($records instanceof Collection) {
94            // Export just these records - e.g. from clippings cart.
95            $data = [
96                new Collection([$this->createHeader($tree, $encoding, false)]),
97                $records,
98                new Collection(['0 TRLR']),
99            ];
100        } elseif ($access_level === Auth::PRIV_HIDE) {
101            // If we will be applying privacy filters, then we will need the GEDCOM record objects.
102            $data = [
103                new Collection([$this->createHeader($tree, $encoding, true)]),
104                $this->individualQuery($tree, $sort_by_xref)->cursor(),
105                $this->familyQuery($tree, $sort_by_xref)->cursor(),
106                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
107                $this->otherQuery($tree, $sort_by_xref)->cursor(),
108                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
109                new Collection(['0 TRLR']),
110            ];
111        } else {
112            // Disable the pending changes before creating GEDCOM records.
113            Registry::cache()->array()->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
114                return new Collection();
115            });
116
117            $data = [
118                new Collection([$this->createHeader($tree, $encoding, true)]),
119                $this->individualQuery($tree, $sort_by_xref)->get()->map(Registry::individualFactory()->mapper($tree)),
120                $this->familyQuery($tree, $sort_by_xref)->get()->map(Registry::familyFactory()->mapper($tree)),
121                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Registry::sourceFactory()->mapper($tree)),
122                $this->otherQuery($tree, $sort_by_xref)->get()->map(Registry::gedcomRecordFactory()->mapper($tree)),
123                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Registry::mediaFactory()->mapper($tree)),
124                new Collection(['0 TRLR']),
125            ];
126        }
127
128        foreach ($data as $rows) {
129            foreach ($rows as $datum) {
130                if (is_string($datum)) {
131                    $gedcom = $datum;
132                } elseif ($datum instanceof GedcomRecord) {
133                    $gedcom = $datum->privatizeGedcom($access_level);
134                } else {
135                    $gedcom =
136                        $datum->i_gedcom ??
137                        $datum->f_gedcom ??
138                        $datum->s_gedcom ??
139                        $datum->m_gedcom ??
140                        $datum->o_gedcom;
141                }
142
143                if ($media_path !== '') {
144                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
145                }
146
147                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . "\n";
148
149                if ($line_endings === 'CRLF') {
150                    $gedcom = strtr($gedcom, ["\n" => "\r\n"]);
151                }
152
153                $bytes_written = fwrite($stream, $gedcom);
154
155                if ($bytes_written !== strlen($gedcom)) {
156                    throw new RuntimeException('Unable to write to stream.  Perhaps the disk is full?');
157                }
158            }
159        }
160
161        if (rewind($stream) === false) {
162            throw new RuntimeException('Cannot rewind temporary stream');
163        }
164
165        return $stream;
166    }
167
168    /**
169     * Create a header record for a gedcom file.
170     *
171     * @param Tree   $tree
172     * @param string $encoding
173     * @param bool   $include_sub
174     *
175     * @return string
176     */
177    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
178    {
179        // Force a ".ged" suffix
180        $filename = $tree->name();
181
182        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
183            $filename .= '.ged';
184        }
185
186        $gedcom_encodings = [
187            UTF16BE::NAME     => 'UNICODE',
188            UTF16LE::NAME     => 'UNICODE',
189            Windows1252::NAME => 'ANSI',
190        ];
191
192        $encoding = $gedcom_encodings[$encoding] ?? $encoding;
193
194        // Build a new header record
195        $gedcom = '0 HEAD';
196        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
197        $gedcom .= "\n2 NAME " . Webtrees::NAME;
198        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
199        $gedcom .= "\n1 DEST DISKETTE";
200        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
201        $gedcom .= "\n2 TIME " . date('H:i:s');
202        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM LINEAGE-LINKED";
203        $gedcom .= "\n1 CHAR " . $encoding;
204        $gedcom .= "\n1 FILE " . $filename;
205
206        // Preserve some values from the original header
207        $header = Registry::headerFactory()->make('HEAD', $tree) ?? Registry::headerFactory()->new('HEAD', '0 HEAD', null, $tree);
208
209        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
210            $gedcom .= "\n" . $fact->gedcom();
211        }
212
213        if ($include_sub) {
214            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
215                $gedcom .= "\n" . $fact->gedcom();
216            }
217        }
218
219        return $gedcom;
220    }
221
222    /**
223     * Prepend a media path, such as might have been removed during import.
224     *
225     * @param string $gedcom
226     * @param string $media_path
227     *
228     * @return string
229     */
230    private function convertMediaPath(string $gedcom, string $media_path): string
231    {
232        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
233            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
234                $filename = $match[1];
235
236                // Don’t modify external links
237                if (!str_contains($filename, '://')) {
238                    // Convert separators to match new path.
239                    if (str_contains($media_path, '\\')) {
240                        $filename = strtr($filename, ['/' => '\\']);
241                    }
242
243                    if (!str_starts_with($filename, $media_path)) {
244                        $filename = $media_path . $filename;
245                    }
246                }
247
248                return "\n1 FILE " . $filename;
249            }, $gedcom);
250        }
251
252        return $gedcom;
253    }
254
255    /**
256     * Wrap long lines using concatenation records.
257     *
258     * @param string $gedcom
259     * @param int    $max_line_length
260     *
261     * @return string
262     */
263    public function wrapLongLines(string $gedcom, int $max_line_length): string
264    {
265        $lines = [];
266
267        foreach (explode("\n", $gedcom) as $line) {
268            // Split long lines
269            // The total length of a GEDCOM line, including level number, cross-reference number,
270            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
271            if (mb_strlen($line) > $max_line_length) {
272                [$level, $tag] = explode(' ', $line, 3);
273                if ($tag !== 'CONT') {
274                    $level++;
275                }
276                do {
277                    // Split after $pos chars
278                    $pos = $max_line_length;
279                    // Split on a non-space (standard gedcom behavior)
280                    while (mb_substr($line, $pos - 1, 1) === ' ') {
281                        --$pos;
282                    }
283                    if ($pos === strpos($line, ' ', 3)) {
284                        // No non-spaces in the data! Can’t split it :-(
285                        break;
286                    }
287                    $lines[] = mb_substr($line, 0, $pos);
288                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
289                } while (mb_strlen($line) > $max_line_length);
290            }
291            $lines[] = $line;
292        }
293
294        return implode("\n", $lines);
295    }
296
297    /**
298     * @param Tree $tree
299     * @param bool $sort_by_xref
300     *
301     * @return Builder
302     */
303    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
304    {
305        $query = DB::table('families')
306            ->where('f_file', '=', $tree->id())
307            ->select(['f_gedcom', 'f_id']);
308
309
310        if ($sort_by_xref) {
311            $query
312                ->orderBy(new Expression('LENGTH(f_id)'))
313                ->orderBy('f_id');
314        }
315
316        return $query;
317    }
318
319    /**
320     * @param Tree $tree
321     * @param bool $sort_by_xref
322     *
323     * @return Builder
324     */
325    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
326    {
327        $query = DB::table('individuals')
328            ->where('i_file', '=', $tree->id())
329            ->select(['i_gedcom', 'i_id']);
330
331        if ($sort_by_xref) {
332            $query
333                ->orderBy(new Expression('LENGTH(i_id)'))
334                ->orderBy('i_id');
335        }
336
337        return $query;
338    }
339
340    /**
341     * @param Tree $tree
342     * @param bool $sort_by_xref
343     *
344     * @return Builder
345     */
346    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
347    {
348        $query = DB::table('sources')
349            ->where('s_file', '=', $tree->id())
350            ->select(['s_gedcom', 's_id']);
351
352        if ($sort_by_xref) {
353            $query
354                ->orderBy(new Expression('LENGTH(s_id)'))
355                ->orderBy('s_id');
356        }
357
358        return $query;
359    }
360
361    /**
362     * @param Tree $tree
363     * @param bool $sort_by_xref
364     *
365     * @return Builder
366     */
367    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
368    {
369        $query = DB::table('media')
370            ->where('m_file', '=', $tree->id())
371            ->select(['m_gedcom', 'm_id']);
372
373        if ($sort_by_xref) {
374            $query
375                ->orderBy(new Expression('LENGTH(m_id)'))
376                ->orderBy('m_id');
377        }
378
379        return $query;
380    }
381
382    /**
383     * @param Tree $tree
384     * @param bool $sort_by_xref
385     *
386     * @return Builder
387     */
388    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
389    {
390        $query = DB::table('other')
391            ->where('o_file', '=', $tree->id())
392            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
393            ->select(['o_gedcom', 'o_id']);
394
395        if ($sort_by_xref) {
396            $query
397                ->orderBy('o_type')
398                ->orderBy(new Expression('LENGTH(o_id)'))
399                ->orderBy('o_id');
400        }
401
402        return $query;
403    }
404}
405