xref: /webtrees/app/Services/GedcomExportService.php (revision f4c767fd89cdb62ee54edec032285924cd767af7)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2019 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Auth;
23use Fisharebest\Webtrees\Cache;
24use Fisharebest\Webtrees\Factories\AbstractGedcomRecordFactory;
25use Fisharebest\Webtrees\Factory;
26use Fisharebest\Webtrees\Gedcom;
27use Fisharebest\Webtrees\GedcomRecord;
28use Fisharebest\Webtrees\Header;
29use Fisharebest\Webtrees\Tree;
30use Fisharebest\Webtrees\Webtrees;
31use Illuminate\Database\Capsule\Manager as DB;
32use Illuminate\Database\Query\Builder;
33use Illuminate\Database\Query\Expression;
34use Illuminate\Support\Collection;
35
36use function app;
37use function assert;
38use function date;
39use function explode;
40use function fwrite;
41use function mb_convert_encoding;
42use function pathinfo;
43use function strpos;
44use function strtolower;
45use function strtoupper;
46use function utf8_decode;
47
48use const PATHINFO_EXTENSION;
49
50/**
51 * Export data in GEDCOM format
52 */
53class GedcomExportService
54{
55    /**
56     * Write GEDCOM data to a stream.
57     *
58     * @param Tree                    $tree         - Export data from this tree
59     * @param resource                $stream       - Write to this stream
60     * @param bool                    $sort_by_xref - Write GEDCOM records in XREF order
61     * @param string                  $encoding     - Convert from UTF-8 to other encoding
62     * @param int                     $access_level - Apply privacy filtering
63     * @param string                  $media_path   - Prepend path to media filenames
64     * @param Collection<string>|null $records      - Just export these records
65     */
66    public function export(
67        Tree $tree,
68        $stream,
69        bool $sort_by_xref = false,
70        string $encoding = 'UTF-8',
71        int $access_level = Auth::PRIV_HIDE,
72        string $media_path = '',
73        Collection $records = null
74    ): void {
75        if ($records instanceof Collection) {
76            // Export just these records - e.g. from clippings cart.
77            $data = [
78                new Collection([$this->createHeader($tree, $encoding, false)]),
79                $records,
80                new Collection(['0 TRLR']),
81            ];
82        } elseif ($access_level === Auth::PRIV_HIDE) {
83            // If we will be applying privacy filters, then we will need the GEDCOM record objects.
84            $data = [
85                new Collection([$this->createHeader($tree, $encoding, true)]),
86                $this->individualQuery($tree, $sort_by_xref)->cursor(),
87                $this->familyQuery($tree, $sort_by_xref)->cursor(),
88                $this->sourceQuery($tree, $sort_by_xref)->cursor(),
89                $this->otherQuery($tree, $sort_by_xref)->cursor(),
90                $this->mediaQuery($tree, $sort_by_xref)->cursor(),
91                new Collection(['0 TRLR']),
92            ];
93        } else {
94            // Disable the pending changes before creating GEDCOM records.
95            $cache = app('cache.array');
96            assert($cache instanceof Cache);
97            $cache->remember(AbstractGedcomRecordFactory::class . $tree->id(), static function (): Collection {
98                return new Collection();
99            });
100
101            $data = [
102                new Collection([$this->createHeader($tree, $encoding, true)]),
103                $this->individualQuery($tree, $sort_by_xref)->get()->map(Factory::individual()->mapper($tree)),
104                $this->familyQuery($tree, $sort_by_xref)->get()->map(Factory::family()->mapper($tree)),
105                $this->sourceQuery($tree, $sort_by_xref)->get()->map(Factory::source()->mapper($tree)),
106                $this->otherQuery($tree, $sort_by_xref)->get()->map(Factory::gedcomRecord()->mapper($tree)),
107                $this->mediaQuery($tree, $sort_by_xref)->get()->map(Factory::media()->mapper($tree)),
108                new Collection(['0 TRLR']),
109            ];
110        }
111
112        foreach ($data as $rows) {
113            foreach ($rows as $datum) {
114                if (is_string($datum)) {
115                    $gedcom = $datum;
116                } elseif ($datum instanceof GedcomRecord) {
117                    $gedcom = $datum->privatizeGedcom($access_level);
118                } else {
119                    $gedcom = $datum->gedcom;
120                }
121
122                if ($media_path !== '') {
123                    $gedcom = $this->convertMediaPath($gedcom, $media_path);
124                }
125
126                $gedcom = $this->wrapLongLines($gedcom, Gedcom::LINE_LENGTH) . Gedcom::EOL;
127                $gedcom = $this->convertEncoding($encoding, $gedcom);
128
129                fwrite($stream, $gedcom);
130            }
131        }
132    }
133
134    /**
135     * Create a header record for a gedcom file.
136     *
137     * @param Tree   $tree
138     * @param string $encoding
139     * @param bool   $include_sub
140     *
141     * @return string
142     */
143    public function createHeader(Tree $tree, string $encoding, bool $include_sub): string
144    {
145        // Force a ".ged" suffix
146        $filename = $tree->name();
147
148        if (strtolower(pathinfo($filename, PATHINFO_EXTENSION)) !== 'ged') {
149            $filename .= '.ged';
150        }
151
152        // Build a new header record
153        $gedcom = '0 HEAD';
154        $gedcom .= "\n1 SOUR " . Webtrees::NAME;
155        $gedcom .= "\n2 NAME " . Webtrees::NAME;
156        $gedcom .= "\n2 VERS " . Webtrees::VERSION;
157        $gedcom .= "\n1 DEST DISKETTE";
158        $gedcom .= "\n1 DATE " . strtoupper(date('d M Y'));
159        $gedcom .= "\n2 TIME " . date('H:i:s');
160        $gedcom .= "\n1 GEDC\n2 VERS 5.5.1\n2 FORM Lineage-Linked";
161        $gedcom .= "\n1 CHAR " . $encoding;
162        $gedcom .= "\n1 FILE " . $filename;
163
164        // Preserve some values from the original header
165        $header = Factory::header()->make('HEAD', $tree) ?? Factory::header()->new('HEAD', '0 HEAD', null, $tree);
166
167        foreach ($header->facts(['COPR', 'LANG', 'PLAC', 'NOTE']) as $fact) {
168            $gedcom .= "\n" . $fact->gedcom();
169        }
170
171        if ($include_sub) {
172            foreach ($header->facts(['SUBM', 'SUBN']) as $fact) {
173                $gedcom .= "\n" . $fact->gedcom();
174            }
175        }
176
177        return $gedcom;
178    }
179
180    /**
181     * Prepend a media path, such as might have been removed during import.
182     *
183     * @param string $gedcom
184     * @param string $media_path
185     *
186     * @return string
187     */
188    private function convertMediaPath(string $gedcom, string $media_path): string
189    {
190        if (preg_match('/^0 @[^@]+@ OBJE/', $gedcom)) {
191            return preg_replace_callback('/\n1 FILE (.+)/', static function (array $match) use ($media_path): string {
192                $filename = $match[1];
193
194                // Convert separators to match new path.
195                if (strpos($media_path, '\\') !== false) {
196                    $filename = strtr($filename, ['/' => '\\']);
197                }
198
199                if (strpos($filename, $media_path) !== 0) {
200                    return $media_path . $filename;
201                }
202
203                return $filename;
204            }, $gedcom);
205        }
206
207        return $gedcom;
208    }
209
210    /**
211     * @param string $encoding
212     * @param string $gedcom
213     *
214     * @return string
215     */
216    private function convertEncoding(string $encoding, string $gedcom): string
217    {
218        switch ($encoding) {
219            case 'ANSI':
220                // Many desktop applications interpret ANSI as ISO-8859-1
221                return utf8_decode($gedcom);
222
223            case 'ANSEL':
224                // coming soon...?
225            case 'ASCII':
226                // Might be needed by really old software?
227                return mb_convert_encoding($gedcom, 'UTF-8', 'ASCII');
228
229            default:
230                return $gedcom;
231        }
232    }
233
234    /**
235     * Wrap long lines using concatenation records.
236     *
237     * @param string $gedcom
238     * @param int    $max_line_length
239     *
240     * @return string
241     */
242    public function wrapLongLines(string $gedcom, int $max_line_length): string
243    {
244        $lines = [];
245
246        foreach (explode("\n", $gedcom) as $line) {
247            // Split long lines
248            // The total length of a GEDCOM line, including level number, cross-reference number,
249            // tag, value, delimiters, and terminator, must not exceed 255 (wide) characters.
250            if (mb_strlen($line) > $max_line_length) {
251                [$level, $tag] = explode(' ', $line, 3);
252                if ($tag !== 'CONT') {
253                    $level++;
254                }
255                do {
256                    // Split after $pos chars
257                    $pos = $max_line_length;
258                    // Split on a non-space (standard gedcom behavior)
259                    while (mb_substr($line, $pos - 1, 1) === ' ') {
260                        --$pos;
261                    }
262                    if ($pos === strpos($line, ' ', 3)) {
263                        // No non-spaces in the data! Can’t split it :-(
264                        break;
265                    }
266                    $lines[] = mb_substr($line, 0, $pos);
267                    $line    = $level . ' CONC ' . mb_substr($line, $pos);
268                } while (mb_strlen($line) > $max_line_length);
269            }
270            $lines[] = $line;
271        }
272
273        return implode(Gedcom::EOL, $lines);
274    }
275
276    /**
277     * @param Tree $tree
278     * @param bool $sort_by_xref
279     *
280     * @return Builder
281     */
282    private function familyQuery(Tree $tree, bool $sort_by_xref): Builder
283    {
284        $query = DB::table('families')
285            ->where('f_file', '=', $tree->id())
286            ->select(['f_gedcom AS gedcom', 'f_id AS xref']);
287
288
289        if ($sort_by_xref) {
290            $query
291                ->orderBy(new Expression('LENGTH(f_id)'))
292                ->orderBy('f_id');
293        }
294
295        return $query;
296    }
297
298    /**
299     * @param Tree $tree
300     * @param bool $sort_by_xref
301     *
302     * @return Builder
303     */
304    private function individualQuery(Tree $tree, bool $sort_by_xref): Builder
305    {
306        $query = DB::table('individuals')
307            ->where('i_file', '=', $tree->id())
308            ->select(['i_gedcom AS gedcom', 'i_id AS xref']);
309
310        if ($sort_by_xref) {
311            $query
312                ->orderBy(new Expression('LENGTH(i_id)'))
313                ->orderBy('i_id');
314        }
315
316        return $query;
317    }
318
319    /**
320     * @param Tree $tree
321     * @param bool $sort_by_xref
322     *
323     * @return Builder
324     */
325    private function sourceQuery(Tree $tree, bool $sort_by_xref): Builder
326    {
327        $query = DB::table('sources')
328            ->where('s_file', '=', $tree->id())
329            ->select(['s_gedcom AS gedcom', 's_id AS xref']);
330
331        if ($sort_by_xref) {
332            $query
333                ->orderBy(new Expression('LENGTH(s_id)'))
334                ->orderBy('s_id');
335        }
336
337        return $query;
338    }
339
340    /**
341     * @param Tree $tree
342     * @param bool $sort_by_xref
343     *
344     * @return Builder
345     */
346    private function mediaQuery(Tree $tree, bool $sort_by_xref): Builder
347    {
348        $query = DB::table('media')
349            ->where('m_file', '=', $tree->id())
350            ->select(['m_gedcom AS gedcom', 'm_id AS xref']);
351
352        if ($sort_by_xref) {
353            $query
354                ->orderBy(new Expression('LENGTH(m_id)'))
355                ->orderBy('m_id');
356        }
357
358        return $query;
359    }
360
361    /**
362     * @param Tree $tree
363     * @param bool $sort_by_xref
364     *
365     * @return Builder
366     */
367    private function otherQuery(Tree $tree, bool $sort_by_xref): Builder
368    {
369        $query = DB::table('other')
370            ->where('o_file', '=', $tree->id())
371            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
372            ->select(['o_gedcom AS gedcom', 'o_id AS xref']);
373
374        if ($sort_by_xref) {
375            $query
376                ->orderBy('o_type')
377                ->orderBy(new Expression('LENGTH(o_id)'))
378                ->orderBy('o_id');
379        }
380
381        return $query;
382    }
383}
384