xref: /webtrees/app/Services/GedcomEditService.php (revision de7821c62e0dae64ab4ee08bea22de80a086e97a)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Fact;
23use Fisharebest\Webtrees\Family;
24use Fisharebest\Webtrees\Gedcom;
25use Fisharebest\Webtrees\GedcomRecord;
26use Fisharebest\Webtrees\Individual;
27use Fisharebest\Webtrees\Note;
28use Fisharebest\Webtrees\Registry;
29use Fisharebest\Webtrees\Site;
30use Fisharebest\Webtrees\Tree;
31use Psr\Http\Message\ServerRequestInterface;
32
33use function array_filter;
34use function array_merge;
35use function array_shift;
36use function array_unique;
37use function array_values;
38use function assert;
39use function count;
40use function explode;
41use function implode;
42use function max;
43use function preg_match_all;
44use function preg_replace;
45use function preg_split;
46use function str_repeat;
47use function str_replace;
48use function substr_count;
49use function trim;
50
51use const ARRAY_FILTER_USE_KEY;
52use const PHP_INT_MAX;
53
/**
 * Utilities to edit/save GEDCOM data.
 *
 * The public $glevels, $tag, $islink and $text arrays are parallel "interface
 * update arrays": index N of each array describes the same edited GEDCOM line.
 */
class GedcomEditService
{
    /** @var string[] The GEDCOM level of each edited line. */
    public $glevels = [];

    /** @var string[] The GEDCOM tag of each edited line. */
    public $tag = [];

    /** @var string[] Truthy when the text of the line is a link, to be written as @XREF@. */
    public $islink = [];

    /** @var string[] The text value of each edited line. */
    public $text = [];

    /** @var string[] Levels of the lines that splitSource() assigned to a SOUR block. */
    protected $glevelsSOUR = [];

    /** @var string[] Tags of the lines that splitSource() assigned to a SOUR block. */
    protected $tagSOUR = [];

    /** @var string[] Link flags of the lines that splitSource() assigned to a SOUR block. */
    protected $islinkSOUR = [];

    /** @var string[] Text of the lines that splitSource() assigned to a SOUR block. */
    protected $textSOUR = [];

    /** @var string[] Levels of the lines that splitSource() did not assign to a SOUR block. */
    protected $glevelsRest = [];

    /** @var string[] Tags of the lines that splitSource() did not assign to a SOUR block. */
    protected $tagRest = [];

    /** @var string[] Link flags of the lines that splitSource() did not assign to a SOUR block. */
    protected $islinkRest = [];

    /** @var string[] Text of the lines that splitSource() did not assign to a SOUR block. */
    protected $textRest = [];

    /**
     * Split the $glevels, $tag, $islink and $text arrays so that the entries
     * belonging to a SOUR block are separate from everything else.
     *
     * Input arrays:
     * - $glevels[] - the gedcom level for each line that was edited
     * - $tag[]     - the tag for each gedcom line that was edited
     * - $islink[]  - 1 or 0 values to indicate when the text is a link element
     * - $text[]    - the text data for each line
     *
     * Output arrays (same four columns each):
     * - $glevelsSOUR[], $tagSOUR[], $islinkSOUR[], $textSOUR[] - a SOUR line and
     *   every following line at a deeper level
     * - $glevelsRest[], $tagRest[], $islinkRest[], $textRest[] - all other lines
     *
     * The output arrays are reset on every call.
     *
     * @return void
     */
    public function splitSource(): void
    {
        $this->glevelsSOUR = [];
        $this->tagSOUR     = [];
        $this->islinkSOUR  = [];
        $this->textSOUR    = [];

        $this->glevelsRest = [];
        $this->tagRest     = [];
        $this->islinkRest  = [];
        $this->textRest    = [];

        // All four input arrays must be the same size.
        $count = count($this->glevels);
        assert(count($this->tag) === $count);
        assert(count($this->islink) === $count);
        assert(count($this->text) === $count);

        $in_source    = false;
        $source_level = 0;

        for ($i = 0; $i < $count; $i++) {
            $level = $this->glevels[$i];

            if ($in_source) {
                // Still inside the SOUR block only while the lines are deeper than the SOUR line.
                // NOTE(review): the line that ends a SOUR block always goes to the "rest" arrays,
                // even when it is itself a SOUR tag. Confirm that this is intended.
                $in_source = $source_level < $level;
                $to_source = $in_source;
            } elseif ($this->tag[$i] === 'SOUR') {
                $in_source    = true;
                $source_level = $level;
                $to_source    = true;
            } else {
                $to_source = false;
            }

            if ($to_source) {
                $this->glevelsSOUR[] = $level;
                $this->tagSOUR[]     = $this->tag[$i];
                $this->islinkSOUR[]  = $this->islink[$i];
                $this->textSOUR[]    = $this->text[$i];
            } else {
                $this->glevelsRest[] = $level;
                $this->tagRest[]     = $this->tag[$i];
                $this->islinkRest[]  = $this->islink[$i];
                $this->textRest[]    = $this->text[$i];
            }
        }
    }

    /**
     * Add new GEDCOM lines from the $xxxRest interface update arrays, which
     * were produced by the splitSource() function.
     * See the handleUpdates() function for details.
     *
     * @param string $inputRec The GEDCOM record to append to
     *
     * @return string The record with the non-SOUR lines appended; $inputRec unchanged if there are none
     */
    public function updateRest(string $inputRec): string
    {
        if (count($this->tagRest) === 0) {
            return $inputRec; // No update required
        }

        return $this->updateWith(
            $this->glevelsRest,
            $this->tagRest,
            $this->islinkRest,
            $this->textRest,
            $inputRec,
            'no'
        );
    }

    /**
     * Add new gedcom lines from interface update arrays.
     *
     * The edit forms produce the following arrays:
     * - $glevels[] - the gedcom level for each line that was edited
     * - $tag[]     - the tag for each gedcom line that was edited
     * - $islink[]  - 1 or 0 values to tell whether the text is a link element and should be surrounded by @@
     * - $text[]    - the text data for each line
     * With these arrays you can recreate the gedcom lines like this
     * <code>$glevel[0].' '.$tag[0].' '.$text[0]</code>
     * There is an index in each of these arrays for each line of the gedcom
     * fact that is being edited.
     *
     * If the text of a line is blank (empty or only whitespace), then either the
     * user removed that line during editing or the line is supposed to be empty
     * (1 DEAT, 1 BIRT for example). To decide, the following deeper lines are
     * inspected: a blank line is kept only if one of its sub-lines has a
     * non-blank value. A blank OBJE line is kept only if it has a non-blank
     * FILE sub-line.
     *
     * A SOUR line whose text is '' or '@@' has its own text and the text of all
     * its sub-lines cleared. This modifies $this->text.
     *
     * Line breaks in a value are written as CONT lines at the next level.
     *
     * @param string $newged        the new gedcom record to add the lines to
     * @param string $levelOverride 'no' to use the levels from $glevels; any other value adds one to every level
     *
     * @return string The updated gedcom record
     */
    public function handleUpdates(string $newged, string $levelOverride = 'no'): string
    {
        $levelAdjust = $levelOverride === 'no' ? 0 : 1;

        // Assert all arrays are the same size.
        assert(count($this->glevels) === count($this->tag));
        assert(count($this->glevels) === count($this->text));
        assert(count($this->glevels) === count($this->islink));

        $count = count($this->glevels);

        for ($j = 0; $j < $count; $j++) {
            // Look for empty SOUR reference with non-empty sub-records.
            // This can happen when the SOUR entry is deleted but its sub-records
            // were incorrectly left intact.
            // The sub-records should be deleted.
            if ($this->tag[$j] === 'SOUR' && ($this->text[$j] === '@@' || $this->text[$j] === '')) {
                $this->text[$j] = '';

                for ($k = $j + 1; $k < $count && $this->glevels[$k] > $this->glevels[$j]; $k++) {
                    $this->text[$k] = '';
                }
            }

            $has_value = trim($this->text[$j]) !== '';

            // Lines without a value are only written when they have sub-lines with values (DEAT, BIRT, etc.)
            if (!$has_value && !$this->hasSubLineWithValue($j, $count)) {
                continue;
            }

            $level = (int) $this->glevels[$j] + $levelAdjust;
            $line  = $level . ' ' . $this->tag[$j];

            if ($has_value) {
                if ($this->islink[$j]) {
                    $line .= ' @' . trim($this->text[$j], '@') . '@';
                } else {
                    $line .= ' ' . $this->text[$j];
                }
            }

            $newged .= "\n" . str_replace("\n", "\n" . ($level + 1) . ' CONT ', $line);
        }

        return $newged;
    }

    /**
     * Add new GEDCOM lines from the $xxxSOUR interface update arrays, which
     * were produced by the splitSource() function.
     * See the handleUpdates() function for details.
     *
     * @param string $inputRec      The GEDCOM record to append to
     * @param string $levelOverride Passed to handleUpdates()
     *
     * @return string The record with the SOUR lines appended; $inputRec unchanged if there are none
     */
    public function updateSource(string $inputRec, string $levelOverride = 'no'): string
    {
        if (count($this->tagSOUR) === 0) {
            return $inputRec; // No update required
        }

        return $this->updateWith(
            $this->glevelsSOUR,
            $this->tagSOUR,
            $this->islinkSOUR,
            $this->textSOUR,
            $inputRec,
            $levelOverride
        );
    }

    /**
     * Reassemble edited GEDCOM fields into a GEDCOM fact/event string.
     *
     * Each value is first converted to its canonical form by the element that
     * matches its full tag (e.g. INDI:BIRT:DATE). A line is included when it has
     * a value, or when any following deeper line has a value.
     *
     * @param string        $record_type The tag of the record that contains the lines, e.g. INDI
     * @param array<string> $levels
     * @param array<string> $tags
     * @param array<string> $values
     *
     * @return string
     */
    public function editLinesToGedcom(string $record_type, array $levels, array $tags, array $values): string
    {
        // Assert all arrays are the same size.
        $count = count($levels);
        assert($count > 0);
        assert(count($tags) === $count);
        assert(count($values) === $count);

        $gedcom_lines = [];

        // The tag most recently seen at each level; level 0 is the record itself.
        $hierarchy = [$record_type];

        for ($i = 0; $i < $count; $i++) {
            $hierarchy[$levels[$i]] = $tags[$i];

            $full_tag   = implode(':', array_slice($hierarchy, 0, 1 + (int) $levels[$i]));
            $element    = Registry::elementFactory()->make($full_tag);
            $values[$i] = $element->canonical($values[$i]);

            // If "1 FACT Y" has a DATE or PLAC, then delete the value of Y
            if ($levels[$i] === '1' && $values[$i] === 'Y') {
                for ($j = $i + 1; $j < $count && $levels[$j] > $levels[$i]; ++$j) {
                    if ($levels[$j] === '2' && ($tags[$j] === 'DATE' || $tags[$j] === 'PLAC') && $values[$j] !== '') {
                        $values[$i] = '';
                        break;
                    }
                }
            }

            // Include this line if there is a value - or if there is a child record with a value.
            $include = $values[$i] !== '';

            for ($j = $i + 1; !$include && $j < $count && $levels[$j] > $levels[$i]; $j++) {
                $include = $values[$j] !== '';
            }

            if (!$include) {
                continue;
            }

            $line = $levels[$i] . ' ' . $tags[$i];

            if ($values[$i] !== '') {
                // Continuation lines of a CONC stay at the level of the CONC; all others go one level deeper.
                $cont_level = $tags[$i] === 'CONC' ? (int) $levels[$i] : 1 + (int) $levels[$i];

                $line .= ' ' . str_replace("\n", "\n" . $cont_level . ' CONT ', $values[$i]);
            }

            $gedcom_lines[] = $line;
        }

        return implode("\n", $gedcom_lines);
    }

    /**
     * Add blank lines to a fact, to allow a user to add/edit new values.
     *
     * @param Fact $fact
     * @param bool $include_hidden Also add the subtags that isHiddenTag() reports as hidden
     *
     * @return string
     */
    public function insertMissingFactSubtags(Fact $fact, bool $include_hidden): string
    {
        return $this->insertMissingLevels($fact->record()->tree(), $fact->tag(), $fact->gedcom(), $include_hidden);
    }

    /**
     * Add blank lines to a record, to allow a user to add/edit new values.
     *
     * @param GedcomRecord $record
     * @param bool         $include_hidden Also add the subtags that isHiddenTag() reports as hidden
     *
     * @return string The level 1+ lines of the record; for a Note, the level 0 text becomes a "1 CONC" line
     */
    public function insertMissingRecordSubtags(GedcomRecord $record, bool $include_hidden): string
    {
        $gedcom = $this->insertMissingLevels($record->tree(), $record->tag(), $record->gedcom(), $include_hidden);

        // NOTE records have data at level 0.  Move it to 1 CONC.
        if ($record instanceof Note) {
            return preg_replace('/^0 @[^@]+@ NOTE/', '1 CONC', $gedcom);
        }

        // Other records: drop the level 0 line.
        return preg_replace('/^0.*\n/', '', $gedcom);
    }

    /**
     * List of facts/events to add to families and individuals.
     *
     * The result is the subtags() of the record's element, without the hidden
     * tags unless $include_hidden is set. insertMissingLevels() reads subtags()
     * as a map of subtag => "min:max", so hidden tags are detected from the
     * array keys. Should subtags() return a plain list (integer keys), the
     * values are tested instead. Keys are preserved either way.
     *
     * @param Family|Individual $record
     * @param bool              $include_hidden
     *
     * @return array<string>
     */
    public function factsToAdd(GedcomRecord $record, bool $include_hidden): array
    {
        $record_tag = $record->tag();
        $subtags    = Registry::elementFactory()->make($record_tag)->subtags();

        if ($include_hidden) {
            return $subtags;
        }

        $fn_visible = function ($value, $key) use ($record_tag): bool {
            $subtag = is_string($key) ? $key : (string) $value;

            return !$this->isHiddenTag($record_tag . ':' . $subtag);
        };

        return array_filter($subtags, $fn_visible, ARRAY_FILTER_USE_BOTH);
    }

    /**
     * Recursively add the missing subtags of a GEDCOM line.
     *
     * The existing sub-lines that match an expected subtag are emitted first, in
     * the order given by the element's subtags(). While the maximum number of
     * occurrences has not been reached, new lines (with the element's default
     * value, if any) are added. Existing sub-lines that match no expected subtag
     * are appended at the end, unchanged.
     *
     * CONT lines of the first line are merged onto it, using "\r" as the
     * separator.
     *
     * @param Tree   $tree
     * @param string $tag            Full tag of the first line of $gedcom, e.g. INDI:BIRT
     * @param string $gedcom         A line at level N, followed by its sub-lines
     * @param bool   $include_hidden Also add the subtags that isHiddenTag() reports as hidden
     *
     * @return string
     */
    protected function insertMissingLevels(Tree $tree, string $tag, string $gedcom, bool $include_hidden): string
    {
        $next_level = substr_count($tag, ':') + 1;
        $factory    = Registry::elementFactory();
        $subtags    = $factory->make($tag)->subtags();

        // Merge CONT records onto their parent line.
        $gedcom = strtr($gedcom, [
            "\n" . $next_level . ' CONT ' => "\r",
            "\n" . $next_level . ' CONT'  => "\r",
        ]);

        // The first part is level N.  The remainder are level N+1.
        $parts  = preg_split('/\n(?=' . $next_level . ' )/', $gedcom) ?: [$gedcom];
        $return = array_shift($parts);

        foreach ($subtags as $subtag => $occurrences) {
            $subtag_path = $tag . ':' . $subtag;

            if (!$include_hidden && $this->isHiddenTag($subtag_path)) {
                continue;
            }

            // Occurrences are "min:max", where max may be "M" for unlimited.
            [$min, $max] = explode(':', $occurrences);

            $min = (int) $min;
            $max = $max === 'M' ? PHP_INT_MAX : (int) $max;

            // Match the whole tag: "1 CHR" must not match a "1 CHRA" line.
            $pattern = '/^' . $next_level . ' ' . preg_quote((string) $subtag, '/') . '(?![A-Za-z0-9_])/';
            $count   = 0;

            // Add expected subtags in our preferred order.
            foreach ($parts as $n => $part) {
                if (preg_match($pattern, $part) === 1) {
                    $return .= "\n" . $this->insertMissingLevels($tree, $subtag_path, $part, $include_hidden);
                    $count++;
                    unset($parts[$n]);
                }
            }

            // Allowed to have more of this subtag?
            if ($count < $max) {
                // Create a new one.
                $new_line = $next_level . ' ' . $subtag;
                $default  = $factory->make($subtag_path)->default($tree);

                if ($default !== '') {
                    $new_line .= ' ' . $default;
                }

                // Always offer one new line; add more if the minimum has not been reached.
                $number_to_add = max(1, $min - $count);
                $gedcom_to_add = "\n" . $this->insertMissingLevels($tree, $subtag_path, $new_line, $include_hidden);

                $return .= str_repeat($gedcom_to_add, $number_to_add);
            }
        }

        // Now add any unexpected/existing data.
        if ($parts !== []) {
            $return .= "\n" . implode("\n", $parts);
        }

        return $return;
    }

    /**
     * Should this tag be excluded when creating new data?
     *
     * Gedcom::HIDDEN_TAGS is read as a map of preference suffix => list of tags.
     * A group applies when the site preference 'HIDE_' . suffix is truthy. The
     * tag is hidden when it contains any tag of an applicable group.
     *
     * @param string $tag Full tag, e.g. INDI:BIRT:DATE
     *
     * @return bool
     */
    private function isHiddenTag(string $tag): bool
    {
        // Function to filter hidden tags.
        $fn_hide = static fn (string $x): bool => (bool) Site::getPreference('HIDE_' . $x);

        $enabled_groups = array_values(array_filter(Gedcom::HIDDEN_TAGS, $fn_hide, ARRAY_FILTER_USE_KEY));

        // The leading [] keeps the call valid when no group is enabled.
        $hidden_tags = array_merge([], ...$enabled_groups);

        foreach ($hidden_tags as $hidden_tag) {
            if (str_contains($tag, $hidden_tag)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Run handleUpdates() with a temporary set of interface update arrays.
     *
     * The current arrays are restored afterwards, even if handleUpdates() throws.
     *
     * @param array<string> $glevels
     * @param array<string> $tag
     * @param array<string> $islink
     * @param array<string> $text
     * @param string        $record        The GEDCOM record to append to
     * @param string        $levelOverride Passed to handleUpdates()
     *
     * @return string
     */
    private function updateWith(array $glevels, array $tag, array $islink, array $text, string $record, string $levelOverride): string
    {
        $saved = [$this->glevels, $this->tag, $this->islink, $this->text];

        $this->glevels = $glevels;
        $this->tag     = $tag;
        $this->islink  = $islink;
        $this->text    = $text;

        try {
            return $this->handleUpdates($record, $levelOverride);
        } finally {
            [$this->glevels, $this->tag, $this->islink, $this->text] = $saved;
        }
    }

    /**
     * Does the line at $index have a deeper line with a non-blank value?
     *
     * For an OBJE line, only FILE sub-lines count.
     *
     * @param int $index Index of the parent line in the interface update arrays
     * @param int $count Number of lines in the interface update arrays
     *
     * @return bool
     */
    private function hasSubLineWithValue(int $index, int $count): bool
    {
        for ($k = $index + 1; $k < $count && $this->glevels[$k] > $this->glevels[$index]; $k++) {
            if (trim($this->text[$k]) === '') {
                continue;
            }

            if ($this->tag[$index] !== 'OBJE' || $this->tag[$k] === 'FILE') {
                return true;
            }
        }

        return false;
    }
}
550