xref: /webtrees/app/Services/GedcomEditService.php (revision abdaad0d9fecead08c23d3eb00187f320a625403)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Fact;
23use Fisharebest\Webtrees\Family;
24use Fisharebest\Webtrees\Gedcom;
25use Fisharebest\Webtrees\GedcomRecord;
26use Fisharebest\Webtrees\Individual;
27use Fisharebest\Webtrees\Note;
28use Fisharebest\Webtrees\Registry;
29use Fisharebest\Webtrees\Site;
30use Fisharebest\Webtrees\Tree;
31use Psr\Http\Message\ServerRequestInterface;
32
33use function array_filter;
34use function array_merge;
35use function array_shift;
36use function array_unique;
37use function array_values;
38use function assert;
39use function count;
40use function explode;
41use function implode;
42use function max;
43use function preg_match_all;
44use function preg_replace;
45use function preg_split;
46use function str_repeat;
47use function str_replace;
48use function substr_count;
49use function trim;
50
51use const ARRAY_FILTER_USE_KEY;
52use const PHP_INT_MAX;
53
54/**
55 * Utilities to edit/save GEDCOM data.
56 */
57class GedcomEditService
58{
    /**
     * GEDCOM level of each edited line. Parallel to $tag, $islink and $text:
     * the same index in all four arrays describes one GEDCOM line.
     *
     * @var string[]
     */
    public $glevels = [];

    /**
     * GEDCOM tag of each edited line (same indexing as $glevels).
     *
     * @var string[]
     */
    public $tag = [];

    /**
     * Truthy when the text of the line is a link and must be wrapped in "@"
     * (same indexing as $glevels).
     *
     * @var string[]
     */
    public $islink = [];

    /**
     * Text/value of each edited line (same indexing as $glevels).
     *
     * @var string[]
     */
    public $text = [];

    /**
     * Levels of the lines that splitSource() assigned to SOUR citations.
     *
     * @var string[]
     */
    protected $glevelsSOUR = [];

    /**
     * Tags of the lines that splitSource() assigned to SOUR citations.
     *
     * @var string[]
     */
    protected $tagSOUR = [];

    /**
     * Link flags of the lines that splitSource() assigned to SOUR citations.
     *
     * @var string[]
     */
    protected $islinkSOUR = [];

    /**
     * Text of the lines that splitSource() assigned to SOUR citations.
     *
     * @var string[]
     */
    protected $textSOUR = [];

    /**
     * Levels of the lines that splitSource() did not assign to a SOUR citation.
     *
     * @var string[]
     */
    protected $glevelsRest = [];

    /**
     * Tags of the lines that splitSource() did not assign to a SOUR citation.
     *
     * @var string[]
     */
    protected $tagRest = [];

    /**
     * Link flags of the lines that splitSource() did not assign to a SOUR citation.
     *
     * @var string[]
     */
    protected $islinkRest = [];

    /**
     * Text of the lines that splitSource() did not assign to a SOUR citation.
     *
     * @var string[]
     */
    protected $textRest = [];
94
95    /**
96     * This function splits the $glevels, $tag, $islink, and $text arrays so that the
97     * entries associated with a SOUR record are separate from everything else.
98     *
99     * Input arrays:
100     * - $glevels[] - an array of the gedcom level for each line that was edited
101     * - $tag[] - an array of the tags for each gedcom line that was edited
102     * - $islink[] - an array of 1 or 0 values to indicate when the text is a link element
103     * - $text[] - an array of the text data for each line
104     *
105     * Output arrays:
106     * ** For the SOUR record:
107     * - $glevelsSOUR[] - an array of the gedcom level for each line that was edited
108     * - $tagSOUR[] - an array of the tags for each gedcom line that was edited
109     * - $islinkSOUR[] - an array of 1 or 0 values to indicate when the text is a link element
110     * - $textSOUR[] - an array of the text data for each line
111     * ** For the remaining records:
112     * - $glevelsRest[] - an array of the gedcom level for each line that was edited
113     * - $tagRest[] - an array of the tags for each gedcom line that was edited
114     * - $islinkRest[] - an array of 1 or 0 values to indicate when the text is a link element
115     * - $textRest[] - an array of the text data for each line
116     *
117     * @return void
118     */
119    public function splitSource(): void
120    {
121        $this->glevelsSOUR = [];
122        $this->tagSOUR     = [];
123        $this->islinkSOUR  = [];
124        $this->textSOUR    = [];
125
126        $this->glevelsRest = [];
127        $this->tagRest     = [];
128        $this->islinkRest  = [];
129        $this->textRest    = [];
130
131        $inSOUR    = false;
132        $levelSOUR = 0;
133
134        // Assume all arrays are the same size.
135        $count = count($this->glevels);
136
137        for ($i = 0; $i < $count; $i++) {
138            if ($inSOUR) {
139                if ($levelSOUR < $this->glevels[$i]) {
140                    $dest = 'S';
141                } else {
142                    $inSOUR = false;
143                    $dest   = 'R';
144                }
145            } elseif ($this->tag[$i] === 'SOUR') {
146                $inSOUR    = true;
147                $levelSOUR = $this->glevels[$i];
148                $dest      = 'S';
149            } else {
150                $dest = 'R';
151            }
152
153            if ($dest === 'S') {
154                $this->glevelsSOUR[] = $this->glevels[$i];
155                $this->tagSOUR[]     = $this->tag[$i];
156                $this->islinkSOUR[]  = $this->islink[$i];
157                $this->textSOUR[]    = $this->text[$i];
158            } else {
159                $this->glevelsRest[] = $this->glevels[$i];
160                $this->tagRest[]     = $this->tag[$i];
161                $this->islinkRest[]  = $this->islink[$i];
162                $this->textRest[]    = $this->text[$i];
163            }
164        }
165    }
166
167    /**
168     * Add new GEDCOM lines from the $xxxRest interface update arrays, which
169     * were produced by the splitSOUR() function.
170     * See the FunctionsEdit::handle_updatesges() function for details.
171     *
172     * @param string $inputRec
173     *
174     * @return string
175     */
176    public function updateRest(string $inputRec): string
177    {
178        if (count($this->tagRest) === 0) {
179            return $inputRec; // No update required
180        }
181
182        // Save original interface update arrays before replacing them with the xxxRest ones
183        $glevelsSave = $this->glevels;
184        $tagSave     = $this->tag;
185        $islinkSave  = $this->islink;
186        $textSave    = $this->text;
187
188        $this->glevels = $this->glevelsRest;
189        $this->tag     = $this->tagRest;
190        $this->islink  = $this->islinkRest;
191        $this->text    = $this->textRest;
192
193        $myRecord = $this->handleUpdates($inputRec, 'no'); // Now do the update
194
195        // Restore the original interface update arrays (just in case ...)
196        $this->glevels = $glevelsSave;
197        $this->tag     = $tagSave;
198        $this->islink  = $islinkSave;
199        $this->text    = $textSave;
200
201        return $myRecord;
202    }
203
204    /**
205     * Add new gedcom lines from interface update arrays
206     * The edit_interface and FunctionsEdit::add_simple_tag function produce the following
207     * arrays incoming from the $_POST form
208     * - $glevels[] - an array of the gedcom level for each line that was edited
209     * - $tag[] - an array of the tags for each gedcom line that was edited
210     * - $islink[] - an array of 1 or 0 values to tell whether the text is a link element and should be surrounded by @@
211     * - $text[] - an array of the text data for each line
212     * With these arrays you can recreate the gedcom lines like this
213     * <code>$glevel[0].' '.$tag[0].' '.$text[0]</code>
214     * There will be an index in each of these arrays for each line of the gedcom
215     * fact that is being edited.
216     * If the $text[] array is empty for the given line, then it means that the
217     * user removed that line during editing or that the line is supposed to be
218     * empty (1 DEAT, 1 BIRT) for example. To know if the line should be removed
219     * there is a section of code that looks ahead to the next lines to see if there
220     * are sub lines. For example we don't want to remove the 1 DEAT line if it has
221     * a 2 PLAC or 2 DATE line following it. If there are no sub lines, then the line
222     * can be safely removed.
223     *
224     * @param string $newged        the new gedcom record to add the lines to
225     * @param string $levelOverride Override GEDCOM level specified in $glevels[0]
226     *
227     * @return string The updated gedcom record
228     */
229    public function handleUpdates(string $newged, string $levelOverride = 'no'): string
230    {
231        if ($levelOverride === 'no') {
232            $levelAdjust = 0;
233        } else {
234            $levelAdjust = 1;
235        }
236
237        // Assert all arrays are the same size.
238        assert(count($this->glevels) === count($this->tag));
239        assert(count($this->glevels) === count($this->text));
240        assert(count($this->glevels) === count($this->islink));
241
242        $count = count($this->glevels);
243
244        for ($j = 0; $j < $count; $j++) {
245            // Look for empty SOUR reference with non-empty sub-records.
246            // This can happen when the SOUR entry is deleted but its sub-records
247            // were incorrectly left intact.
248            // The sub-records should be deleted.
249            if ($this->tag[$j] === 'SOUR' && ($this->text[$j] === '@@' || $this->text[$j] === '')) {
250                $this->text[$j] = '';
251                $k              = $j + 1;
252                while ($k < $count && $this->glevels[$k] > $this->glevels[$j]) {
253                    $this->text[$k] = '';
254                    $k++;
255                }
256            }
257
258            if (trim($this->text[$j]) !== '') {
259                $pass = true;
260            } else {
261                //-- for facts with empty values they must have sub records
262                //-- this section checks if they have subrecords
263                $k    = $j + 1;
264                $pass = false;
265                while ($k < $count && $this->glevels[$k] > $this->glevels[$j]) {
266                    if ($this->text[$k] !== '') {
267                        if ($this->tag[$j] !== 'OBJE' || $this->tag[$k] === 'FILE') {
268                            $pass = true;
269                            break;
270                        }
271                    }
272                    $k++;
273                }
274            }
275
276            //-- if the value is not empty or it has sub lines
277            //--- then write the line to the gedcom record
278            //-- we have to let some emtpy text lines pass through... (DEAT, BIRT, etc)
279            if ($pass) {
280                $newline = (int) $this->glevels[$j] + $levelAdjust . ' ' . $this->tag[$j];
281                if ($this->text[$j] !== '') {
282                    if ($this->islink[$j]) {
283                        $newline .= ' @' . trim($this->text[$j], '@') . '@';
284                    } else {
285                        $newline .= ' ' . $this->text[$j];
286                    }
287                }
288                $next_level = 1 + (int) $this->glevels[$j] + $levelAdjust;
289
290                $newged .= "\n" . str_replace("\n", "\n" . $next_level . ' CONT ', $newline);
291            }
292        }
293
294        return $newged;
295    }
296
297    /**
298     * Create a form to add a new fact.
299     *
300     * @param ServerRequestInterface $request
301     * @param Tree                   $tree
302     * @param string                 $fact
303     *
304     * @return string
305     */
306    public function addNewFact(ServerRequestInterface $request, Tree $tree, string $fact): string
307    {
308        $params = (array) $request->getParsedBody();
309
310        $FACT = $params[$fact];
311        $DATE = $params[$fact . '_DATE'] ?? '';
312        $PLAC = $params[$fact . '_PLAC'] ?? '';
313
314        if ($DATE !== '' || $PLAC !== '' || $FACT !== '' && $FACT !== 'Y') {
315            if ($FACT !== '' && $FACT !== 'Y') {
316                $gedrec = "\n1 " . $fact . ' ' . $FACT;
317            } else {
318                $gedrec = "\n1 " . $fact;
319            }
320            if ($DATE !== '') {
321                $gedrec .= "\n2 DATE " . $DATE;
322            }
323            if ($PLAC !== '') {
324                $gedrec .= "\n2 PLAC " . $PLAC;
325
326                if (preg_match_all('/(' . Gedcom::REGEX_TAG . ')/', $tree->getPreference('ADVANCED_PLAC_FACTS'), $match)) {
327                    foreach ($match[1] as $tag) {
328                        $TAG = $params[$fact . '_' . $tag];
329                        if ($TAG !== '') {
330                            $gedrec .= "\n3 " . $tag . ' ' . $TAG;
331                        }
332                    }
333                }
334                $LATI = $params[$fact . '_LATI'] ?? '';
335                $LONG = $params[$fact . '_LONG'] ?? '';
336                if ($LATI !== '' || $LONG !== '') {
337                    $gedrec .= "\n3 MAP\n4 LATI " . $LATI . "\n4 LONG " . $LONG;
338                }
339            }
340            if ((bool) ($params['SOUR_' . $fact] ?? false)) {
341                return $this->updateSource($gedrec, 'yes');
342            }
343
344            return $gedrec;
345        }
346
347        if ($FACT === 'Y') {
348            if ((bool) ($params['SOUR_' . $fact] ?? false)) {
349                return $this->updateSource("\n1 " . $fact . ' Y', 'yes');
350            }
351
352            return "\n1 " . $fact . ' Y';
353        }
354
355        return '';
356    }
357
358    /**
359     * Add new GEDCOM lines from the $xxxSOUR interface update arrays, which
360     * were produced by the splitSOUR() function.
361     * See the FunctionsEdit::handle_updatesges() function for details.
362     *
363     * @param string $inputRec
364     * @param string $levelOverride
365     *
366     * @return string
367     */
368    public function updateSource(string $inputRec, string $levelOverride = 'no'): string
369    {
370        if (count($this->tagSOUR) === 0) {
371            return $inputRec; // No update required
372        }
373
374        // Save original interface update arrays before replacing them with the xxxSOUR ones
375        $glevelsSave = $this->glevels;
376        $tagSave     = $this->tag;
377        $islinkSave  = $this->islink;
378        $textSave    = $this->text;
379
380        $this->glevels = $this->glevelsSOUR;
381        $this->tag     = $this->tagSOUR;
382        $this->islink  = $this->islinkSOUR;
383        $this->text    = $this->textSOUR;
384
385        $myRecord = $this->handleUpdates($inputRec, $levelOverride); // Now do the update
386
387        // Restore the original interface update arrays (just in case ...)
388        $this->glevels = $glevelsSave;
389        $this->tag     = $tagSave;
390        $this->islink  = $islinkSave;
391        $this->text    = $textSave;
392
393        return $myRecord;
394    }
395
396    /**
397     * Create a form to add a sex record.
398     *
399     * @param ServerRequestInterface $request
400     *
401     * @return string
402     */
403    public function addNewSex(ServerRequestInterface $request): string
404    {
405        $params = (array) $request->getParsedBody();
406
407        switch ($params['SEX']) {
408            case 'M':
409                return "\n1 SEX M";
410            case 'F':
411                return "\n1 SEX F";
412            default:
413                return "\n1 SEX U";
414        }
415    }
416
417    /**
418     * Assemble the pieces of a newly created record into gedcom
419     *
420     * @param ServerRequestInterface $request
421     * @param Tree                   $tree
422     *
423     * @return string
424     */
425    public function addNewName(ServerRequestInterface $request, Tree $tree): string
426    {
427        $params = (array) $request->getParsedBody();
428        $gedrec = "\n1 NAME " . $params['NAME'];
429
430        $tags = [
431            'NPFX',
432            'GIVN',
433            'SPFX',
434            'SURN',
435            'NSFX',
436            'NICK',
437        ];
438
439        if (preg_match_all('/(' . Gedcom::REGEX_TAG . ')/', $tree->getPreference('ADVANCED_NAME_FACTS'), $match)) {
440            $tags = array_merge($tags, $match[1]);
441        }
442
443        // Paternal and Polish and Lithuanian surname traditions can also create a _MARNM
444        $SURNAME_TRADITION = $tree->getPreference('SURNAME_TRADITION');
445        if ($SURNAME_TRADITION === 'paternal' || $SURNAME_TRADITION === 'polish' || $SURNAME_TRADITION === 'lithuanian') {
446            $tags[] = '_MARNM';
447        }
448
449        foreach (array_unique($tags) as $tag) {
450            $TAG = $params[$tag];
451
452            if ($TAG !== '') {
453                $gedrec .= "\n2 " . $tag . ' ' . $TAG;
454            }
455        }
456
457        return $gedrec;
458    }
459
460    /**
461     * Reassemble edited GEDCOM fields into a GEDCOM fact/event string.
462     *
463     * @param string        $record_type
464     * @param array<string> $levels
465     * @param array<string> $tags
466     * @param array<string> $values
467     *
468     * @return string
469     */
470    public function editLinesToGedcom(string $record_type, array $levels, array $tags, array $values): string
471    {
472        // Assert all arrays are the same size.
473        $count = count($levels);
474        assert($count > 0);
475        assert(count($tags) === $count);
476        assert(count($values) === $count);
477
478        $gedcom_lines = [];
479        $hierarchy    = [$record_type];
480
481        for ($i = 0; $i < $count; $i++) {
482            $hierarchy[$levels[$i]] = $tags[$i];
483
484            $full_tag   = implode(':', array_slice($hierarchy, 0, 1 + (int) $levels[$i]));
485            $element    = Registry::elementFactory()->make($full_tag);
486            $values[$i] = $element->canonical($values[$i]);
487
488            // If "1 FACT Y" has a DATE or PLAC, then delete the value of Y
489            if ($levels[$i] === '1' && $values[$i] === 'Y') {
490                for ($j = $i + 1; $j < $count && $levels[$j] > $levels[$i]; ++$j) {
491                    if ($levels[$j] === '2' && ($tags[$j] === 'DATE' || $tags[$j] === 'PLAC') && $values[$j] !== '') {
492                        $values[$i] = '';
493                        break;
494                    }
495                }
496            }
497
498            // Include this line if there is a value - or if there is a child record with a value.
499            $include = $values[$i] !== '';
500
501            for ($j = $i + 1; !$include && $j < $count && $levels[$j] > $levels[$i]; $j++) {
502                $include = $values[$j] !== '';
503            }
504
505            if ($include) {
506                if ($values[$i] === '') {
507                    $gedcom_lines[] = $levels[$i] . ' ' . $tags[$i];
508                } else {
509                    if ($tags[$i] === 'CONC') {
510                        $next_level = (int) $levels[$i];
511                    } else {
512                        $next_level = 1 + (int) $levels[$i];
513                    }
514
515                    $gedcom_lines[] = $levels[$i] . ' ' . $tags[$i] . ' ' . str_replace("\n", "\n" . $next_level . ' CONT ', $values[$i]);
516                }
517            }
518        }
519
520        return implode("\n", $gedcom_lines);
521    }
522
523    /**
524     * Add blank lines, to allow a user to add/edit new values.
525     *
526     * @param Fact $fact
527     * @param bool $include_hidden
528     *
529     * @return string
530     */
531    public function insertMissingFactSubtags(Fact $fact, bool $include_hidden): string
532    {
533        return $this->insertMissingLevels($fact->record()->tree(), $fact->tag(), $fact->gedcom(), $include_hidden);
534    }
535
536    /**
537     * Add blank lines, to allow a user to add/edit new values.
538     *
539     * @param GedcomRecord $record
540     * @param bool         $include_hidden
541     *
542     * @return string
543     */
544    public function insertMissingRecordSubtags(GedcomRecord $record, bool $include_hidden): string
545    {
546        $gedcom = $this->insertMissingLevels($record->tree(), $record->tag(), $record->gedcom(), $include_hidden);
547
548        // NOTE records have data at level 0.  Move it to 1 CONC.
549        if ($record instanceof Note) {
550            return preg_replace('/^0 @[^@]+@ NOTE/', '1 CONC', $gedcom);
551        }
552
553        return preg_replace('/^0.*\n/', '', $gedcom);
554    }
555
556    /**
557     * List of facts/events to add to families and individuals.
558     *
559     * @param Family|Individual $record
560     * @param bool              $include_hidden
561     *
562     * @return array<string>
563     */
564    public function factsToAdd(GedcomRecord $record, bool $include_hidden): array
565    {
566        $subtags = Registry::elementFactory()->make($record->tag())->subtags();
567
568        if (!$include_hidden) {
569            $fn_hidden = fn (string $t): bool => !$this->isHiddenTag($record->tag() . ':' . $t);
570            $subtags   = array_filter($subtags, $fn_hidden);
571        }
572
573        return $subtags;
574    }
575
576    /**
577     * @param Tree   $tree
578     * @param string $tag
579     * @param string $gedcom
580     * @param bool   $include_hidden
581     *
582     * @return string
583     */
584    protected function insertMissingLevels(Tree $tree, string $tag, string $gedcom, bool $include_hidden): string
585    {
586        $next_level = substr_count($tag, ':') + 1;
587        $factory    = Registry::elementFactory();
588        $subtags    = $factory->make($tag)->subtags();
589
590        // Merge CONT records onto their parent line.
591        $gedcom = strtr($gedcom, [
592            "\n" . $next_level . ' CONT ' => "\r",
593            "\n" . $next_level . ' CONT' => "\r",
594        ]);
595
596        // The first part is level N.  The remainder are level N+1.
597        $parts  = preg_split('/\n(?=' . $next_level . ')/', $gedcom);
598        $return = array_shift($parts);
599
600        foreach ($subtags as $subtag => $occurrences) {
601            if (!$include_hidden && $this->isHiddenTag($tag . ':' . $subtag)) {
602                continue;
603            }
604
605            [$min, $max] = explode(':', $occurrences);
606
607            $min = (int) $min;
608
609            if ($max === 'M') {
610                $max = PHP_INT_MAX;
611            } else {
612                $max = (int) $max;
613            }
614
615            $count = 0;
616
617            // Add expected subtags in our preferred order.
618            foreach ($parts as $n => $part) {
619                if (str_starts_with($part, $next_level . ' ' . $subtag)) {
620                    $return .= "\n" . $this->insertMissingLevels($tree, $tag . ':' . $subtag, $part, $include_hidden);
621                    $count++;
622                    unset($parts[$n]);
623                }
624            }
625
626            // Allowed to have more of this subtag?
627            if ($count < $max) {
628                // Create a new one.
629                $gedcom  = $next_level . ' ' . $subtag;
630                $default = $factory->make($tag . ':' . $subtag)->default($tree);
631                if ($default !== '') {
632                    $gedcom .= ' ' . $default;
633                }
634
635                $number_to_add = max(1, $min - $count);
636                $gedcom_to_add = "\n" . $this->insertMissingLevels($tree, $tag . ':' . $subtag, $gedcom, $include_hidden);
637
638                $return .= str_repeat($gedcom_to_add, $number_to_add);
639            }
640        }
641
642        // Now add any unexpected/existing data.
643        if ($parts !== []) {
644            $return .= "\n" . implode("\n", $parts);
645        }
646
647        return $return;
648    }
649
650    /**
651     * List of tags to exclude when creating new data.
652     *
653     * @param string $tag
654     *
655     * @return bool
656     */
657    private function isHiddenTag(string $tag): bool
658    {
659        // Function to filter hidden tags.
660        $fn_hide = fn (string $x): bool => (bool) Site::getPreference('HIDE_' . $x);
661
662        $preferences = array_filter(Gedcom::HIDDEN_TAGS, $fn_hide, ARRAY_FILTER_USE_KEY);
663        $preferences = array_values($preferences);
664        $hidden_tags = array_merge(...$preferences);
665
666        foreach ($hidden_tags as $hidden_tag) {
667            if (str_contains($tag, $hidden_tag)) {
668                return true;
669            }
670        }
671
672        return false;
673    }
674}
675