xref: /webtrees/app/Services/GedcomEditService.php (revision 553866bf6c7c44557f84e9aff107db4703c18b65)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Fact;
23use Fisharebest\Webtrees\Gedcom;
24use Fisharebest\Webtrees\GedcomRecord;
25use Fisharebest\Webtrees\Registry;
26use Fisharebest\Webtrees\Site;
27use Fisharebest\Webtrees\Tree;
28use Psr\Http\Message\ServerRequestInterface;
29
30use function array_filter;
31use function array_merge;
32use function array_shift;
33use function array_unique;
34use function array_values;
35use function assert;
36use function count;
37use function explode;
38use function implode;
39use function max;
40use function preg_match_all;
41use function preg_replace;
42use function preg_split;
43use function str_repeat;
44use function str_replace;
45use function substr_count;
46use function trim;
47
48use const ARRAY_FILTER_USE_KEY;
49use const PHP_INT_MAX;
50
/**
 * Utilities to edit/save GEDCOM data.
 *
 * The legacy edit forms submit four parallel arrays (level, tag, is-link flag
 * and text) describing one GEDCOM line per index. The public properties below
 * hold those arrays; the methods of this class turn them back into GEDCOM text.
 */
class GedcomEditService
{
    /** @var string[] GEDCOM level of each edited line */
    public $glevels = [];

    /** @var string[] GEDCOM tag of each edited line */
    public $tag = [];

    /** @var string[] Truthy when the text of the line is a link (XREF) and must be wrapped in "@" */
    public $islink = [];

    /** @var string[] Text (value) of each edited line */
    public $text = [];

    /** @var string[] Levels of the lines that belong to the SOUR block - see splitSource() */
    protected $glevelsSOUR = [];

    /** @var string[] Tags of the lines that belong to the SOUR block - see splitSource() */
    protected $tagSOUR = [];

    /** @var string[] Link flags of the lines that belong to the SOUR block - see splitSource() */
    protected $islinkSOUR = [];

    /** @var string[] Text of the lines that belong to the SOUR block - see splitSource() */
    protected $textSOUR = [];

    /** @var string[] Levels of all other lines - see splitSource() */
    protected $glevelsRest = [];

    /** @var string[] Tags of all other lines - see splitSource() */
    protected $tagRest = [];

    /** @var string[] Link flags of all other lines - see splitSource() */
    protected $islinkRest = [];

    /** @var string[] Text of all other lines - see splitSource() */
    protected $textRest = [];

    /**
     * Split the $glevels, $tag, $islink, and $text arrays so that the
     * entries associated with a SOUR record are separate from everything else.
     *
     * Input arrays:
     * - $glevels[] - an array of the gedcom level for each line that was edited
     * - $tag[] - an array of the tags for each gedcom line that was edited
     * - $islink[] - an array of 1 or 0 values to indicate when the text is a link element
     * - $text[] - an array of the text data for each line
     *
     * Output arrays:
     * ** For the SOUR record:
     * - $glevelsSOUR[], $tagSOUR[], $islinkSOUR[], $textSOUR[]
     * ** For the remaining records:
     * - $glevelsRest[], $tagRest[], $islinkRest[], $textRest[]
     *
     * A SOUR line opens a block; every following line with a deeper level
     * belongs to that block. The first line that is not deeper closes the block
     * and is itself sent to the "Rest" arrays - even when that line is another
     * SOUR line.
     *
     * @return void
     */
    public function splitSource(): void
    {
        $this->glevelsSOUR = [];
        $this->tagSOUR     = [];
        $this->islinkSOUR  = [];
        $this->textSOUR    = [];

        $this->glevelsRest = [];
        $this->tagRest     = [];
        $this->islinkRest  = [];
        $this->textRest    = [];

        $inSOUR    = false;
        $levelSOUR = 0;

        // Assume all arrays are the same size.
        $count = count($this->glevels);

        for ($i = 0; $i < $count; $i++) {
            if ($inSOUR) {
                if ($levelSOUR < $this->glevels[$i]) {
                    // Still inside the SOUR block.
                    $dest = 'S';
                } else {
                    // Back at (or above) the level of the SOUR line: the block has ended.
                    $inSOUR = false;
                    $dest   = 'R';
                }
            } elseif ($this->tag[$i] === 'SOUR') {
                $inSOUR    = true;
                $levelSOUR = $this->glevels[$i];
                $dest      = 'S';
            } else {
                $dest = 'R';
            }

            if ($dest === 'S') {
                $this->glevelsSOUR[] = $this->glevels[$i];
                $this->tagSOUR[]     = $this->tag[$i];
                $this->islinkSOUR[]  = $this->islink[$i];
                $this->textSOUR[]    = $this->text[$i];
            } else {
                $this->glevelsRest[] = $this->glevels[$i];
                $this->tagRest[]     = $this->tag[$i];
                $this->islinkRest[]  = $this->islink[$i];
                $this->textRest[]    = $this->text[$i];
            }
        }
    }

    /**
     * Add new GEDCOM lines from the $xxxRest interface update arrays, which
     * were produced by the splitSource() function.
     * See the handleUpdates() function for details.
     *
     * @param string $inputRec the GEDCOM text to append to
     *
     * @return string $inputRec with the non-SOUR lines appended
     */
    public function updateRest(string $inputRec): string
    {
        if (count($this->tagRest) === 0) {
            return $inputRec; // No update required
        }

        return $this->handleUpdatesUsing(
            $this->glevelsRest,
            $this->tagRest,
            $this->islinkRest,
            $this->textRest,
            $inputRec,
            'no'
        );
    }

    /**
     * Add new gedcom lines from interface update arrays
     * The edit forms produce the following arrays
     * - $glevels[] - an array of the gedcom level for each line that was edited
     * - $tag[] - an array of the tags for each gedcom line that was edited
     * - $islink[] - an array of 1 or 0 values to tell whether the text is a link element and should be surrounded by @@
     * - $text[] - an array of the text data for each line
     * With these arrays you can recreate the gedcom lines like this
     * <code>$glevel[0].' '.$tag[0].' '.$text[0]</code>
     * There will be an index in each of these arrays for each line of the gedcom
     * fact that is being edited.
     * If the $text[] array is empty for the given line, then it means that the
     * user removed that line during editing or that the line is supposed to be
     * empty (1 DEAT, 1 BIRT) for example. To know if the line should be removed
     * there is a section of code that looks ahead to the next lines to see if there
     * are sub lines. For example we don't want to remove the 1 DEAT line if it has
     * a 2 PLAC or 2 DATE line following it. If there are no sub lines, then the line
     * can be safely removed.
     *
     * Text that consists only of whitespace is treated as empty, both for the
     * line itself and for the sub lines that would otherwise keep it alive.
     *
     * Side effect: when a SOUR line is empty (or "@@"), the text of that line
     * and of all its sub lines is blanked in $this->text.
     *
     * @param string $newged        the new gedcom record to add the lines to
     * @param string $levelOverride 'no' to use the levels in $glevels as-is; any other value adds one to each level
     *
     * @return string The updated gedcom record
     */
    public function handleUpdates(string $newged, string $levelOverride = 'no'): string
    {
        $levelAdjust = $levelOverride === 'no' ? 0 : 1;

        // Assert all arrays are the same size.
        assert(count($this->glevels) === count($this->tag));
        assert(count($this->glevels) === count($this->text));
        assert(count($this->glevels) === count($this->islink));

        $count = count($this->glevels);

        for ($j = 0; $j < $count; $j++) {
            // Look for empty SOUR reference with non-empty sub-records.
            // This can happen when the SOUR entry is deleted but its sub-records
            // were incorrectly left intact.
            // The sub-records should be deleted.
            if ($this->tag[$j] === 'SOUR' && ($this->text[$j] === '@@' || $this->text[$j] === '')) {
                $this->text[$j] = '';

                for ($k = $j + 1; $k < $count && $this->glevels[$k] > $this->glevels[$j]; $k++) {
                    $this->text[$k] = '';
                }
            }

            $has_value = trim($this->text[$j]) !== '';
            $pass      = $has_value;

            if (!$pass) {
                //-- facts with empty values must have sub records
                //-- this section checks if they have subrecords
                for ($k = $j + 1; $k < $count && $this->glevels[$k] > $this->glevels[$j]; $k++) {
                    // A whitespace-only sub line will not be written, so it cannot justify its parent.
                    if (trim($this->text[$k]) === '') {
                        continue;
                    }

                    // An empty OBJE is only kept alive by a FILE sub line.
                    if ($this->tag[$j] !== 'OBJE' || $this->tag[$k] === 'FILE') {
                        $pass = true;
                        break;
                    }
                }
            }

            //-- if the value is not empty or it has sub lines
            //--- then write the line to the gedcom record
            //-- we have to let some empty text lines pass through... (DEAT, BIRT, etc)
            if ($pass) {
                // Parenthesised: mixing "+" and "." without brackets is deprecated in PHP 7.4.
                $level   = (int) $this->glevels[$j] + $levelAdjust;
                $newline = $level . ' ' . $this->tag[$j];

                if ($has_value) {
                    if ($this->islink[$j]) {
                        $newline .= ' @' . trim($this->text[$j], '@') . '@';
                    } else {
                        $newline .= ' ' . $this->text[$j];
                    }
                }

                // Multi-line values continue on CONT lines, one level deeper.
                $next_level = $level + 1;

                $newged .= "\n" . str_replace("\n", "\n" . $next_level . ' CONT ', $newline);
            }
        }

        return $newged;
    }

    /**
     * Build the GEDCOM for a new fact from the submitted form fields.
     *
     * Reads the fields "<fact>", "<fact>_DATE", "<fact>_PLAC", "<fact>_LATI",
     * "<fact>_LONG", one "<fact>_<tag>" field for each tag listed in the tree
     * preference ADVANCED_PLAC_FACTS, and the flag "SOUR_<fact>". Fields that
     * are absent from the request are treated as empty.
     *
     * @param ServerRequestInterface $request
     * @param Tree                   $tree
     * @param string                 $fact the GEDCOM tag of the fact, e.g. BIRT
     *
     * @return string GEDCOM lines, each preceded by a newline - or an empty string if nothing was entered
     */
    public function addNewFact(ServerRequestInterface $request, Tree $tree, string $fact): string
    {
        $params = (array) $request->getParsedBody();

        $FACT = $params[$fact] ?? '';
        $DATE = $params[$fact . '_DATE'] ?? '';
        $PLAC = $params[$fact . '_PLAC'] ?? '';

        // "Y" only asserts that the event happened; it is not a value in its own right.
        $has_value = $FACT !== '' && $FACT !== 'Y';

        if ($DATE !== '' || $PLAC !== '' || $has_value) {
            $gedrec = "\n1 " . $fact;

            if ($has_value) {
                $gedrec .= ' ' . $FACT;
            }

            if ($DATE !== '') {
                $gedrec .= "\n2 DATE " . $DATE;
            }

            if ($PLAC !== '') {
                $gedrec .= "\n2 PLAC " . $PLAC;

                if (preg_match_all('/(' . Gedcom::REGEX_TAG . ')/', $tree->getPreference('ADVANCED_PLAC_FACTS'), $match)) {
                    foreach ($match[1] as $tag) {
                        $TAG = $params[$fact . '_' . $tag] ?? '';

                        if ($TAG !== '') {
                            $gedrec .= "\n3 " . $tag . ' ' . $TAG;
                        }
                    }
                }

                $LATI = $params[$fact . '_LATI'] ?? '';
                $LONG = $params[$fact . '_LONG'] ?? '';

                if ($LATI !== '' || $LONG !== '') {
                    $gedrec .= "\n3 MAP\n4 LATI " . $LATI . "\n4 LONG " . $LONG;
                }
            }
        } elseif ($FACT === 'Y') {
            $gedrec = "\n1 " . $fact . ' Y';
        } else {
            return '';
        }

        if ((bool) ($params['SOUR_' . $fact] ?? false)) {
            // Append the source citation collected by splitSource(), one level deeper.
            return $this->updateSource($gedrec, 'yes');
        }

        return $gedrec;
    }

    /**
     * Add new GEDCOM lines from the $xxxSOUR interface update arrays, which
     * were produced by the splitSource() function.
     * See the handleUpdates() function for details.
     *
     * @param string $inputRec      the GEDCOM text to append to
     * @param string $levelOverride passed through to handleUpdates()
     *
     * @return string $inputRec with the SOUR lines appended
     */
    public function updateSource(string $inputRec, string $levelOverride = 'no'): string
    {
        if (count($this->tagSOUR) === 0) {
            return $inputRec; // No update required
        }

        return $this->handleUpdatesUsing(
            $this->glevelsSOUR,
            $this->tagSOUR,
            $this->islinkSOUR,
            $this->textSOUR,
            $inputRec,
            $levelOverride
        );
    }

    /**
     * Build the GEDCOM for a sex record from the submitted "SEX" field.
     *
     * @param ServerRequestInterface $request
     *
     * @return string "\n1 SEX M", "\n1 SEX F" or - for any other or missing value - "\n1 SEX U"
     */
    public function addNewSex(ServerRequestInterface $request): string
    {
        $params = (array) $request->getParsedBody();
        $sex    = $params['SEX'] ?? '';

        if ($sex === 'M' || $sex === 'F') {
            return "\n1 SEX " . $sex;
        }

        return "\n1 SEX U";
    }

    /**
     * Assemble the pieces of a newly created name into gedcom
     *
     * Reads the field "NAME" plus one field per name sub-tag. Fields that are
     * absent from the request are treated as empty, so no line is written for them.
     *
     * @param ServerRequestInterface $request
     * @param Tree                   $tree
     *
     * @return string A "1 NAME" line followed by its level 2 lines, each preceded by a newline
     */
    public function addNewName(ServerRequestInterface $request, Tree $tree): string
    {
        $params = (array) $request->getParsedBody();
        $gedrec = "\n1 NAME " . ($params['NAME'] ?? '');

        $tags = [
            'NPFX',
            'GIVN',
            'SPFX',
            'SURN',
            'NSFX',
            'NICK',
        ];

        if (preg_match_all('/(' . Gedcom::REGEX_TAG . ')/', $tree->getPreference('ADVANCED_NAME_FACTS'), $match)) {
            $tags = array_merge($tags, $match[1]);
        }

        // Paternal and Polish and Lithuanian surname traditions can also create a _MARNM
        $SURNAME_TRADITION = $tree->getPreference('SURNAME_TRADITION');
        if ($SURNAME_TRADITION === 'paternal' || $SURNAME_TRADITION === 'polish' || $SURNAME_TRADITION === 'lithuanian') {
            $tags[] = '_MARNM';
        }

        foreach (array_unique($tags) as $tag) {
            $TAG = $params[$tag] ?? '';

            if ($TAG !== '') {
                $gedrec .= "\n2 " . $tag . ' ' . $TAG;
            }
        }

        return $gedrec;
    }

    /**
     * Reassemble edited GEDCOM fields into a GEDCOM fact/event string.
     *
     * Each value is first passed through the canonical() method of the element
     * registered for its full tag (e.g. INDI:BIRT:DATE). Lines with neither a
     * value nor a descendant with a value are omitted.
     *
     * @param string        $record_type the tag of the enclosing record; the root of every full tag
     * @param array<string> $levels
     * @param array<string> $tags
     * @param array<string> $values
     *
     * @return string The GEDCOM lines, joined by newlines
     */
    public function editLinesToGedcom(string $record_type, array $levels, array $tags, array $values): string
    {
        // Assert all arrays are the same size.
        $count = count($levels);
        assert($count > 0);
        assert(count($tags) === $count);
        assert(count($values) === $count);

        $gedcom_lines = [];
        $hierarchy    = [$record_type];

        for ($i = 0; $i < $count; $i++) {
            // Remember the tag seen most recently at each level, to build the full tag of this line.
            $hierarchy[$levels[$i]] = $tags[$i];

            $full_tag   = implode(':', array_slice($hierarchy, 0, 1 + (int) $levels[$i]));
            $element    = Registry::elementFactory()->make($full_tag);
            $values[$i] = $element->canonical($values[$i]);

            // If "1 FACT Y" has a DATE or PLAC, then delete the value of Y
            if ($levels[$i] === '1' && $values[$i] === 'Y') {
                for ($j = $i + 1; $j < $count && $levels[$j] > $levels[$i]; ++$j) {
                    if ($levels[$j] === '2' && ($tags[$j] === 'DATE' || $tags[$j] === 'PLAC') && $values[$j] !== '') {
                        $values[$i] = '';
                        break;
                    }
                }
            }

            // Include this line if there is a value - or if there is a child record with a value.
            $include = $values[$i] !== '';

            for ($j = $i + 1; !$include && $j < $count && $levels[$j] > $levels[$i]; $j++) {
                $include = $values[$j] !== '';
            }

            if (!$include) {
                continue;
            }

            if ($values[$i] === '') {
                $gedcom_lines[] = $levels[$i] . ' ' . $tags[$i];
                continue;
            }

            // Continuation lines of a CONC line stay at the level of the CONC line itself;
            // for every other tag they go one level deeper.
            if ($tags[$i] === 'CONC') {
                $next_level = (int) $levels[$i];
            } else {
                $next_level = 1 + (int) $levels[$i];
            }

            $gedcom_lines[] = $levels[$i] . ' ' . $tags[$i] . ' ' . str_replace("\n", "\n" . $next_level . ' CONT ', $values[$i]);
        }

        return implode("\n", $gedcom_lines);
    }

    /**
     * Add blank lines, to allow a user to add/edit new values.
     *
     * @param Fact $fact
     * @param bool $include_hidden
     *
     * @return string
     */
    public function insertMissingSubtags(Fact $fact, bool $include_hidden): string
    {
        return $this->insertMissingLevels($fact->record()->tree(), $fact->tag(), $fact->gedcom(), $include_hidden);
    }

    /**
     * Add blank lines, to allow a user to add/edit new values.
     *
     * The level 0 line is not part of the result: for NOTE records its text is
     * moved to a "1 CONC" line; for all other records the line is removed.
     *
     * @param GedcomRecord $record
     * @param bool         $include_hidden
     *
     * @return string
     */
    public function insertMissingRecordSubtags(GedcomRecord $record, bool $include_hidden): string
    {
        $gedcom = $this->insertMissingLevels($record->tree(), $record->tag(), $record->gedcom(), $include_hidden);

        // NOTE records have data at level 0.  Move it to 1 CONC.
        if ($record->tag() === 'NOTE') {
            return preg_replace('/^0 @[^@]+@ NOTE/', '1 CONC', $gedcom);
        }

        // The newline is optional, so that a record consisting of only a level 0 line is also stripped.
        return preg_replace('/^0.*\n?/', '', $gedcom);
    }

    /**
     * Recursively add a blank line for each sub-tag that the element for $tag
     * allows but $gedcom does not (yet) contain, and put the existing
     * sub-records in the order in which the element lists its sub-tags.
     *
     * CONT lines are merged onto their parent line, separated by "\r".
     *
     * @param Tree   $tree
     * @param string $tag            full tag of the line/record, e.g. INDI:BIRT
     * @param string $gedcom         the line/record and all its sub-records
     * @param bool   $include_hidden whether to add sub-tags for which isHiddenTag() is true
     *
     * @return string
     */
    protected function insertMissingLevels(Tree $tree, string $tag, string $gedcom, bool $include_hidden): string
    {
        $next_level = substr_count($tag, ':') + 1;
        $factory    = Registry::elementFactory();
        $subtags    = $factory->make($tag)->subtags();

        // Merge CONT records onto their parent line.
        $gedcom = strtr($gedcom, [
            "\n" . $next_level . ' CONT ' => "\r",
            "\n" . $next_level . ' CONT' => "\r",
        ]);

        // The first part is level N.  The remainder are level N+1.
        // The level must be followed by a space, so that level 1 does not also match level 10, etc.
        $parts  = preg_split('/\n(?=' . $next_level . ' )/', $gedcom);
        $return = array_shift($parts);

        foreach ($subtags as $subtag => $occurrences) {
            $subtag = (string) $subtag;

            if (!$include_hidden && $this->isHiddenTag($tag . ':' . $subtag)) {
                continue;
            }

            [$min, $max] = explode(':', $occurrences);

            $min = (int) $min;

            if ($max === 'M') {
                $max = PHP_INT_MAX;
            } else {
                $max = (int) $max;
            }

            $count = 0;

            // Add expected subtags in our preferred order.
            foreach ($parts as $n => $part) {
                if ($this->startsWithTag($part, $next_level, $subtag)) {
                    $return .= "\n" . $this->insertMissingLevels($tree, $tag . ':' . $subtag, $part, $include_hidden);
                    $count++;
                    unset($parts[$n]);
                }
            }

            // Allowed to have more of this subtag?
            if ($count < $max) {
                // Create a new one.
                $new_line = $next_level . ' ' . $subtag;
                $default  = $factory->make($tag . ':' . $subtag)->default($tree);

                if ($default !== '') {
                    $new_line .= ' ' . $default;
                }

                // Always offer one blank line - more if needed to reach the minimum.
                $number_to_add = max(1, $min - $count);
                $gedcom_to_add = "\n" . $this->insertMissingLevels($tree, $tag . ':' . $subtag, $new_line, $include_hidden);

                $return .= str_repeat($gedcom_to_add, $number_to_add);
            }
        }

        // Now add any unexpected/existing data.
        if ($parts !== []) {
            $return .= "\n" . implode("\n", $parts);
        }

        return $return;
    }

    /**
     * Should this tag be excluded when creating new data?
     *
     * A tag is hidden when it contains any of the tags listed in
     * Gedcom::HIDDEN_TAGS under a key for which the site preference
     * "HIDE_<key>" is truthy.
     *
     * @param string $tag
     *
     * @return bool
     */
    private function isHiddenTag(string $tag): bool
    {
        $preferences = array_filter(
            Gedcom::HIDDEN_TAGS,
            static fn (string $x): bool => (bool) Site::getPreference('HIDE_' . $x),
            ARRAY_FILTER_USE_KEY
        );
        $preferences = array_values($preferences);
        $hidden_tags = array_merge(...$preferences);

        foreach ($hidden_tags as $hidden_tag) {
            if (str_contains($tag, $hidden_tag)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Does a block of GEDCOM begin with a line of the given level and tag?
     *
     * Unlike a plain prefix test, the tag must be complete: "1 EMAI" does not
     * match a line that begins "1 EMAIL".
     *
     * @param string $gedcom
     * @param int    $level
     * @param string $tag
     *
     * @return bool
     */
    private function startsWithTag(string $gedcom, int $level, string $tag): bool
    {
        $prefix = $level . ' ' . $tag;

        if (!str_starts_with($gedcom, $prefix)) {
            return false;
        }

        // The tag ends at the end of the text, at the value, at a sub-record or at merged CONT text.
        $next = (string) substr($gedcom, strlen($prefix), 1);

        return $next === '' || $next === ' ' || $next === "\n" || $next === "\r";
    }

    /**
     * Run handleUpdates() against a temporary set of interface update arrays.
     *
     * The public arrays are replaced for the duration of the call and are
     * always restored afterwards - also when handleUpdates() throws.
     *
     * @param array<string> $glevels
     * @param array<string> $tag
     * @param array<string> $islink
     * @param array<string> $text
     * @param string        $inputRec
     * @param string        $levelOverride
     *
     * @return string
     */
    private function handleUpdatesUsing(array $glevels, array $tag, array $islink, array $text, string $inputRec, string $levelOverride): string
    {
        $saved = [$this->glevels, $this->tag, $this->islink, $this->text];

        $this->glevels = $glevels;
        $this->tag     = $tag;
        $this->islink  = $islink;
        $this->text    = $text;

        try {
            return $this->handleUpdates($inputRec, $levelOverride);
        } finally {
            [$this->glevels, $this->tag, $this->islink, $this->text] = $saved;
        }
    }
}
649