xref: /webtrees/app/Services/GedcomEditService.php (revision 9c7bc1e39d79638d4cbdd62ebe6ca50d76151602)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Fisharebest\Webtrees\Fact;
23use Fisharebest\Webtrees\Family;
24use Fisharebest\Webtrees\Gedcom;
25use Fisharebest\Webtrees\GedcomRecord;
26use Fisharebest\Webtrees\Individual;
27use Fisharebest\Webtrees\Note;
28use Fisharebest\Webtrees\Registry;
29use Fisharebest\Webtrees\Site;
30use Fisharebest\Webtrees\Tree;
31
32use function array_filter;
33use function array_merge;
34use function array_shift;
35use function array_values;
36use function assert;
37use function count;
38use function explode;
39use function implode;
40use function max;
41use function preg_replace;
42use function preg_split;
43use function str_repeat;
44use function str_replace;
45use function substr_count;
46use function trim;
47
48use const ARRAY_FILTER_USE_BOTH;
49use const ARRAY_FILTER_USE_KEY;
50use const PHP_INT_MAX;
51
52/**
53 * Utilities to edit/save GEDCOM data.
54 */
class GedcomEditService
{
    /** @var string[] GEDCOM level of each edited line (numeric strings from the edit form) */
    public $glevels = [];

    /** @var string[] GEDCOM tag of each edited line */
    public $tag = [];

    /** @var string[] 1 or 0 for each edited line: whether the text is a link and must be wrapped in @...@ */
    public $islink = [];

    /** @var string[] Text/value of each edited line */
    public $text = [];

    /** @var string[] Levels of the lines that splitSource() assigned to the SOUR group */
    protected $glevelsSOUR = [];

    /** @var string[] Tags of the lines that splitSource() assigned to the SOUR group */
    protected $tagSOUR = [];

    /** @var string[] Link flags of the lines that splitSource() assigned to the SOUR group */
    protected $islinkSOUR = [];

    /** @var string[] Text of the lines that splitSource() assigned to the SOUR group */
    protected $textSOUR = [];

    /** @var string[] Levels of the lines that splitSource() assigned to the "rest" group */
    protected $glevelsRest = [];

    /** @var string[] Tags of the lines that splitSource() assigned to the "rest" group */
    protected $tagRest = [];

    /** @var string[] Link flags of the lines that splitSource() assigned to the "rest" group */
    protected $islinkRest = [];

    /** @var string[] Text of the lines that splitSource() assigned to the "rest" group */
    protected $textRest = [];

    /**
     * Split the $glevels, $tag, $islink, and $text arrays so that the
     * entries belonging to a SOUR line are separate from everything else.
     *
     * Input arrays:
     * - $glevels[] - the gedcom level for each line that was edited
     * - $tag[] - the tag for each gedcom line that was edited
     * - $islink[] - 1 or 0 values to indicate when the text is a link element
     * - $text[] - the text data for each line
     *
     * Output arrays (same four columns, same relative order):
     * - $glevelsSOUR[], $tagSOUR[], $islinkSOUR[], $textSOUR[] - a SOUR line and
     *   the deeper-level lines that follow it
     * - $glevelsRest[], $tagRest[], $islinkRest[], $textRest[] - every other line
     *
     * The line that ends a SOUR group (level not greater than the SOUR level)
     * is always placed in the "rest" arrays, even when it is itself a SOUR line.
     *
     * @return void
     */
    public function splitSource(): void
    {
        $this->glevelsSOUR = [];
        $this->tagSOUR     = [];
        $this->islinkSOUR  = [];
        $this->textSOUR    = [];

        $this->glevelsRest = [];
        $this->tagRest     = [];
        $this->islinkRest  = [];
        $this->textRest    = [];

        $inSOUR    = false;
        $levelSOUR = 0;

        // Assume all arrays are the same size.
        $count = count($this->glevels);

        for ($i = 0; $i < $count; $i++) {
            if ($inSOUR) {
                if ($levelSOUR < $this->glevels[$i]) {
                    // Still below the SOUR line: part of the same group.
                    $dest = 'S';
                } else {
                    // Back at (or above) the SOUR level: the group has ended.
                    $inSOUR = false;
                    $dest   = 'R';
                }
            } elseif ($this->tag[$i] === 'SOUR') {
                // Start of a SOUR group - remember its level to detect the end.
                $inSOUR    = true;
                $levelSOUR = $this->glevels[$i];
                $dest      = 'S';
            } else {
                $dest = 'R';
            }

            if ($dest === 'S') {
                $this->glevelsSOUR[] = $this->glevels[$i];
                $this->tagSOUR[]     = $this->tag[$i];
                $this->islinkSOUR[]  = $this->islink[$i];
                $this->textSOUR[]    = $this->text[$i];
            } else {
                $this->glevelsRest[] = $this->glevels[$i];
                $this->tagRest[]     = $this->tag[$i];
                $this->islinkRest[]  = $this->islink[$i];
                $this->textRest[]    = $this->text[$i];
            }
        }
    }

    /**
     * Add new GEDCOM lines from the $xxxRest interface update arrays, which
     * were produced by splitSource().
     * See handleUpdates() for details. Levels are never adjusted here.
     *
     * @param string $inputRec the gedcom record to append to
     *
     * @return string the record with the non-SOUR lines appended
     */
    public function updateRest(string $inputRec): string
    {
        if (count($this->tagRest) === 0) {
            return $inputRec; // No update required
        }

        return $this->updateFromArrays(
            $inputRec,
            'no',
            $this->glevelsRest,
            $this->tagRest,
            $this->islinkRest,
            $this->textRest
        );
    }

    /**
     * Add new gedcom lines from the interface update arrays.
     *
     * The edit forms produce the following arrays:
     * - $glevels[] - the gedcom level for each line that was edited
     * - $tag[] - the tag for each gedcom line that was edited
     * - $islink[] - 1 or 0 values to tell whether the text is a link element and should be surrounded by @@
     * - $text[] - the text data for each line
     * With these arrays you can recreate the gedcom lines like this
     * <code>$glevel[0].' '.$tag[0].' '.$text[0]</code>
     * There is one index in each of these arrays for each line of the gedcom
     * fact that is being edited.
     *
     * A line whose text is empty (or only whitespace) is written only when a
     * following deeper-level line has a non-blank value. For example, we keep
     * "1 DEAT" when it is followed by a non-empty "2 DATE" or "2 PLAC".
     * An empty OBJE line is kept only for a non-blank FILE line beneath it.
     *
     * An empty SOUR link ('' or '@@') has the text of all its sub-lines
     * cleared first. Note that this modifies $this->text.
     *
     * Newlines inside a written line are converted to CONT lines at the next level.
     *
     * @param string $newged        the gedcom record to add the lines to
     * @param string $levelOverride 'no' to use the levels in $glevels as given;
     *                              any other value adds one to every level
     *
     * @return string The updated gedcom record
     */
    public function handleUpdates(string $newged, string $levelOverride = 'no'): string
    {
        $levelAdjust = $levelOverride === 'no' ? 0 : 1;

        // Assert all arrays are the same size.
        assert(count($this->glevels) === count($this->tag));
        assert(count($this->glevels) === count($this->text));
        assert(count($this->glevels) === count($this->islink));

        $count = count($this->glevels);

        for ($j = 0; $j < $count; $j++) {
            // Look for empty SOUR reference with non-empty sub-records.
            // This can happen when the SOUR entry is deleted but its sub-records
            // were incorrectly left intact.
            // The sub-records should be deleted.
            if ($this->tag[$j] === 'SOUR' && ($this->text[$j] === '@@' || $this->text[$j] === '')) {
                $this->text[$j] = '';

                for ($k = $j + 1; $k < $count && $this->glevels[$k] > $this->glevels[$j]; $k++) {
                    $this->text[$k] = '';
                }
            }

            // Whitespace-only text counts as empty, both here and for sub-lines,
            // so that a blank sub-line cannot keep an otherwise empty parent alive.
            $has_value = trim($this->text[$j]) !== '';
            $pass      = $has_value;

            if (!$pass) {
                //-- facts with empty values must have sub records with a value
                for ($k = $j + 1; $k < $count && $this->glevels[$k] > $this->glevels[$j]; $k++) {
                    $sub_has_value = trim($this->text[$k]) !== '';

                    if ($sub_has_value && ($this->tag[$j] !== 'OBJE' || $this->tag[$k] === 'FILE')) {
                        $pass = true;
                        break;
                    }
                }
            }

            //-- if the value is not empty or it has sub lines
            //-- then write the line to the gedcom record
            //-- we have to let some empty text lines pass through... (DEAT, BIRT, etc)
            if ($pass) {
                $level   = (int) $this->glevels[$j] + $levelAdjust;
                $newline = $level . ' ' . $this->tag[$j];

                if ($has_value) {
                    if ($this->islink[$j]) {
                        $newline .= ' @' . trim($this->text[$j], '@') . '@';
                    } else {
                        $newline .= ' ' . $this->text[$j];
                    }
                }

                $newged .= "\n" . str_replace("\n", "\n" . ($level + 1) . ' CONT ', $newline);
            }
        }

        return $newged;
    }

    /**
     * Add new GEDCOM lines from the $xxxSOUR interface update arrays, which
     * were produced by splitSource().
     * See handleUpdates() for details.
     *
     * @param string $inputRec      the gedcom record to append to
     * @param string $levelOverride passed through to handleUpdates()
     *
     * @return string the record with the SOUR lines appended
     */
    public function updateSource(string $inputRec, string $levelOverride = 'no'): string
    {
        if (count($this->tagSOUR) === 0) {
            return $inputRec; // No update required
        }

        return $this->updateFromArrays(
            $inputRec,
            $levelOverride,
            $this->glevelsSOUR,
            $this->tagSOUR,
            $this->islinkSOUR,
            $this->textSOUR
        );
    }

    /**
     * Run handleUpdates() against a temporary set of update arrays.
     *
     * The public interface arrays are swapped out for the call and are always
     * restored afterwards, even if handleUpdates() throws.
     *
     * @param string   $inputRec
     * @param string   $levelOverride
     * @param string[] $glevels
     * @param string[] $tag
     * @param string[] $islink
     * @param string[] $text
     *
     * @return string
     */
    private function updateFromArrays(
        string $inputRec,
        string $levelOverride,
        array $glevels,
        array $tag,
        array $islink,
        array $text
    ): string {
        $saved = [$this->glevels, $this->tag, $this->islink, $this->text];

        $this->glevels = $glevels;
        $this->tag     = $tag;
        $this->islink  = $islink;
        $this->text    = $text;

        try {
            return $this->handleUpdates($inputRec, $levelOverride);
        } finally {
            [$this->glevels, $this->tag, $this->islink, $this->text] = $saved;
        }
    }

    /**
     * Reassemble edited GEDCOM fields into a GEDCOM fact/event string.
     *
     * Each value is first passed through the canonical() method of the element
     * registered for its full tag (e.g. INDI:BIRT:DATE). Lines with no value
     * are kept only when a following deeper-level line has a value.
     *
     * @param string        $record_type
     * @param array<string> $levels
     * @param array<string> $tags
     * @param array<string> $values
     *
     * @return string
     */
    public function editLinesToGedcom(string $record_type, array $levels, array $tags, array $values): string
    {
        // Assert all arrays are the same size.
        $count = count($levels);
        assert($count > 0);
        assert(count($tags) === $count);
        assert(count($values) === $count);

        $gedcom_lines = [];

        // $hierarchy[N] is the tag currently open at level N; level 0 is the record type.
        $hierarchy = [$record_type];

        for ($i = 0; $i < $count; $i++) {
            $hierarchy[$levels[$i]] = $tags[$i];

            // Ignore any stale, deeper entries left over from earlier lines.
            $full_tag   = implode(':', array_slice($hierarchy, 0, 1 + (int) $levels[$i]));
            $element    = Registry::elementFactory()->make($full_tag);
            $values[$i] = $element->canonical($values[$i]);

            // If "1 FACT Y" has a DATE or PLAC, then delete the value of Y
            if ($levels[$i] === '1' && $values[$i] === 'Y') {
                for ($j = $i + 1; $j < $count && $levels[$j] > $levels[$i]; ++$j) {
                    if ($levels[$j] === '2' && ($tags[$j] === 'DATE' || $tags[$j] === 'PLAC') && $values[$j] !== '') {
                        $values[$i] = '';
                        break;
                    }
                }
            }

            // Include this line if there is a value - or if there is a child record with a value.
            $include = $values[$i] !== '';

            for ($j = $i + 1; !$include && $j < $count && $levels[$j] > $levels[$i]; $j++) {
                $include = $values[$j] !== '';
            }

            if (!$include) {
                continue;
            }

            if ($values[$i] === '') {
                $gedcom_lines[] = $levels[$i] . ' ' . $tags[$i];
                continue;
            }

            // Continuation lines of a CONC stay at the same level; otherwise one deeper.
            if ($tags[$i] === 'CONC') {
                $next_level = (int) $levels[$i];
            } else {
                $next_level = 1 + (int) $levels[$i];
            }

            $gedcom_lines[] = $levels[$i] . ' ' . $tags[$i] . ' ' . str_replace("\n", "\n" . $next_level . ' CONT ', $values[$i]);
        }

        return implode("\n", $gedcom_lines);
    }

    /**
     * Add blank lines, to allow a user to add/edit new values.
     *
     * @param Fact $fact
     * @param bool $include_hidden
     *
     * @return string
     */
    public function insertMissingFactSubtags(Fact $fact, bool $include_hidden): string
    {
        return $this->insertMissingLevels($fact->record()->tree(), $fact->tag(), $fact->gedcom(), $include_hidden);
    }

    /**
     * Add blank lines, to allow a user to add/edit new values.
     *
     * The level 0 line is removed from the result - except for NOTE records,
     * where it is rewritten as "1 CONC" so that the note text is kept.
     *
     * @param GedcomRecord $record
     * @param bool         $include_hidden
     *
     * @return string
     */
    public function insertMissingRecordSubtags(GedcomRecord $record, bool $include_hidden): string
    {
        $gedcom = $this->insertMissingLevels($record->tree(), $record->tag(), $record->gedcom(), $include_hidden);

        // preg_replace() returns null on a PCRE error; fall back to the unmodified text.

        // NOTE records have data at level 0.  Move it to 1 CONC.
        if ($record instanceof Note) {
            return preg_replace('/^0 @[^@]+@ NOTE/', '1 CONC', $gedcom) ?? $gedcom;
        }

        return preg_replace('/^0.*\n/', '', $gedcom) ?? $gedcom;
    }

    /**
     * List of facts/events to add to families and individuals.
     *
     * @param Family|Individual $record
     * @param bool              $include_hidden
     *
     * @return array<string>
     */
    public function factsToAdd(GedcomRecord $record, bool $include_hidden): array
    {
        $subtags = Registry::elementFactory()->make($record->tag())->subtags();

        // Subtags limited to one occurrence ("...:1") are only offered while the record has none.
        $fn_available = fn (string $v, string $k): bool => !str_ends_with($v, ':1') || $record->facts([$k])->isEmpty();

        $subtags = array_filter($subtags, $fn_available, ARRAY_FILTER_USE_BOTH);
        $subtags = array_keys($subtags);

        if (!$include_hidden) {
            $fn_hidden = fn (string $t): bool => !$this->isHiddenTag($record->tag() . ':' . $t);
            $subtags   = array_filter($subtags, $fn_hidden);
        }

        // These tags are never offered in this list.
        return array_diff($subtags, ['HUSB', 'WIFE', 'CHIL', 'FAMC', 'FAMS', 'CHAN']);
    }

    /**
     * Add empty lines for the subtags that the element for $tag allows but
     * which are not present in $gedcom, recursing into each subtag.
     *
     * CONT lines at the next level are merged onto their parent line, with the
     * line break represented as "\r".
     *
     * @param Tree   $tree
     * @param string $tag            full tag of the first line, e.g. INDI:BIRT
     * @param string $gedcom         the line for $tag, followed by its subordinate lines
     * @param bool   $include_hidden
     *
     * @return string
     */
    protected function insertMissingLevels(Tree $tree, string $tag, string $gedcom, bool $include_hidden): string
    {
        $next_level = substr_count($tag, ':') + 1;
        $factory    = Registry::elementFactory();
        $subtags    = $factory->make($tag)->subtags();

        // Merge CONT records onto their parent line.
        $gedcom = strtr($gedcom, [
            "\n" . $next_level . ' CONT ' => "\r",
            "\n" . $next_level . ' CONT'  => "\r",
        ]);

        // The first part is level N.  The remainder are level N+1.
        $parts  = preg_split('/\n(?=' . $next_level . ')/', $gedcom);
        $return = array_shift($parts);

        foreach ($subtags as $subtag => $occurrences) {
            $full_subtag = $tag . ':' . $subtag;

            if (!$include_hidden && $this->isHiddenTag($full_subtag)) {
                continue;
            }

            // Occurrences are "min:max", where max may be "M" for unlimited.
            [$min, $max] = explode(':', $occurrences);

            $min = (int) $min;
            $max = $max === 'M' ? PHP_INT_MAX : (int) $max;

            $count = 0;

            // Add expected subtags in our preferred order.
            foreach ($parts as $n => $part) {
                // Compare the whole tag: "1 CHRA" must not be treated as "1 CHR".
                if ($this->lineHasTag($part, $next_level, (string) $subtag)) {
                    $return .= "\n" . $this->insertMissingLevels($tree, $full_subtag, $part, $include_hidden);
                    $count++;
                    unset($parts[$n]);
                }
            }

            // Allowed to have more of this subtag?
            if ($count < $max) {
                // Create a new one.
                $new_line = $next_level . ' ' . $subtag;
                $default  = $factory->make($full_subtag)->default($tree);

                if ($default !== '') {
                    $new_line .= ' ' . $default;
                }

                $number_to_add = max(1, $min - $count);
                $gedcom_to_add = "\n" . $this->insertMissingLevels($tree, $full_subtag, $new_line, $include_hidden);

                $return .= str_repeat($gedcom_to_add, $number_to_add);
            }
        }

        // Now add any unexpected/existing data.
        if ($parts !== []) {
            $return .= "\n" . implode("\n", $parts);
        }

        return $return;
    }

    /**
     * Does a block of GEDCOM start with a line of exactly this level and tag?
     *
     * @param string $line  one or more GEDCOM lines; only the first is examined
     * @param int    $level
     * @param string $tag
     *
     * @return bool
     */
    private function lineHasTag(string $line, int $level, string $tag): bool
    {
        if (!str_starts_with($line, $level . ' ' . $tag)) {
            return false;
        }

        // The prefix matches; now make sure the tag is not just the start of a longer one.
        $tokens = preg_split('/\s/', $line, 3);

        return ($tokens[1] ?? '') === $tag;
    }

    /**
     * Is this tag hidden by the site preferences when creating new data?
     *
     * @param string $tag
     *
     * @return bool
     */
    private function isHiddenTag(string $tag): bool
    {
        // Keep only the groups of hidden tags whose HIDE_xxx site preference is enabled.
        $fn_hide = fn (string $x): bool => (bool) Site::getPreference('HIDE_' . $x);

        $preferences = array_filter(Gedcom::HIDDEN_TAGS, $fn_hide, ARRAY_FILTER_USE_KEY);
        $preferences = array_values($preferences);
        $hidden_tags = array_merge(...$preferences);

        // A tag is hidden when it contains any of the hidden tags as a substring.
        foreach ($hidden_tags as $hidden_tag) {
            if (str_contains($tag, $hidden_tag)) {
                return true;
            }
        }

        return false;
    }
}
554