xref: /webtrees/app/Services/GedcomImportService.php (revision 3793e425c2785d459ca4db6202cc4a6b01e4a322)
12c685d76SGreg Roach<?php
22c685d76SGreg Roach
32c685d76SGreg Roach/**
42c685d76SGreg Roach * webtrees: online genealogy
55bfc6897SGreg Roach * Copyright (C) 2022 webtrees development team
62c685d76SGreg Roach * This program is free software: you can redistribute it and/or modify
72c685d76SGreg Roach * it under the terms of the GNU General Public License as published by
82c685d76SGreg Roach * the Free Software Foundation, either version 3 of the License, or
92c685d76SGreg Roach * (at your option) any later version.
102c685d76SGreg Roach * This program is distributed in the hope that it will be useful,
112c685d76SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
122c685d76SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
132c685d76SGreg Roach * GNU General Public License for more details.
142c685d76SGreg Roach * You should have received a copy of the GNU General Public License
152c685d76SGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>.
162c685d76SGreg Roach */
172c685d76SGreg Roach
182c685d76SGreg Roachdeclare(strict_types=1);
192c685d76SGreg Roach
202c685d76SGreg Roachnamespace Fisharebest\Webtrees\Services;
212c685d76SGreg Roach
222c685d76SGreg Roachuse Fisharebest\Webtrees\Date;
232c685d76SGreg Roachuse Fisharebest\Webtrees\Elements\UnknownElement;
242c685d76SGreg Roachuse Fisharebest\Webtrees\Exceptions\GedcomErrorException;
252c685d76SGreg Roachuse Fisharebest\Webtrees\Family;
262c685d76SGreg Roachuse Fisharebest\Webtrees\Gedcom;
272c685d76SGreg Roachuse Fisharebest\Webtrees\Header;
282c685d76SGreg Roachuse Fisharebest\Webtrees\Individual;
292c685d76SGreg Roachuse Fisharebest\Webtrees\Location;
302c685d76SGreg Roachuse Fisharebest\Webtrees\Media;
312c685d76SGreg Roachuse Fisharebest\Webtrees\Note;
322c685d76SGreg Roachuse Fisharebest\Webtrees\Place;
332c685d76SGreg Roachuse Fisharebest\Webtrees\PlaceLocation;
342c685d76SGreg Roachuse Fisharebest\Webtrees\Registry;
352c685d76SGreg Roachuse Fisharebest\Webtrees\Repository;
362c685d76SGreg Roachuse Fisharebest\Webtrees\Soundex;
372c685d76SGreg Roachuse Fisharebest\Webtrees\Source;
382c685d76SGreg Roachuse Fisharebest\Webtrees\Submission;
392c685d76SGreg Roachuse Fisharebest\Webtrees\Submitter;
402c685d76SGreg Roachuse Fisharebest\Webtrees\Tree;
412c685d76SGreg Roachuse Illuminate\Database\Capsule\Manager as DB;
422c685d76SGreg Roachuse Illuminate\Database\Query\JoinClause;
432c685d76SGreg Roach
442c685d76SGreg Roachuse function app;
452c685d76SGreg Roachuse function array_chunk;
462c685d76SGreg Roachuse function array_intersect_key;
472c685d76SGreg Roachuse function array_map;
482c685d76SGreg Roachuse function array_unique;
492c685d76SGreg Roachuse function assert;
502c685d76SGreg Roachuse function date;
512c685d76SGreg Roachuse function explode;
522c685d76SGreg Roachuse function max;
532c685d76SGreg Roachuse function mb_substr;
542c685d76SGreg Roachuse function preg_match;
552c685d76SGreg Roachuse function preg_match_all;
562c685d76SGreg Roachuse function preg_replace;
572c685d76SGreg Roachuse function round;
582c685d76SGreg Roachuse function str_contains;
592c685d76SGreg Roachuse function str_replace;
602c685d76SGreg Roachuse function str_starts_with;
612c685d76SGreg Roachuse function strlen;
622c685d76SGreg Roachuse function strtolower;
632c685d76SGreg Roachuse function strtoupper;
642c685d76SGreg Roachuse function strtr;
652c685d76SGreg Roachuse function substr;
662c685d76SGreg Roachuse function trim;
672c685d76SGreg Roach
682c685d76SGreg Roachuse const PREG_SET_ORDER;
692c685d76SGreg Roach
702c685d76SGreg Roach/**
712c685d76SGreg Roach * Class GedcomImportService - import GEDCOM data
722c685d76SGreg Roach */
732c685d76SGreg Roachclass GedcomImportService
742c685d76SGreg Roach{
752c685d76SGreg Roach    /**
762c685d76SGreg Roach     * Tidy up a gedcom record on import, so that we can access it consistently/efficiently.
772c685d76SGreg Roach     *
782c685d76SGreg Roach     * @param string $rec
792c685d76SGreg Roach     * @param Tree   $tree
802c685d76SGreg Roach     *
812c685d76SGreg Roach     * @return string
822c685d76SGreg Roach     */
832c685d76SGreg Roach    private function reformatRecord(string $rec, Tree $tree): string
842c685d76SGreg Roach    {
852c685d76SGreg Roach        $gedcom_service = app(GedcomService::class);
862c685d76SGreg Roach        assert($gedcom_service instanceof GedcomService);
872c685d76SGreg Roach
882c685d76SGreg Roach        // Strip out mac/msdos line endings
892c685d76SGreg Roach        $rec = preg_replace("/[\r\n]+/", "\n", $rec);
902c685d76SGreg Roach
912c685d76SGreg Roach        // Extract lines from the record; lines consist of: level + optional xref + tag + optional data
922c685d76SGreg Roach        $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER);
932c685d76SGreg Roach
942c685d76SGreg Roach        // Process the record line-by-line
952c685d76SGreg Roach        $newrec = '';
962c685d76SGreg Roach        foreach ($matches as $n => $match) {
972c685d76SGreg Roach            [, $level, $xref, $tag, $data] = $match;
982c685d76SGreg Roach
992c685d76SGreg Roach            $tag = $gedcom_service->canonicalTag($tag);
1002c685d76SGreg Roach
1012c685d76SGreg Roach            switch ($tag) {
1022c685d76SGreg Roach                case 'AFN':
1032c685d76SGreg Roach                    // AFN values are upper case
1042c685d76SGreg Roach                    $data = strtoupper($data);
1052c685d76SGreg Roach                    break;
1062c685d76SGreg Roach                case 'DATE':
1072c685d76SGreg Roach                    // Preserve text from INT dates
1082c685d76SGreg Roach                    if (str_contains($data, '(')) {
1092c685d76SGreg Roach                        [$date, $text] = explode('(', $data, 2);
1102c685d76SGreg Roach                        $text = ' (' . $text;
1112c685d76SGreg Roach                    } else {
1122c685d76SGreg Roach                        $date = $data;
1132c685d76SGreg Roach                        $text = '';
1142c685d76SGreg Roach                    }
1152c685d76SGreg Roach                    // Capitals
1162c685d76SGreg Roach                    $date = strtoupper($date);
1172c685d76SGreg Roach                    // Temporarily add leading/trailing spaces, to allow efficient matching below
1182c685d76SGreg Roach                    $date = ' ' . $date . ' ';
1192c685d76SGreg Roach                    // Ensure space digits and letters
1202c685d76SGreg Roach                    $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date);
1212c685d76SGreg Roach                    $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date);
1222c685d76SGreg Roach                    // Ensure space before/after calendar escapes
1232c685d76SGreg Roach                    $date = preg_replace('/@#[^@]+@/', ' $0 ', $date);
1242c685d76SGreg Roach                    // "BET." => "BET"
1252c685d76SGreg Roach                    $date = preg_replace('/(\w\w)\./', '$1', $date);
1262c685d76SGreg Roach                    // "CIR" => "ABT"
1272c685d76SGreg Roach                    $date = str_replace(' CIR ', ' ABT ', $date);
1282c685d76SGreg Roach                    $date = str_replace(' APX ', ' ABT ', $date);
1292c685d76SGreg Roach                    // B.C. => BC (temporarily, to allow easier handling of ".")
1302c685d76SGreg Roach                    $date = str_replace(' B.C. ', ' BC ', $date);
1312c685d76SGreg Roach                    // TMG uses "EITHER X OR Y"
1322c685d76SGreg Roach                    $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date);
1332c685d76SGreg Roach                    // "BET X - Y " => "BET X AND Y"
1342c685d76SGreg Roach                    $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date);
1352c685d76SGreg Roach                    $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date);
1362c685d76SGreg Roach                    // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y"
1372c685d76SGreg Roach                    $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date);
1382c685d76SGreg Roach                    $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date);
1392c685d76SGreg Roach                    // "@#ESC@ AFT X" => "AFT @#ESC@ X"
1402c685d76SGreg Roach                    $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date);
1412c685d76SGreg Roach                    // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900"
1422c685d76SGreg Roach                    // (don't change "/" - it is used in NS/OS dates)
1432c685d76SGreg Roach                    $date = preg_replace('/[.,:;-]/', ' ', $date);
1442c685d76SGreg Roach                    // BC => B.C.
1452c685d76SGreg Roach                    $date = str_replace(' BC ', ' B.C. ', $date);
1462c685d76SGreg Roach                    // Append the "INT" text
1472c685d76SGreg Roach                    $data = $date . $text;
1482c685d76SGreg Roach                    break;
1492c685d76SGreg Roach                case '_FILE':
1502c685d76SGreg Roach                    $tag = 'FILE';
1512c685d76SGreg Roach                    break;
1522c685d76SGreg Roach                case 'FORM':
1532c685d76SGreg Roach                    // Consistent commas
1542c685d76SGreg Roach                    $data = preg_replace('/ *, */', ', ', $data);
1552c685d76SGreg Roach                    break;
1562c685d76SGreg Roach                case 'HEAD':
1572c685d76SGreg Roach                    // HEAD records don't have an XREF or DATA
1582c685d76SGreg Roach                    if ($level === '0') {
1592c685d76SGreg Roach                        $xref = '';
1602c685d76SGreg Roach                        $data = '';
1612c685d76SGreg Roach                    }
1622c685d76SGreg Roach                    break;
1632c685d76SGreg Roach                case 'NAME':
1642c685d76SGreg Roach                    // Tidy up non-printing characters
1652c685d76SGreg Roach                    $data = preg_replace('/  +/', ' ', trim($data));
1662c685d76SGreg Roach                    break;
1672c685d76SGreg Roach                case 'PEDI':
1682c685d76SGreg Roach                    // PEDI values are lower case
1692c685d76SGreg Roach                    $data = strtolower($data);
1702c685d76SGreg Roach                    break;
1712c685d76SGreg Roach                case 'PLAC':
1722c685d76SGreg Roach                    // Consistent commas
1732c685d76SGreg Roach                    $data = preg_replace('/ *[,,،] */u', ', ', $data);
1742c685d76SGreg Roach                    // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W
1752c685d76SGreg Roach                    if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $match)) {
1762c685d76SGreg Roach                        $data =
1772c685d76SGreg Roach                            $match[1] . "\n" .
1782c685d76SGreg Roach                            ($level + 1) . " MAP\n" .
1792c685d76SGreg Roach                            ($level + 2) . ' LATI ' . ($match[5] . round($match[2] + ($match[3] / 60) + ($match[4] / 3600), 4)) . "\n" .
1802c685d76SGreg Roach                            ($level + 2) . ' LONG ' . ($match[9] . round($match[6] + ($match[7] / 60) + ($match[8] / 3600), 4));
1812c685d76SGreg Roach                    }
1822c685d76SGreg Roach                    break;
1832c685d76SGreg Roach                case 'RESN':
1842c685d76SGreg Roach                    // RESN values are lower case (confidential, privacy, locked, none)
1852c685d76SGreg Roach                    $data = strtolower($data);
1862c685d76SGreg Roach                    if ($data === 'invisible') {
1872c685d76SGreg Roach                        $data = 'confidential'; // From old versions of Legacy.
1882c685d76SGreg Roach                    }
1892c685d76SGreg Roach                    break;
1902c685d76SGreg Roach                case 'SEX':
1912c685d76SGreg Roach                    $data = strtoupper($data);
1922c685d76SGreg Roach                    break;
1932c685d76SGreg Roach                case 'STAT':
1942c685d76SGreg Roach                    if ($data === 'CANCELLED') {
1952c685d76SGreg Roach                        // PhpGedView mis-spells this tag - correct it.
1962c685d76SGreg Roach                        $data = 'CANCELED';
1972c685d76SGreg Roach                    }
1982c685d76SGreg Roach                    break;
1992c685d76SGreg Roach                case 'TEMP':
2002c685d76SGreg Roach                    // Temple codes are upper case
2012c685d76SGreg Roach                    $data = strtoupper($data);
2022c685d76SGreg Roach                    break;
2032c685d76SGreg Roach                case 'TRLR':
2042c685d76SGreg Roach                    // TRLR records don't have an XREF or DATA
2052c685d76SGreg Roach                    if ($level === '0') {
2062c685d76SGreg Roach                        $xref = '';
2072c685d76SGreg Roach                        $data = '';
2082c685d76SGreg Roach                    }
2092c685d76SGreg Roach                    break;
2102c685d76SGreg Roach            }
2112c685d76SGreg Roach            // Suppress "Y", for facts/events with a DATE or PLAC
2122c685d76SGreg Roach            if ($data === 'y') {
2132c685d76SGreg Roach                $data = 'Y';
2142c685d76SGreg Roach            }
2152c685d76SGreg Roach            if ($level === '1' && $data === 'Y') {
2162c685d76SGreg Roach                for ($i = $n + 1; $i < $num_matches - 1 && $matches[$i][1] !== '1'; ++$i) {
2172c685d76SGreg Roach                    if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') {
2182c685d76SGreg Roach                        $data = '';
2192c685d76SGreg Roach                        break;
2202c685d76SGreg Roach                    }
2212c685d76SGreg Roach                }
2222c685d76SGreg Roach            }
2232c685d76SGreg Roach            // Reassemble components back into a single line
2242c685d76SGreg Roach            switch ($tag) {
2252c685d76SGreg Roach                default:
2262c685d76SGreg Roach                    // Remove tabs and multiple/leading/trailing spaces
2272c685d76SGreg Roach                    $data = strtr($data, ["\t" => ' ']);
2282c685d76SGreg Roach                    $data = trim($data, ' ');
2292c685d76SGreg Roach                    while (str_contains($data, '  ')) {
2302c685d76SGreg Roach                        $data = strtr($data, ['  ' => ' ']);
2312c685d76SGreg Roach                    }
2322c685d76SGreg Roach                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
2332c685d76SGreg Roach                    break;
2342c685d76SGreg Roach                case 'NOTE':
2352c685d76SGreg Roach                case 'TEXT':
2362c685d76SGreg Roach                case 'DATA':
2372c685d76SGreg Roach                case 'CONT':
2382c685d76SGreg Roach                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
2392c685d76SGreg Roach                    break;
2402c685d76SGreg Roach                case 'FILE':
2412c685d76SGreg Roach                    // Strip off the user-defined path prefix
2422c685d76SGreg Roach                    $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH');
2432c685d76SGreg Roach                    if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) {
2442c685d76SGreg Roach                        $data = substr($data, strlen($GEDCOM_MEDIA_PATH));
2452c685d76SGreg Roach                    }
2462c685d76SGreg Roach                    // convert backslashes in filenames to forward slashes
2472c685d76SGreg Roach                    $data = preg_replace("/\\\\/", '/', $data);
2482c685d76SGreg Roach
2492c685d76SGreg Roach                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
2502c685d76SGreg Roach                    break;
2512c685d76SGreg Roach                case 'CONC':
2522c685d76SGreg Roach                    // Merge CONC lines, to simplify access later on.
2532c685d76SGreg Roach                    $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data;
2542c685d76SGreg Roach                    break;
2552c685d76SGreg Roach            }
2562c685d76SGreg Roach        }
2572c685d76SGreg Roach
2582c685d76SGreg Roach        return $newrec;
2592c685d76SGreg Roach    }
2602c685d76SGreg Roach
2612c685d76SGreg Roach    /**
2622c685d76SGreg Roach     * import record into database
2632c685d76SGreg Roach     * this function will parse the given gedcom record and add it to the database
2642c685d76SGreg Roach     *
2652c685d76SGreg Roach     * @param string $gedrec the raw gedcom record to parse
2662c685d76SGreg Roach     * @param Tree   $tree   import the record into this tree
2672c685d76SGreg Roach     * @param bool   $update whether this is an updated record that has been accepted
2682c685d76SGreg Roach     *
2692c685d76SGreg Roach     * @return void
2702c685d76SGreg Roach     * @throws GedcomErrorException
2712c685d76SGreg Roach     */
2722c685d76SGreg Roach    public function importRecord(string $gedrec, Tree $tree, bool $update): void
2732c685d76SGreg Roach    {
2742c685d76SGreg Roach        $tree_id = $tree->id();
2752c685d76SGreg Roach
2762c685d76SGreg Roach        // Escaped @ signs (only if importing from file)
2772c685d76SGreg Roach        if (!$update) {
2782c685d76SGreg Roach            $gedrec = str_replace('@@', '@', $gedrec);
2792c685d76SGreg Roach        }
2802c685d76SGreg Roach
2812c685d76SGreg Roach        // Standardise gedcom format
2822c685d76SGreg Roach        $gedrec = $this->reformatRecord($gedrec, $tree);
2832c685d76SGreg Roach
2842c685d76SGreg Roach        // import different types of records
2852c685d76SGreg Roach        if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) {
2862c685d76SGreg Roach            [, $xref, $type] = $match;
287*3793e425SGreg Roach        } elseif (str_starts_with($gedrec, '0 HEAD')) {
288*3793e425SGreg Roach            $type = 'HEAD';
289*3793e425SGreg Roach            $xref = 'HEAD'; // For records without an XREF, use the type as a pseudo XREF.
290*3793e425SGreg Roach        } elseif (str_starts_with($gedrec, '0 TRLR')) {
291*3793e425SGreg Roach            $tree->setPreference('imported', '1');
292*3793e425SGreg Roach            $type = 'TRLR';
293*3793e425SGreg Roach            $xref = 'TRLR'; // For records without an XREF, use the type as a pseudo XREF.
2946bd4d63fSGreg Roach        } elseif (str_starts_with($gedrec, '0 _PLAC_DEFN')) {
2956bd4d63fSGreg Roach            $this->importLegacyPlacDefn($gedrec);
2966bd4d63fSGreg Roach
2976bd4d63fSGreg Roach            return;
2986bd4d63fSGreg Roach        } elseif (str_starts_with($gedrec, '0 _PLAC ')) {
2996bd4d63fSGreg Roach            $this->importTNGPlac($gedrec);
3006bd4d63fSGreg Roach
3016bd4d63fSGreg Roach            return;
302*3793e425SGreg Roach        } elseif (str_starts_with($gedrec, '0 _EVDEF')) {
303*3793e425SGreg Roach            // Created by RootsMagic.  We cannot process these records without an XREF.
304*3793e425SGreg Roach            return;
305*3793e425SGreg Roach        } elseif (str_starts_with($gedrec, '0 _EVENT_DEFN')) {
306*3793e425SGreg Roach            // Created by PAF and Legacy.  We cannot process these records without an XREF.
307*3793e425SGreg Roach            return;
308*3793e425SGreg Roach        } elseif (str_starts_with($gedrec, '0 PEDIGREELINK')) {
309*3793e425SGreg Roach            // Created by GenoPro.  We cannot process these records without an XREF.
310*3793e425SGreg Roach            return;
311*3793e425SGreg Roach        } elseif (str_starts_with($gedrec, '0 GLOBAL')) {
312*3793e425SGreg Roach            // Created by GenoPro.  We cannot process these records without an XREF.
313*3793e425SGreg Roach            return;
314*3793e425SGreg Roach        } elseif (str_starts_with($gedrec, '0 GENOMAP')) {
315*3793e425SGreg Roach            // Created by GenoPro.  We cannot process these records without an XREF.
316*3793e425SGreg Roach            return;
317*3793e425SGreg Roach        } elseif (str_starts_with($gedrec, '0 EMOTIONALRELATIONSHIP')) {
318*3793e425SGreg Roach            // Created by GenoPro.  We cannot process these records without an XREF.
319*3793e425SGreg Roach            return;
320*3793e425SGreg Roach        } elseif (str_starts_with($gedrec, '0 SOCIALRELATIONSHIP')) {
321*3793e425SGreg Roach            // Created by GenoPro.  We cannot process these records without an XREF.
322*3793e425SGreg Roach            return;
323*3793e425SGreg Roach        } elseif (str_starts_with($gedrec, '0 LABEL')) {
324*3793e425SGreg Roach            // Created by GenoPro.  We cannot process these records without an XREF.
325*3793e425SGreg Roach            return;
3262c685d76SGreg Roach        } else {
3272c685d76SGreg Roach            throw new GedcomErrorException($gedrec);
3282c685d76SGreg Roach        }
3292c685d76SGreg Roach
3302c685d76SGreg Roach        // Add a _UID
3312c685d76SGreg Roach        if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) {
3322c685d76SGreg Roach            $element = Registry::elementFactory()->make($type . ':_UID');
3332c685d76SGreg Roach            if (!$element instanceof UnknownElement) {
3342c685d76SGreg Roach                $gedrec .= "\n1 _UID " . $element->default($tree);
3352c685d76SGreg Roach            }
3362c685d76SGreg Roach        }
3372c685d76SGreg Roach
3382c685d76SGreg Roach        // If the user has downloaded their GEDCOM data (containing media objects) and edited it
3392c685d76SGreg Roach        // using an application which does not support (and deletes) media objects, then add them
3402c685d76SGreg Roach        // back in.
3412c685d76SGreg Roach        if ($tree->getPreference('keep_media')) {
3422c685d76SGreg Roach            $old_linked_media = DB::table('link')
3432c685d76SGreg Roach                ->where('l_from', '=', $xref)
3442c685d76SGreg Roach                ->where('l_file', '=', $tree_id)
3452c685d76SGreg Roach                ->where('l_type', '=', 'OBJE')
3462c685d76SGreg Roach                ->pluck('l_to');
3472c685d76SGreg Roach
3482c685d76SGreg Roach            // Delete these links - so that we do not insert them again in updateLinks()
3492c685d76SGreg Roach            DB::table('link')
3502c685d76SGreg Roach                ->where('l_from', '=', $xref)
3512c685d76SGreg Roach                ->where('l_file', '=', $tree_id)
3522c685d76SGreg Roach                ->where('l_type', '=', 'OBJE')
3532c685d76SGreg Roach                ->delete();
3542c685d76SGreg Roach
3552c685d76SGreg Roach            foreach ($old_linked_media as $media_id) {
3562c685d76SGreg Roach                $gedrec .= "\n1 OBJE @" . $media_id . '@';
3572c685d76SGreg Roach            }
3582c685d76SGreg Roach        }
3592c685d76SGreg Roach
3602c685d76SGreg Roach        // Convert inline media into media objects
3612c685d76SGreg Roach        $gedrec = $this->convertInlineMedia($tree, $gedrec);
3622c685d76SGreg Roach
3632c685d76SGreg Roach        switch ($type) {
3642c685d76SGreg Roach            case Individual::RECORD_TYPE:
3652c685d76SGreg Roach                $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree);
3662c685d76SGreg Roach
3672c685d76SGreg Roach                if (preg_match('/\n1 RIN (.+)/', $gedrec, $match)) {
3682c685d76SGreg Roach                    $rin = $match[1];
3692c685d76SGreg Roach                } else {
3702c685d76SGreg Roach                    $rin = $xref;
3712c685d76SGreg Roach                }
3722c685d76SGreg Roach
3732c685d76SGreg Roach                DB::table('individuals')->insert([
3742c685d76SGreg Roach                    'i_id'     => $xref,
3752c685d76SGreg Roach                    'i_file'   => $tree_id,
3762c685d76SGreg Roach                    'i_rin'    => $rin,
3772c685d76SGreg Roach                    'i_sex'    => $record->sex(),
3782c685d76SGreg Roach                    'i_gedcom' => $gedrec,
3792c685d76SGreg Roach                ]);
3802c685d76SGreg Roach
3812c685d76SGreg Roach                // Update the cross-reference/index tables.
3822c685d76SGreg Roach                $this->updatePlaces($xref, $tree, $gedrec);
3832c685d76SGreg Roach                $this->updateDates($xref, $tree_id, $gedrec);
3842c685d76SGreg Roach                $this->updateNames($xref, $tree_id, $record);
3852c685d76SGreg Roach                break;
3862c685d76SGreg Roach
3872c685d76SGreg Roach            case Family::RECORD_TYPE:
3882c685d76SGreg Roach                if (preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
3892c685d76SGreg Roach                    $husb = $match[1];
3902c685d76SGreg Roach                } else {
3912c685d76SGreg Roach                    $husb = '';
3922c685d76SGreg Roach                }
3932c685d76SGreg Roach                if (preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
3942c685d76SGreg Roach                    $wife = $match[1];
3952c685d76SGreg Roach                } else {
3962c685d76SGreg Roach                    $wife = '';
3972c685d76SGreg Roach                }
3982c685d76SGreg Roach                $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match);
3992c685d76SGreg Roach                if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) {
4002c685d76SGreg Roach                    $nchi = max($nchi, $match[1]);
4012c685d76SGreg Roach                }
4022c685d76SGreg Roach
4032c685d76SGreg Roach                DB::table('families')->insert([
4042c685d76SGreg Roach                    'f_id'      => $xref,
4052c685d76SGreg Roach                    'f_file'    => $tree_id,
4062c685d76SGreg Roach                    'f_husb'    => $husb,
4072c685d76SGreg Roach                    'f_wife'    => $wife,
4082c685d76SGreg Roach                    'f_gedcom'  => $gedrec,
4092c685d76SGreg Roach                    'f_numchil' => $nchi,
4102c685d76SGreg Roach                ]);
4112c685d76SGreg Roach
4122c685d76SGreg Roach                // Update the cross-reference/index tables.
4132c685d76SGreg Roach                $this->updatePlaces($xref, $tree, $gedrec);
4142c685d76SGreg Roach                $this->updateDates($xref, $tree_id, $gedrec);
4152c685d76SGreg Roach                break;
4162c685d76SGreg Roach
4172c685d76SGreg Roach            case Source::RECORD_TYPE:
4182c685d76SGreg Roach                if (preg_match('/\n1 TITL (.+)/', $gedrec, $match)) {
4192c685d76SGreg Roach                    $name = $match[1];
4202c685d76SGreg Roach                } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) {
4212c685d76SGreg Roach                    $name = $match[1];
4222c685d76SGreg Roach                } else {
4232c685d76SGreg Roach                    $name = $xref;
4242c685d76SGreg Roach                }
4252c685d76SGreg Roach
4262c685d76SGreg Roach                DB::table('sources')->insert([
4272c685d76SGreg Roach                    's_id'     => $xref,
4282c685d76SGreg Roach                    's_file'   => $tree_id,
4292c685d76SGreg Roach                    's_name'   => mb_substr($name, 0, 255),
4302c685d76SGreg Roach                    's_gedcom' => $gedrec,
4312c685d76SGreg Roach                ]);
4322c685d76SGreg Roach                break;
4332c685d76SGreg Roach
4342c685d76SGreg Roach            case Repository::RECORD_TYPE:
4352c685d76SGreg Roach            case Note::RECORD_TYPE:
4362c685d76SGreg Roach            case Submission::RECORD_TYPE:
4372c685d76SGreg Roach            case Submitter::RECORD_TYPE:
4382c685d76SGreg Roach            case Location::RECORD_TYPE:
4392c685d76SGreg Roach                DB::table('other')->insert([
4402c685d76SGreg Roach                    'o_id'     => $xref,
4412c685d76SGreg Roach                    'o_file'   => $tree_id,
4422c685d76SGreg Roach                    'o_type'   => $type,
4432c685d76SGreg Roach                    'o_gedcom' => $gedrec,
4442c685d76SGreg Roach                ]);
4452c685d76SGreg Roach                break;
4462c685d76SGreg Roach
4472c685d76SGreg Roach            case Header::RECORD_TYPE:
4482c685d76SGreg Roach                // Force HEAD records to have a creation date.
4492c685d76SGreg Roach                if (!str_contains($gedrec, "\n1 DATE ")) {
4502c685d76SGreg Roach                    $today = strtoupper(date('d M Y'));
4512c685d76SGreg Roach                    $gedrec .= "\n1 DATE " . $today;
4522c685d76SGreg Roach                }
4532c685d76SGreg Roach
4542c685d76SGreg Roach                DB::table('other')->insert([
4552c685d76SGreg Roach                    'o_id'     => $xref,
4562c685d76SGreg Roach                    'o_file'   => $tree_id,
4572c685d76SGreg Roach                    'o_type'   => Header::RECORD_TYPE,
4582c685d76SGreg Roach                    'o_gedcom' => $gedrec,
4592c685d76SGreg Roach                ]);
4602c685d76SGreg Roach                break;
4612c685d76SGreg Roach
4622c685d76SGreg Roach
4632c685d76SGreg Roach            case Media::RECORD_TYPE:
4642c685d76SGreg Roach                $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree);
4652c685d76SGreg Roach
4662c685d76SGreg Roach                DB::table('media')->insert([
4672c685d76SGreg Roach                    'm_id'     => $xref,
4682c685d76SGreg Roach                    'm_file'   => $tree_id,
4692c685d76SGreg Roach                    'm_gedcom' => $gedrec,
4702c685d76SGreg Roach                ]);
4712c685d76SGreg Roach
4722c685d76SGreg Roach                foreach ($record->mediaFiles() as $media_file) {
4732c685d76SGreg Roach                    DB::table('media_file')->insert([
4742c685d76SGreg Roach                        'm_id'                 => $xref,
4752c685d76SGreg Roach                        'm_file'               => $tree_id,
4762c685d76SGreg Roach                        'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
4772c685d76SGreg Roach                        'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
4782c685d76SGreg Roach                        'source_media_type'    => mb_substr($media_file->type(), 0, 15),
4792c685d76SGreg Roach                        'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
4802c685d76SGreg Roach                    ]);
4812c685d76SGreg Roach                }
4822c685d76SGreg Roach                break;
4832c685d76SGreg Roach
4842c685d76SGreg Roach            default: // Custom record types.
4852c685d76SGreg Roach                DB::table('other')->insert([
4862c685d76SGreg Roach                    'o_id'     => $xref,
4872c685d76SGreg Roach                    'o_file'   => $tree_id,
4882c685d76SGreg Roach                    'o_type'   => mb_substr($type, 0, 15),
4892c685d76SGreg Roach                    'o_gedcom' => $gedrec,
4902c685d76SGreg Roach                ]);
4912c685d76SGreg Roach                break;
4922c685d76SGreg Roach        }
4932c685d76SGreg Roach
4942c685d76SGreg Roach        // Update the cross-reference/index tables.
4952c685d76SGreg Roach        $this->updateLinks($xref, $tree_id, $gedrec);
4962c685d76SGreg Roach    }
4972c685d76SGreg Roach
4982c685d76SGreg Roach    /**
4992c685d76SGreg Roach     * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values
5002c685d76SGreg Roach     *
5012c685d76SGreg Roach     * @param string $gedcom
5022c685d76SGreg Roach     */
5032c685d76SGreg Roach    private function importLegacyPlacDefn(string $gedcom): void
5042c685d76SGreg Roach    {
5052c685d76SGreg Roach        $gedcom_service = new GedcomService();
5062c685d76SGreg Roach
5072c685d76SGreg Roach        if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match)) {
5082c685d76SGreg Roach            $place_name = $match[1];
5092c685d76SGreg Roach        } else {
5102c685d76SGreg Roach            return;
5112c685d76SGreg Roach        }
5122c685d76SGreg Roach
5132c685d76SGreg Roach        if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match)) {
5142c685d76SGreg Roach            $latitude = $gedcom_service->readLatitude($match[1]);
5152c685d76SGreg Roach        } else {
5162c685d76SGreg Roach            return;
5172c685d76SGreg Roach        }
5182c685d76SGreg Roach
5192c685d76SGreg Roach        if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match)) {
5202c685d76SGreg Roach            $longitude = $gedcom_service->readLongitude($match[1]);
5212c685d76SGreg Roach        } else {
5222c685d76SGreg Roach            return;
5232c685d76SGreg Roach        }
5242c685d76SGreg Roach
5252c685d76SGreg Roach        $location = new PlaceLocation($place_name);
5262c685d76SGreg Roach
5272c685d76SGreg Roach        if ($location->latitude() === null && $location->longitude() === null) {
5282c685d76SGreg Roach            DB::table('place_location')
5292c685d76SGreg Roach                ->where('id', '=', $location->id())
5302c685d76SGreg Roach                ->update([
5312c685d76SGreg Roach                    'latitude'  => $latitude,
5322c685d76SGreg Roach                    'longitude' => $longitude,
5332c685d76SGreg Roach                ]);
5342c685d76SGreg Roach        }
5352c685d76SGreg Roach    }
5362c685d76SGreg Roach
5372c685d76SGreg Roach    /**
5382c685d76SGreg Roach     * Legacy Family Tree software generates _PLAC records containing LAT/LONG values
5392c685d76SGreg Roach     *
5402c685d76SGreg Roach     * @param string $gedcom
5412c685d76SGreg Roach     */
5422c685d76SGreg Roach    private function importTNGPlac(string $gedcom): void
5432c685d76SGreg Roach    {
5442c685d76SGreg Roach        if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match)) {
5452c685d76SGreg Roach            $place_name = $match[1];
5462c685d76SGreg Roach        } else {
5472c685d76SGreg Roach            return;
5482c685d76SGreg Roach        }
5492c685d76SGreg Roach
5502c685d76SGreg Roach        if (preg_match('/\n2 LATI (.+)/', $gedcom, $match)) {
5512c685d76SGreg Roach            $latitude = (float) $match[1];
5522c685d76SGreg Roach        } else {
5532c685d76SGreg Roach            return;
5542c685d76SGreg Roach        }
5552c685d76SGreg Roach
5562c685d76SGreg Roach        if (preg_match('/\n2 LONG (.+)/', $gedcom, $match)) {
5572c685d76SGreg Roach            $longitude = (float) $match[1];
5582c685d76SGreg Roach        } else {
5592c685d76SGreg Roach            return;
5602c685d76SGreg Roach        }
5612c685d76SGreg Roach
5622c685d76SGreg Roach        $location = new PlaceLocation($place_name);
5632c685d76SGreg Roach
5642c685d76SGreg Roach        if ($location->latitude() === null && $location->longitude() === null) {
5652c685d76SGreg Roach            DB::table('place_location')
5662c685d76SGreg Roach                ->where('id', '=', $location->id())
5672c685d76SGreg Roach                ->update([
5682c685d76SGreg Roach                    'latitude'  => $latitude,
5692c685d76SGreg Roach                    'longitude' => $longitude,
5702c685d76SGreg Roach                ]);
5712c685d76SGreg Roach        }
5722c685d76SGreg Roach    }
5732c685d76SGreg Roach
5742c685d76SGreg Roach    /**
5752c685d76SGreg Roach     * Extract all level 2 places from the given record and insert them into the places table
5762c685d76SGreg Roach     *
5772c685d76SGreg Roach     * @param string $xref
5782c685d76SGreg Roach     * @param Tree   $tree
5792c685d76SGreg Roach     * @param string $gedrec
5802c685d76SGreg Roach     *
5812c685d76SGreg Roach     * @return void
5822c685d76SGreg Roach     */
5832c685d76SGreg Roach    public function updatePlaces(string $xref, Tree $tree, string $gedrec): void
5842c685d76SGreg Roach    {
5852c685d76SGreg Roach        // Insert all new rows together
5862c685d76SGreg Roach        $rows = [];
5872c685d76SGreg Roach
5882c685d76SGreg Roach        preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches);
5892c685d76SGreg Roach
5902c685d76SGreg Roach        $places = array_unique($matches[1]);
5912c685d76SGreg Roach
5922c685d76SGreg Roach        foreach ($places as $place_name) {
5932c685d76SGreg Roach            $place = new Place($place_name, $tree);
5942c685d76SGreg Roach
5952c685d76SGreg Roach            // Calling Place::id() will create the entry in the database, if it doesn't already exist.
5962c685d76SGreg Roach            while ($place->id() !== 0) {
5972c685d76SGreg Roach                $rows[] = [
5982c685d76SGreg Roach                    'pl_p_id' => $place->id(),
5992c685d76SGreg Roach                    'pl_gid'  => $xref,
6002c685d76SGreg Roach                    'pl_file' => $tree->id(),
6012c685d76SGreg Roach                ];
6022c685d76SGreg Roach
6032c685d76SGreg Roach                $place = $place->parent();
6042c685d76SGreg Roach            }
6052c685d76SGreg Roach        }
6062c685d76SGreg Roach
6072c685d76SGreg Roach        // array_unique doesn't work with arrays of arrays
6082c685d76SGreg Roach        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));
6092c685d76SGreg Roach
6102c685d76SGreg Roach        // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders.
6112c685d76SGreg Roach        foreach (array_chunk($rows, 20000) as $chunk) {
6122c685d76SGreg Roach            DB::table('placelinks')->insert($chunk);
6132c685d76SGreg Roach        }
6142c685d76SGreg Roach    }
6152c685d76SGreg Roach
6162c685d76SGreg Roach    /**
6172c685d76SGreg Roach     * Extract all the dates from the given record and insert them into the database.
6182c685d76SGreg Roach     *
6192c685d76SGreg Roach     * @param string $xref
6202c685d76SGreg Roach     * @param int    $ged_id
6212c685d76SGreg Roach     * @param string $gedrec
6222c685d76SGreg Roach     *
6232c685d76SGreg Roach     * @return void
6242c685d76SGreg Roach     */
6252c685d76SGreg Roach    private function updateDates(string $xref, int $ged_id, string $gedrec): void
6262c685d76SGreg Roach    {
6272c685d76SGreg Roach        // Insert all new rows together
6282c685d76SGreg Roach        $rows = [];
6292c685d76SGreg Roach
6302c685d76SGreg Roach        preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER);
6312c685d76SGreg Roach
6322c685d76SGreg Roach        foreach ($matches as $match) {
6332c685d76SGreg Roach            $fact = $match[1];
6342c685d76SGreg Roach            $date = new Date($match[2]);
6352c685d76SGreg Roach            $rows[] = [
6362c685d76SGreg Roach                'd_day'        => $date->minimumDate()->day,
6372c685d76SGreg Roach                'd_month'      => $date->minimumDate()->format('%O'),
6382c685d76SGreg Roach                'd_mon'        => $date->minimumDate()->month,
6392c685d76SGreg Roach                'd_year'       => $date->minimumDate()->year,
6402c685d76SGreg Roach                'd_julianday1' => $date->minimumDate()->minimumJulianDay(),
6412c685d76SGreg Roach                'd_julianday2' => $date->minimumDate()->maximumJulianDay(),
6422c685d76SGreg Roach                'd_fact'       => $fact,
6432c685d76SGreg Roach                'd_gid'        => $xref,
6442c685d76SGreg Roach                'd_file'       => $ged_id,
6452c685d76SGreg Roach                'd_type'       => $date->minimumDate()->format('%@'),
6462c685d76SGreg Roach            ];
6472c685d76SGreg Roach
6482c685d76SGreg Roach            $rows[] = [
6492c685d76SGreg Roach                'd_day'        => $date->maximumDate()->day,
6502c685d76SGreg Roach                'd_month'      => $date->maximumDate()->format('%O'),
6512c685d76SGreg Roach                'd_mon'        => $date->maximumDate()->month,
6522c685d76SGreg Roach                'd_year'       => $date->maximumDate()->year,
6532c685d76SGreg Roach                'd_julianday1' => $date->maximumDate()->minimumJulianDay(),
6542c685d76SGreg Roach                'd_julianday2' => $date->maximumDate()->maximumJulianDay(),
6552c685d76SGreg Roach                'd_fact'       => $fact,
6562c685d76SGreg Roach                'd_gid'        => $xref,
6572c685d76SGreg Roach                'd_file'       => $ged_id,
6582c685d76SGreg Roach                'd_type'       => $date->minimumDate()->format('%@'),
6592c685d76SGreg Roach            ];
6602c685d76SGreg Roach        }
6612c685d76SGreg Roach
6622c685d76SGreg Roach        // array_unique doesn't work with arrays of arrays
6632c685d76SGreg Roach        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));
6642c685d76SGreg Roach
6652c685d76SGreg Roach        DB::table('dates')->insert($rows);
6662c685d76SGreg Roach    }
6672c685d76SGreg Roach
6682c685d76SGreg Roach    /**
6692c685d76SGreg Roach     * Extract all the links from the given record and insert them into the database
6702c685d76SGreg Roach     *
6712c685d76SGreg Roach     * @param string $xref
6722c685d76SGreg Roach     * @param int    $ged_id
6732c685d76SGreg Roach     * @param string $gedrec
6742c685d76SGreg Roach     *
6752c685d76SGreg Roach     * @return void
6762c685d76SGreg Roach     */
6772c685d76SGreg Roach    private function updateLinks(string $xref, int $ged_id, string $gedrec): void
6782c685d76SGreg Roach    {
6792c685d76SGreg Roach        // Insert all new rows together
6802c685d76SGreg Roach        $rows = [];
6812c685d76SGreg Roach
6822c685d76SGreg Roach        preg_match_all('/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $matches, PREG_SET_ORDER);
6832c685d76SGreg Roach
6842c685d76SGreg Roach        foreach ($matches as $match) {
685*3793e425SGreg Roach            // Some applications (e.g. GenoPro) create links longer than 15 characters.
686*3793e425SGreg Roach            $link = mb_substr($match[1], 15);
687*3793e425SGreg Roach
6882c685d76SGreg Roach            // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@"
689*3793e425SGreg Roach            $rows[$link . strtoupper($match[2])] = [
6902c685d76SGreg Roach                'l_from' => $xref,
6912c685d76SGreg Roach                'l_to'   => $match[2],
692*3793e425SGreg Roach                'l_type' => $link,
6932c685d76SGreg Roach                'l_file' => $ged_id,
6942c685d76SGreg Roach            ];
6952c685d76SGreg Roach        }
6962c685d76SGreg Roach
6972c685d76SGreg Roach        DB::table('link')->insert($rows);
6982c685d76SGreg Roach    }
6992c685d76SGreg Roach
7002c685d76SGreg Roach    /**
7012c685d76SGreg Roach     * Extract all the names from the given record and insert them into the database.
7022c685d76SGreg Roach     *
7032c685d76SGreg Roach     * @param string     $xref
7042c685d76SGreg Roach     * @param int        $ged_id
7052c685d76SGreg Roach     * @param Individual $record
7062c685d76SGreg Roach     *
7072c685d76SGreg Roach     * @return void
7082c685d76SGreg Roach     */
7092c685d76SGreg Roach    private function updateNames(string $xref, int $ged_id, Individual $record): void
7102c685d76SGreg Roach    {
7112c685d76SGreg Roach        // Insert all new rows together
7122c685d76SGreg Roach        $rows = [];
7132c685d76SGreg Roach
7142c685d76SGreg Roach        foreach ($record->getAllNames() as $n => $name) {
7152c685d76SGreg Roach            if ($name['givn'] === Individual::PRAENOMEN_NESCIO) {
7162c685d76SGreg Roach                $soundex_givn_std = null;
7172c685d76SGreg Roach                $soundex_givn_dm  = null;
7182c685d76SGreg Roach            } else {
7192c685d76SGreg Roach                $soundex_givn_std = Soundex::russell($name['givn']);
7202c685d76SGreg Roach                $soundex_givn_dm  = Soundex::daitchMokotoff($name['givn']);
7212c685d76SGreg Roach            }
7222c685d76SGreg Roach
7232c685d76SGreg Roach            if ($name['surn'] === Individual::NOMEN_NESCIO) {
7242c685d76SGreg Roach                $soundex_surn_std = null;
7252c685d76SGreg Roach                $soundex_surn_dm  = null;
7262c685d76SGreg Roach            } else {
7272c685d76SGreg Roach                $soundex_surn_std = Soundex::russell($name['surname']);
7282c685d76SGreg Roach                $soundex_surn_dm  = Soundex::daitchMokotoff($name['surname']);
7292c685d76SGreg Roach            }
7302c685d76SGreg Roach
7312c685d76SGreg Roach            $rows[] = [
7322c685d76SGreg Roach                'n_file'             => $ged_id,
7332c685d76SGreg Roach                'n_id'               => $xref,
7342c685d76SGreg Roach                'n_num'              => $n,
7352c685d76SGreg Roach                'n_type'             => $name['type'],
7362c685d76SGreg Roach                'n_sort'             => mb_substr($name['sort'], 0, 255),
7372c685d76SGreg Roach                'n_full'             => mb_substr($name['fullNN'], 0, 255),
7382c685d76SGreg Roach                'n_surname'          => mb_substr($name['surname'], 0, 255),
7392c685d76SGreg Roach                'n_surn'             => mb_substr($name['surn'], 0, 255),
7402c685d76SGreg Roach                'n_givn'             => mb_substr($name['givn'], 0, 255),
7412c685d76SGreg Roach                'n_soundex_givn_std' => $soundex_givn_std,
7422c685d76SGreg Roach                'n_soundex_surn_std' => $soundex_surn_std,
7432c685d76SGreg Roach                'n_soundex_givn_dm'  => $soundex_givn_dm,
7442c685d76SGreg Roach                'n_soundex_surn_dm'  => $soundex_surn_dm,
7452c685d76SGreg Roach            ];
7462c685d76SGreg Roach        }
7472c685d76SGreg Roach
7482c685d76SGreg Roach        DB::table('name')->insert($rows);
7492c685d76SGreg Roach    }
7502c685d76SGreg Roach
7512c685d76SGreg Roach    /**
7522c685d76SGreg Roach     * Extract inline media data, and convert to media objects.
7532c685d76SGreg Roach     *
7542c685d76SGreg Roach     * @param Tree   $tree
7552c685d76SGreg Roach     * @param string $gedcom
7562c685d76SGreg Roach     *
7572c685d76SGreg Roach     * @return string
7582c685d76SGreg Roach     */
7592c685d76SGreg Roach    private function convertInlineMedia(Tree $tree, string $gedcom): string
7602c685d76SGreg Roach    {
7612c685d76SGreg Roach        while (preg_match('/\n1 OBJE(?:\n[2-9].+)+/', $gedcom, $match)) {
7622c685d76SGreg Roach            $xref   = $this->createMediaObject($match[0], $tree);
7632c685d76SGreg Roach            $gedcom = strtr($gedcom, [$match[0] =>  "\n1 OBJE @" . $xref . '@']);
7642c685d76SGreg Roach        }
7652c685d76SGreg Roach        while (preg_match('/\n2 OBJE(?:\n[3-9].+)+/', $gedcom, $match)) {
7662c685d76SGreg Roach            $xref   = $this->createMediaObject($match[0], $tree);
7672c685d76SGreg Roach            $gedcom = strtr($gedcom, [$match[0] =>  "\n2 OBJE @" . $xref . '@']);
7682c685d76SGreg Roach        }
7692c685d76SGreg Roach        while (preg_match('/\n3 OBJE(?:\n[4-9].+)+/', $gedcom, $match)) {
7702c685d76SGreg Roach            $xref   = $this->createMediaObject($match[0], $tree);
7712c685d76SGreg Roach            $gedcom = strtr($gedcom, [$match[0] =>  "\n3 OBJE @" . $xref . '@']);
7722c685d76SGreg Roach        }
7732c685d76SGreg Roach
7742c685d76SGreg Roach        return $gedcom;
7752c685d76SGreg Roach    }
7762c685d76SGreg Roach
7772c685d76SGreg Roach    /**
7782c685d76SGreg Roach     * Create a new media object, from inline media data.
7792c685d76SGreg Roach     *
7802c685d76SGreg Roach     * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL
7812c685d76SGreg Roach     * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL
7822c685d76SGreg Roach     * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies:  +1 FILE / +1 FORM / +2 MEDI
7832c685d76SGreg Roach     *
7842c685d76SGreg Roach     * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE
7852c685d76SGreg Roach     * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL
7862c685d76SGreg Roach     *
7872c685d76SGreg Roach     * @param string $gedcom
7882c685d76SGreg Roach     * @param Tree   $tree
7892c685d76SGreg Roach     *
7902c685d76SGreg Roach     * @return string
7912c685d76SGreg Roach     */
7922c685d76SGreg Roach    private function createMediaObject(string $gedcom, Tree $tree): string
7932c685d76SGreg Roach    {
7942c685d76SGreg Roach        preg_match('/\n\d FILE (.+)/', $gedcom, $match);
7952c685d76SGreg Roach        $file = $match[1] ?? '';
7962c685d76SGreg Roach
7972c685d76SGreg Roach        preg_match('/\n\d TITL (.+)/', $gedcom, $match);
7982c685d76SGreg Roach        $title = $match[1] ?? '';
7992c685d76SGreg Roach
8002c685d76SGreg Roach        preg_match('/\n\d FORM (.+)/', $gedcom, $match);
8012c685d76SGreg Roach        $format = $match[1] ?? '';
8022c685d76SGreg Roach
8032c685d76SGreg Roach        preg_match('/\n\d MEDI (.+)/', $gedcom, $match);
8042c685d76SGreg Roach        $media = $match[1] ?? '';
8052c685d76SGreg Roach
8062c685d76SGreg Roach        preg_match('/\n\d _SCBK (.+)/', $gedcom, $match);
8072c685d76SGreg Roach        $scrapbook = $match[1] ?? '';
8082c685d76SGreg Roach
8092c685d76SGreg Roach        preg_match('/\n\d _PRIM (.+)/', $gedcom, $match);
8102c685d76SGreg Roach        $primary = $match[1] ?? '';
8112c685d76SGreg Roach
8122c685d76SGreg Roach        preg_match('/\n\d _TYPE (.+)/', $gedcom, $match);
8132c685d76SGreg Roach        if ($media === '') {
8142c685d76SGreg Roach            // Legacy uses _TYPE instead of MEDI
8152c685d76SGreg Roach            $media = $match[1] ?? '';
8162c685d76SGreg Roach            $type  = '';
8172c685d76SGreg Roach        } else {
8182c685d76SGreg Roach            $type = $match[1] ?? '';
8192c685d76SGreg Roach        }
8202c685d76SGreg Roach
8212c685d76SGreg Roach        preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches);
8222c685d76SGreg Roach        $notes = $matches[1] ?? [];
8232c685d76SGreg Roach
8242c685d76SGreg Roach        // Have we already created a media object with the same title/filename?
8252c685d76SGreg Roach        $xref = DB::table('media_file')
8262c685d76SGreg Roach            ->where('m_file', '=', $tree->id())
8272c685d76SGreg Roach            ->where('descriptive_title', '=', mb_substr($title, 0, 248))
8282c685d76SGreg Roach            ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248))
8292c685d76SGreg Roach            ->value('m_id');
8302c685d76SGreg Roach
8312c685d76SGreg Roach        if ($xref === null) {
8322c685d76SGreg Roach            $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE);
8332c685d76SGreg Roach
8342c685d76SGreg Roach            // convert to a media-object
8352c685d76SGreg Roach            $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file;
8362c685d76SGreg Roach
8372c685d76SGreg Roach            if ($format !== '') {
8382c685d76SGreg Roach                $gedcom .= "\n2 FORM " . $format;
8392c685d76SGreg Roach
8402c685d76SGreg Roach                if ($media !== '') {
8412c685d76SGreg Roach                    $gedcom .= "\n3 TYPE " . $media;
8422c685d76SGreg Roach                }
8432c685d76SGreg Roach            }
8442c685d76SGreg Roach
8452c685d76SGreg Roach            if ($title !== '') {
8462c685d76SGreg Roach                $gedcom .= "\n3 TITL " . $title;
8472c685d76SGreg Roach            }
8482c685d76SGreg Roach
8492c685d76SGreg Roach            if ($scrapbook !== '') {
8502c685d76SGreg Roach                $gedcom .= "\n1 _SCBK " . $scrapbook;
8512c685d76SGreg Roach            }
8522c685d76SGreg Roach
8532c685d76SGreg Roach            if ($primary !== '') {
8542c685d76SGreg Roach                $gedcom .= "\n1 _PRIM " . $primary;
8552c685d76SGreg Roach            }
8562c685d76SGreg Roach
8572c685d76SGreg Roach            if ($type !== '') {
8582c685d76SGreg Roach                $gedcom .= "\n1 _TYPE " . $type;
8592c685d76SGreg Roach            }
8602c685d76SGreg Roach
8612c685d76SGreg Roach            foreach ($notes as $note) {
8622c685d76SGreg Roach                $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]);
8632c685d76SGreg Roach            }
8642c685d76SGreg Roach
8652c685d76SGreg Roach            DB::table('media')->insert([
8662c685d76SGreg Roach                'm_id'     => $xref,
8672c685d76SGreg Roach                'm_file'   => $tree->id(),
8682c685d76SGreg Roach                'm_gedcom' => $gedcom,
8692c685d76SGreg Roach            ]);
8702c685d76SGreg Roach
8712c685d76SGreg Roach            DB::table('media_file')->insert([
8722c685d76SGreg Roach                'm_id'                 => $xref,
8732c685d76SGreg Roach                'm_file'               => $tree->id(),
8742c685d76SGreg Roach                'multimedia_file_refn' => mb_substr($file, 0, 248),
8752c685d76SGreg Roach                'multimedia_format'    => mb_substr($format, 0, 4),
8762c685d76SGreg Roach                'source_media_type'    => mb_substr($media, 0, 15),
8772c685d76SGreg Roach                'descriptive_title'    => mb_substr($title, 0, 248),
8782c685d76SGreg Roach            ]);
8792c685d76SGreg Roach        }
8802c685d76SGreg Roach
8812c685d76SGreg Roach        return $xref;
8822c685d76SGreg Roach    }
8832c685d76SGreg Roach
8842c685d76SGreg Roach    /**
8852c685d76SGreg Roach     * update a record in the database
8862c685d76SGreg Roach     *
8872c685d76SGreg Roach     * @param string $gedrec
8882c685d76SGreg Roach     * @param Tree   $tree
8892c685d76SGreg Roach     * @param bool   $delete
8902c685d76SGreg Roach     *
8912c685d76SGreg Roach     * @return void
8922c685d76SGreg Roach     * @throws GedcomErrorException
8932c685d76SGreg Roach     */
8942c685d76SGreg Roach    public function updateRecord(string $gedrec, Tree $tree, bool $delete): void
8952c685d76SGreg Roach    {
8962c685d76SGreg Roach        if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) {
8972c685d76SGreg Roach            [, $gid, $type] = $match;
8982c685d76SGreg Roach        } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) {
8992c685d76SGreg Roach            // The HEAD record has no XREF.  Any others?
9002c685d76SGreg Roach            $gid  = $match[1];
9012c685d76SGreg Roach            $type = $match[1];
9022c685d76SGreg Roach        } else {
9032c685d76SGreg Roach            throw new GedcomErrorException($gedrec);
9042c685d76SGreg Roach        }
9052c685d76SGreg Roach
9062c685d76SGreg Roach        // Place links
9072c685d76SGreg Roach        DB::table('placelinks')
9082c685d76SGreg Roach            ->where('pl_gid', '=', $gid)
9092c685d76SGreg Roach            ->where('pl_file', '=', $tree->id())
9102c685d76SGreg Roach            ->delete();
9112c685d76SGreg Roach
9122c685d76SGreg Roach        // Orphaned places.  If we're deleting  "Westminster, London, England",
9132c685d76SGreg Roach        // then we may also need to delete "London, England" and "England".
9142c685d76SGreg Roach        do {
9152c685d76SGreg Roach            $affected = DB::table('places')
9162c685d76SGreg Roach                ->leftJoin('placelinks', function (JoinClause $join): void {
9172c685d76SGreg Roach                    $join
9182c685d76SGreg Roach                        ->on('p_id', '=', 'pl_p_id')
9192c685d76SGreg Roach                        ->on('p_file', '=', 'pl_file');
9202c685d76SGreg Roach                })
9212c685d76SGreg Roach                ->whereNull('pl_p_id')
9222c685d76SGreg Roach                ->delete();
9232c685d76SGreg Roach        } while ($affected > 0);
9242c685d76SGreg Roach
9252c685d76SGreg Roach        DB::table('dates')
9262c685d76SGreg Roach            ->where('d_gid', '=', $gid)
9272c685d76SGreg Roach            ->where('d_file', '=', $tree->id())
9282c685d76SGreg Roach            ->delete();
9292c685d76SGreg Roach
9302c685d76SGreg Roach        DB::table('name')
9312c685d76SGreg Roach            ->where('n_id', '=', $gid)
9322c685d76SGreg Roach            ->where('n_file', '=', $tree->id())
9332c685d76SGreg Roach            ->delete();
9342c685d76SGreg Roach
9352c685d76SGreg Roach        DB::table('link')
9362c685d76SGreg Roach            ->where('l_from', '=', $gid)
9372c685d76SGreg Roach            ->where('l_file', '=', $tree->id())
9382c685d76SGreg Roach            ->delete();
9392c685d76SGreg Roach
9402c685d76SGreg Roach        switch ($type) {
9412c685d76SGreg Roach            case Individual::RECORD_TYPE:
9422c685d76SGreg Roach                DB::table('individuals')
9432c685d76SGreg Roach                    ->where('i_id', '=', $gid)
9442c685d76SGreg Roach                    ->where('i_file', '=', $tree->id())
9452c685d76SGreg Roach                    ->delete();
9462c685d76SGreg Roach                break;
9472c685d76SGreg Roach
9482c685d76SGreg Roach            case Family::RECORD_TYPE:
9492c685d76SGreg Roach                DB::table('families')
9502c685d76SGreg Roach                    ->where('f_id', '=', $gid)
9512c685d76SGreg Roach                    ->where('f_file', '=', $tree->id())
9522c685d76SGreg Roach                    ->delete();
9532c685d76SGreg Roach                break;
9542c685d76SGreg Roach
9552c685d76SGreg Roach            case Source::RECORD_TYPE:
9562c685d76SGreg Roach                DB::table('sources')
9572c685d76SGreg Roach                    ->where('s_id', '=', $gid)
9582c685d76SGreg Roach                    ->where('s_file', '=', $tree->id())
9592c685d76SGreg Roach                    ->delete();
9602c685d76SGreg Roach                break;
9612c685d76SGreg Roach
9622c685d76SGreg Roach            case Media::RECORD_TYPE:
9632c685d76SGreg Roach                DB::table('media_file')
9642c685d76SGreg Roach                    ->where('m_id', '=', $gid)
9652c685d76SGreg Roach                    ->where('m_file', '=', $tree->id())
9662c685d76SGreg Roach                    ->delete();
9672c685d76SGreg Roach
9682c685d76SGreg Roach                DB::table('media')
9692c685d76SGreg Roach                    ->where('m_id', '=', $gid)
9702c685d76SGreg Roach                    ->where('m_file', '=', $tree->id())
9712c685d76SGreg Roach                    ->delete();
9722c685d76SGreg Roach                break;
9732c685d76SGreg Roach
9742c685d76SGreg Roach            default:
9752c685d76SGreg Roach                DB::table('other')
9762c685d76SGreg Roach                    ->where('o_id', '=', $gid)
9772c685d76SGreg Roach                    ->where('o_file', '=', $tree->id())
9782c685d76SGreg Roach                    ->delete();
9792c685d76SGreg Roach                break;
9802c685d76SGreg Roach        }
9812c685d76SGreg Roach
9822c685d76SGreg Roach        if (!$delete) {
9832c685d76SGreg Roach            $this->importRecord($gedrec, $tree, true);
9842c685d76SGreg Roach        }
9852c685d76SGreg Roach    }
9862c685d76SGreg Roach}
987