xref: /webtrees/app/Services/GedcomImportService.php (revision 5bfc689774bb9a6401271c4ed15a6d50652c991b)
12c685d76SGreg Roach<?php
22c685d76SGreg Roach
32c685d76SGreg Roach/**
42c685d76SGreg Roach * webtrees: online genealogy
5*5bfc6897SGreg Roach * Copyright (C) 2022 webtrees development team
62c685d76SGreg Roach * This program is free software: you can redistribute it and/or modify
72c685d76SGreg Roach * it under the terms of the GNU General Public License as published by
82c685d76SGreg Roach * the Free Software Foundation, either version 3 of the License, or
92c685d76SGreg Roach * (at your option) any later version.
102c685d76SGreg Roach * This program is distributed in the hope that it will be useful,
112c685d76SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
122c685d76SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
132c685d76SGreg Roach * GNU General Public License for more details.
142c685d76SGreg Roach * You should have received a copy of the GNU General Public License
152c685d76SGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>.
162c685d76SGreg Roach */
172c685d76SGreg Roach
182c685d76SGreg Roachdeclare(strict_types=1);
192c685d76SGreg Roach
202c685d76SGreg Roachnamespace Fisharebest\Webtrees\Services;
212c685d76SGreg Roach
222c685d76SGreg Roachuse Fisharebest\Webtrees\Date;
232c685d76SGreg Roachuse Fisharebest\Webtrees\Elements\UnknownElement;
242c685d76SGreg Roachuse Fisharebest\Webtrees\Exceptions\GedcomErrorException;
252c685d76SGreg Roachuse Fisharebest\Webtrees\Family;
262c685d76SGreg Roachuse Fisharebest\Webtrees\Gedcom;
272c685d76SGreg Roachuse Fisharebest\Webtrees\Header;
282c685d76SGreg Roachuse Fisharebest\Webtrees\Individual;
292c685d76SGreg Roachuse Fisharebest\Webtrees\Location;
302c685d76SGreg Roachuse Fisharebest\Webtrees\Media;
312c685d76SGreg Roachuse Fisharebest\Webtrees\Note;
322c685d76SGreg Roachuse Fisharebest\Webtrees\Place;
332c685d76SGreg Roachuse Fisharebest\Webtrees\PlaceLocation;
342c685d76SGreg Roachuse Fisharebest\Webtrees\Registry;
352c685d76SGreg Roachuse Fisharebest\Webtrees\Repository;
362c685d76SGreg Roachuse Fisharebest\Webtrees\Soundex;
372c685d76SGreg Roachuse Fisharebest\Webtrees\Source;
382c685d76SGreg Roachuse Fisharebest\Webtrees\Submission;
392c685d76SGreg Roachuse Fisharebest\Webtrees\Submitter;
402c685d76SGreg Roachuse Fisharebest\Webtrees\Tree;
412c685d76SGreg Roachuse Illuminate\Database\Capsule\Manager as DB;
422c685d76SGreg Roachuse Illuminate\Database\Query\JoinClause;
432c685d76SGreg Roach
442c685d76SGreg Roachuse function app;
452c685d76SGreg Roachuse function array_chunk;
462c685d76SGreg Roachuse function array_intersect_key;
472c685d76SGreg Roachuse function array_map;
482c685d76SGreg Roachuse function array_unique;
492c685d76SGreg Roachuse function assert;
502c685d76SGreg Roachuse function date;
512c685d76SGreg Roachuse function explode;
522c685d76SGreg Roachuse function max;
532c685d76SGreg Roachuse function mb_substr;
542c685d76SGreg Roachuse function preg_match;
552c685d76SGreg Roachuse function preg_match_all;
562c685d76SGreg Roachuse function preg_replace;
572c685d76SGreg Roachuse function round;
582c685d76SGreg Roachuse function str_contains;
592c685d76SGreg Roachuse function str_replace;
602c685d76SGreg Roachuse function str_starts_with;
612c685d76SGreg Roachuse function strlen;
622c685d76SGreg Roachuse function strtolower;
632c685d76SGreg Roachuse function strtoupper;
642c685d76SGreg Roachuse function strtr;
652c685d76SGreg Roachuse function substr;
662c685d76SGreg Roachuse function trim;
672c685d76SGreg Roach
682c685d76SGreg Roachuse const PREG_SET_ORDER;
692c685d76SGreg Roach
702c685d76SGreg Roach/**
712c685d76SGreg Roach * Class GedcomImportService - import GEDCOM data
722c685d76SGreg Roach */
732c685d76SGreg Roachclass GedcomImportService
742c685d76SGreg Roach{
752c685d76SGreg Roach    /**
762c685d76SGreg Roach     * Tidy up a gedcom record on import, so that we can access it consistently/efficiently.
772c685d76SGreg Roach     *
782c685d76SGreg Roach     * @param string $rec
792c685d76SGreg Roach     * @param Tree   $tree
802c685d76SGreg Roach     *
812c685d76SGreg Roach     * @return string
822c685d76SGreg Roach     */
832c685d76SGreg Roach    private function reformatRecord(string $rec, Tree $tree): string
842c685d76SGreg Roach    {
852c685d76SGreg Roach        $gedcom_service = app(GedcomService::class);
862c685d76SGreg Roach        assert($gedcom_service instanceof GedcomService);
872c685d76SGreg Roach
882c685d76SGreg Roach        // Strip out mac/msdos line endings
892c685d76SGreg Roach        $rec = preg_replace("/[\r\n]+/", "\n", $rec);
902c685d76SGreg Roach
912c685d76SGreg Roach        // Extract lines from the record; lines consist of: level + optional xref + tag + optional data
922c685d76SGreg Roach        $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER);
932c685d76SGreg Roach
942c685d76SGreg Roach        // Process the record line-by-line
952c685d76SGreg Roach        $newrec = '';
962c685d76SGreg Roach        foreach ($matches as $n => $match) {
972c685d76SGreg Roach            [, $level, $xref, $tag, $data] = $match;
982c685d76SGreg Roach
992c685d76SGreg Roach            $tag = $gedcom_service->canonicalTag($tag);
1002c685d76SGreg Roach
1012c685d76SGreg Roach            switch ($tag) {
1022c685d76SGreg Roach                case 'AFN':
1032c685d76SGreg Roach                    // AFN values are upper case
1042c685d76SGreg Roach                    $data = strtoupper($data);
1052c685d76SGreg Roach                    break;
1062c685d76SGreg Roach                case 'DATE':
1072c685d76SGreg Roach                    // Preserve text from INT dates
1082c685d76SGreg Roach                    if (str_contains($data, '(')) {
1092c685d76SGreg Roach                        [$date, $text] = explode('(', $data, 2);
1102c685d76SGreg Roach                        $text = ' (' . $text;
1112c685d76SGreg Roach                    } else {
1122c685d76SGreg Roach                        $date = $data;
1132c685d76SGreg Roach                        $text = '';
1142c685d76SGreg Roach                    }
1152c685d76SGreg Roach                    // Capitals
1162c685d76SGreg Roach                    $date = strtoupper($date);
1172c685d76SGreg Roach                    // Temporarily add leading/trailing spaces, to allow efficient matching below
1182c685d76SGreg Roach                    $date = ' ' . $date . ' ';
1192c685d76SGreg Roach                    // Ensure space digits and letters
1202c685d76SGreg Roach                    $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date);
1212c685d76SGreg Roach                    $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date);
1222c685d76SGreg Roach                    // Ensure space before/after calendar escapes
1232c685d76SGreg Roach                    $date = preg_replace('/@#[^@]+@/', ' $0 ', $date);
1242c685d76SGreg Roach                    // "BET." => "BET"
1252c685d76SGreg Roach                    $date = preg_replace('/(\w\w)\./', '$1', $date);
1262c685d76SGreg Roach                    // "CIR" => "ABT"
1272c685d76SGreg Roach                    $date = str_replace(' CIR ', ' ABT ', $date);
1282c685d76SGreg Roach                    $date = str_replace(' APX ', ' ABT ', $date);
1292c685d76SGreg Roach                    // B.C. => BC (temporarily, to allow easier handling of ".")
1302c685d76SGreg Roach                    $date = str_replace(' B.C. ', ' BC ', $date);
1312c685d76SGreg Roach                    // TMG uses "EITHER X OR Y"
1322c685d76SGreg Roach                    $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date);
1332c685d76SGreg Roach                    // "BET X - Y " => "BET X AND Y"
1342c685d76SGreg Roach                    $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date);
1352c685d76SGreg Roach                    $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date);
1362c685d76SGreg Roach                    // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y"
1372c685d76SGreg Roach                    $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date);
1382c685d76SGreg Roach                    $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date);
1392c685d76SGreg Roach                    // "@#ESC@ AFT X" => "AFT @#ESC@ X"
1402c685d76SGreg Roach                    $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date);
1412c685d76SGreg Roach                    // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900"
1422c685d76SGreg Roach                    // (don't change "/" - it is used in NS/OS dates)
1432c685d76SGreg Roach                    $date = preg_replace('/[.,:;-]/', ' ', $date);
1442c685d76SGreg Roach                    // BC => B.C.
1452c685d76SGreg Roach                    $date = str_replace(' BC ', ' B.C. ', $date);
1462c685d76SGreg Roach                    // Append the "INT" text
1472c685d76SGreg Roach                    $data = $date . $text;
1482c685d76SGreg Roach                    break;
1492c685d76SGreg Roach                case '_FILE':
1502c685d76SGreg Roach                    $tag = 'FILE';
1512c685d76SGreg Roach                    break;
1522c685d76SGreg Roach                case 'FORM':
1532c685d76SGreg Roach                    // Consistent commas
1542c685d76SGreg Roach                    $data = preg_replace('/ *, */', ', ', $data);
1552c685d76SGreg Roach                    break;
1562c685d76SGreg Roach                case 'HEAD':
1572c685d76SGreg Roach                    // HEAD records don't have an XREF or DATA
1582c685d76SGreg Roach                    if ($level === '0') {
1592c685d76SGreg Roach                        $xref = '';
1602c685d76SGreg Roach                        $data = '';
1612c685d76SGreg Roach                    }
1622c685d76SGreg Roach                    break;
1632c685d76SGreg Roach                case 'NAME':
1642c685d76SGreg Roach                    // Tidy up non-printing characters
1652c685d76SGreg Roach                    $data = preg_replace('/  +/', ' ', trim($data));
1662c685d76SGreg Roach                    break;
1672c685d76SGreg Roach                case 'PEDI':
1682c685d76SGreg Roach                    // PEDI values are lower case
1692c685d76SGreg Roach                    $data = strtolower($data);
1702c685d76SGreg Roach                    break;
1712c685d76SGreg Roach                case 'PLAC':
1722c685d76SGreg Roach                    // Consistent commas
1732c685d76SGreg Roach                    $data = preg_replace('/ *[,,،] */u', ', ', $data);
1742c685d76SGreg Roach                    // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W
1752c685d76SGreg Roach                    if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $match)) {
1762c685d76SGreg Roach                        $data =
1772c685d76SGreg Roach                            $match[1] . "\n" .
1782c685d76SGreg Roach                            ($level + 1) . " MAP\n" .
1792c685d76SGreg Roach                            ($level + 2) . ' LATI ' . ($match[5] . round($match[2] + ($match[3] / 60) + ($match[4] / 3600), 4)) . "\n" .
1802c685d76SGreg Roach                            ($level + 2) . ' LONG ' . ($match[9] . round($match[6] + ($match[7] / 60) + ($match[8] / 3600), 4));
1812c685d76SGreg Roach                    }
1822c685d76SGreg Roach                    break;
1832c685d76SGreg Roach                case 'RESN':
1842c685d76SGreg Roach                    // RESN values are lower case (confidential, privacy, locked, none)
1852c685d76SGreg Roach                    $data = strtolower($data);
1862c685d76SGreg Roach                    if ($data === 'invisible') {
1872c685d76SGreg Roach                        $data = 'confidential'; // From old versions of Legacy.
1882c685d76SGreg Roach                    }
1892c685d76SGreg Roach                    break;
1902c685d76SGreg Roach                case 'SEX':
1912c685d76SGreg Roach                    $data = strtoupper($data);
1922c685d76SGreg Roach                    break;
1932c685d76SGreg Roach                case 'STAT':
1942c685d76SGreg Roach                    if ($data === 'CANCELLED') {
1952c685d76SGreg Roach                        // PhpGedView mis-spells this tag - correct it.
1962c685d76SGreg Roach                        $data = 'CANCELED';
1972c685d76SGreg Roach                    }
1982c685d76SGreg Roach                    break;
1992c685d76SGreg Roach                case 'TEMP':
2002c685d76SGreg Roach                    // Temple codes are upper case
2012c685d76SGreg Roach                    $data = strtoupper($data);
2022c685d76SGreg Roach                    break;
2032c685d76SGreg Roach                case 'TRLR':
2042c685d76SGreg Roach                    // TRLR records don't have an XREF or DATA
2052c685d76SGreg Roach                    if ($level === '0') {
2062c685d76SGreg Roach                        $xref = '';
2072c685d76SGreg Roach                        $data = '';
2082c685d76SGreg Roach                    }
2092c685d76SGreg Roach                    break;
2102c685d76SGreg Roach            }
2112c685d76SGreg Roach            // Suppress "Y", for facts/events with a DATE or PLAC
2122c685d76SGreg Roach            if ($data === 'y') {
2132c685d76SGreg Roach                $data = 'Y';
2142c685d76SGreg Roach            }
2152c685d76SGreg Roach            if ($level === '1' && $data === 'Y') {
2162c685d76SGreg Roach                for ($i = $n + 1; $i < $num_matches - 1 && $matches[$i][1] !== '1'; ++$i) {
2172c685d76SGreg Roach                    if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') {
2182c685d76SGreg Roach                        $data = '';
2192c685d76SGreg Roach                        break;
2202c685d76SGreg Roach                    }
2212c685d76SGreg Roach                }
2222c685d76SGreg Roach            }
2232c685d76SGreg Roach            // Reassemble components back into a single line
2242c685d76SGreg Roach            switch ($tag) {
2252c685d76SGreg Roach                default:
2262c685d76SGreg Roach                    // Remove tabs and multiple/leading/trailing spaces
2272c685d76SGreg Roach                    $data = strtr($data, ["\t" => ' ']);
2282c685d76SGreg Roach                    $data = trim($data, ' ');
2292c685d76SGreg Roach                    while (str_contains($data, '  ')) {
2302c685d76SGreg Roach                        $data = strtr($data, ['  ' => ' ']);
2312c685d76SGreg Roach                    }
2322c685d76SGreg Roach                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
2332c685d76SGreg Roach                    break;
2342c685d76SGreg Roach                case 'NOTE':
2352c685d76SGreg Roach                case 'TEXT':
2362c685d76SGreg Roach                case 'DATA':
2372c685d76SGreg Roach                case 'CONT':
2382c685d76SGreg Roach                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
2392c685d76SGreg Roach                    break;
2402c685d76SGreg Roach                case 'FILE':
2412c685d76SGreg Roach                    // Strip off the user-defined path prefix
2422c685d76SGreg Roach                    $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH');
2432c685d76SGreg Roach                    if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) {
2442c685d76SGreg Roach                        $data = substr($data, strlen($GEDCOM_MEDIA_PATH));
2452c685d76SGreg Roach                    }
2462c685d76SGreg Roach                    // convert backslashes in filenames to forward slashes
2472c685d76SGreg Roach                    $data = preg_replace("/\\\\/", '/', $data);
2482c685d76SGreg Roach
2492c685d76SGreg Roach                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
2502c685d76SGreg Roach                    break;
2512c685d76SGreg Roach                case 'CONC':
2522c685d76SGreg Roach                    // Merge CONC lines, to simplify access later on.
2532c685d76SGreg Roach                    $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data;
2542c685d76SGreg Roach                    break;
2552c685d76SGreg Roach            }
2562c685d76SGreg Roach        }
2572c685d76SGreg Roach
2582c685d76SGreg Roach        return $newrec;
2592c685d76SGreg Roach    }
2602c685d76SGreg Roach
2612c685d76SGreg Roach    /**
2622c685d76SGreg Roach     * import record into database
2632c685d76SGreg Roach     * this function will parse the given gedcom record and add it to the database
2642c685d76SGreg Roach     *
2652c685d76SGreg Roach     * @param string $gedrec the raw gedcom record to parse
2662c685d76SGreg Roach     * @param Tree   $tree   import the record into this tree
2672c685d76SGreg Roach     * @param bool   $update whether this is an updated record that has been accepted
2682c685d76SGreg Roach     *
2692c685d76SGreg Roach     * @return void
2702c685d76SGreg Roach     * @throws GedcomErrorException
2712c685d76SGreg Roach     */
2722c685d76SGreg Roach    public function importRecord(string $gedrec, Tree $tree, bool $update): void
2732c685d76SGreg Roach    {
2742c685d76SGreg Roach        $tree_id = $tree->id();
2752c685d76SGreg Roach
2762c685d76SGreg Roach        // Escaped @ signs (only if importing from file)
2772c685d76SGreg Roach        if (!$update) {
2782c685d76SGreg Roach            $gedrec = str_replace('@@', '@', $gedrec);
2792c685d76SGreg Roach        }
2802c685d76SGreg Roach
2812c685d76SGreg Roach        // Standardise gedcom format
2822c685d76SGreg Roach        $gedrec = $this->reformatRecord($gedrec, $tree);
2832c685d76SGreg Roach
2842c685d76SGreg Roach        // import different types of records
2852c685d76SGreg Roach        if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) {
2862c685d76SGreg Roach            [, $xref, $type] = $match;
2872c685d76SGreg Roach        } elseif (preg_match('/0 (HEAD|TRLR|_PLAC |_PLAC_DEFN)/', $gedrec, $match)) {
2882c685d76SGreg Roach            $type = $match[1];
2892c685d76SGreg Roach            $xref = $type; // For records without an XREF, use the type as a pseudo XREF.
2902c685d76SGreg Roach        } else {
2912c685d76SGreg Roach            throw new GedcomErrorException($gedrec);
2922c685d76SGreg Roach        }
2932c685d76SGreg Roach
2942c685d76SGreg Roach        // Add a _UID
2952c685d76SGreg Roach        if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) {
2962c685d76SGreg Roach            $element = Registry::elementFactory()->make($type . ':_UID');
2972c685d76SGreg Roach            if (!$element instanceof UnknownElement) {
2982c685d76SGreg Roach                $gedrec .= "\n1 _UID " . $element->default($tree);
2992c685d76SGreg Roach            }
3002c685d76SGreg Roach        }
3012c685d76SGreg Roach
3022c685d76SGreg Roach        // If the user has downloaded their GEDCOM data (containing media objects) and edited it
3032c685d76SGreg Roach        // using an application which does not support (and deletes) media objects, then add them
3042c685d76SGreg Roach        // back in.
3052c685d76SGreg Roach        if ($tree->getPreference('keep_media')) {
3062c685d76SGreg Roach            $old_linked_media = DB::table('link')
3072c685d76SGreg Roach                ->where('l_from', '=', $xref)
3082c685d76SGreg Roach                ->where('l_file', '=', $tree_id)
3092c685d76SGreg Roach                ->where('l_type', '=', 'OBJE')
3102c685d76SGreg Roach                ->pluck('l_to');
3112c685d76SGreg Roach
3122c685d76SGreg Roach            // Delete these links - so that we do not insert them again in updateLinks()
3132c685d76SGreg Roach            DB::table('link')
3142c685d76SGreg Roach                ->where('l_from', '=', $xref)
3152c685d76SGreg Roach                ->where('l_file', '=', $tree_id)
3162c685d76SGreg Roach                ->where('l_type', '=', 'OBJE')
3172c685d76SGreg Roach                ->delete();
3182c685d76SGreg Roach
3192c685d76SGreg Roach            foreach ($old_linked_media as $media_id) {
3202c685d76SGreg Roach                $gedrec .= "\n1 OBJE @" . $media_id . '@';
3212c685d76SGreg Roach            }
3222c685d76SGreg Roach        }
3232c685d76SGreg Roach
3242c685d76SGreg Roach        // Convert inline media into media objects
3252c685d76SGreg Roach        $gedrec = $this->convertInlineMedia($tree, $gedrec);
3262c685d76SGreg Roach
3272c685d76SGreg Roach        switch ($type) {
3282c685d76SGreg Roach            case Individual::RECORD_TYPE:
3292c685d76SGreg Roach                $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree);
3302c685d76SGreg Roach
3312c685d76SGreg Roach                if (preg_match('/\n1 RIN (.+)/', $gedrec, $match)) {
3322c685d76SGreg Roach                    $rin = $match[1];
3332c685d76SGreg Roach                } else {
3342c685d76SGreg Roach                    $rin = $xref;
3352c685d76SGreg Roach                }
3362c685d76SGreg Roach
3372c685d76SGreg Roach                DB::table('individuals')->insert([
3382c685d76SGreg Roach                    'i_id'     => $xref,
3392c685d76SGreg Roach                    'i_file'   => $tree_id,
3402c685d76SGreg Roach                    'i_rin'    => $rin,
3412c685d76SGreg Roach                    'i_sex'    => $record->sex(),
3422c685d76SGreg Roach                    'i_gedcom' => $gedrec,
3432c685d76SGreg Roach                ]);
3442c685d76SGreg Roach
3452c685d76SGreg Roach                // Update the cross-reference/index tables.
3462c685d76SGreg Roach                $this->updatePlaces($xref, $tree, $gedrec);
3472c685d76SGreg Roach                $this->updateDates($xref, $tree_id, $gedrec);
3482c685d76SGreg Roach                $this->updateNames($xref, $tree_id, $record);
3492c685d76SGreg Roach                break;
3502c685d76SGreg Roach
3512c685d76SGreg Roach            case Family::RECORD_TYPE:
3522c685d76SGreg Roach                if (preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
3532c685d76SGreg Roach                    $husb = $match[1];
3542c685d76SGreg Roach                } else {
3552c685d76SGreg Roach                    $husb = '';
3562c685d76SGreg Roach                }
3572c685d76SGreg Roach                if (preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
3582c685d76SGreg Roach                    $wife = $match[1];
3592c685d76SGreg Roach                } else {
3602c685d76SGreg Roach                    $wife = '';
3612c685d76SGreg Roach                }
3622c685d76SGreg Roach                $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match);
3632c685d76SGreg Roach                if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) {
3642c685d76SGreg Roach                    $nchi = max($nchi, $match[1]);
3652c685d76SGreg Roach                }
3662c685d76SGreg Roach
3672c685d76SGreg Roach                DB::table('families')->insert([
3682c685d76SGreg Roach                    'f_id'      => $xref,
3692c685d76SGreg Roach                    'f_file'    => $tree_id,
3702c685d76SGreg Roach                    'f_husb'    => $husb,
3712c685d76SGreg Roach                    'f_wife'    => $wife,
3722c685d76SGreg Roach                    'f_gedcom'  => $gedrec,
3732c685d76SGreg Roach                    'f_numchil' => $nchi,
3742c685d76SGreg Roach                ]);
3752c685d76SGreg Roach
3762c685d76SGreg Roach                // Update the cross-reference/index tables.
3772c685d76SGreg Roach                $this->updatePlaces($xref, $tree, $gedrec);
3782c685d76SGreg Roach                $this->updateDates($xref, $tree_id, $gedrec);
3792c685d76SGreg Roach                break;
3802c685d76SGreg Roach
3812c685d76SGreg Roach            case Source::RECORD_TYPE:
3822c685d76SGreg Roach                if (preg_match('/\n1 TITL (.+)/', $gedrec, $match)) {
3832c685d76SGreg Roach                    $name = $match[1];
3842c685d76SGreg Roach                } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) {
3852c685d76SGreg Roach                    $name = $match[1];
3862c685d76SGreg Roach                } else {
3872c685d76SGreg Roach                    $name = $xref;
3882c685d76SGreg Roach                }
3892c685d76SGreg Roach
3902c685d76SGreg Roach                DB::table('sources')->insert([
3912c685d76SGreg Roach                    's_id'     => $xref,
3922c685d76SGreg Roach                    's_file'   => $tree_id,
3932c685d76SGreg Roach                    's_name'   => mb_substr($name, 0, 255),
3942c685d76SGreg Roach                    's_gedcom' => $gedrec,
3952c685d76SGreg Roach                ]);
3962c685d76SGreg Roach                break;
3972c685d76SGreg Roach
3982c685d76SGreg Roach            case Repository::RECORD_TYPE:
3992c685d76SGreg Roach            case Note::RECORD_TYPE:
4002c685d76SGreg Roach            case Submission::RECORD_TYPE:
4012c685d76SGreg Roach            case Submitter::RECORD_TYPE:
4022c685d76SGreg Roach            case Location::RECORD_TYPE:
4032c685d76SGreg Roach                DB::table('other')->insert([
4042c685d76SGreg Roach                    'o_id'     => $xref,
4052c685d76SGreg Roach                    'o_file'   => $tree_id,
4062c685d76SGreg Roach                    'o_type'   => $type,
4072c685d76SGreg Roach                    'o_gedcom' => $gedrec,
4082c685d76SGreg Roach                ]);
4092c685d76SGreg Roach                break;
4102c685d76SGreg Roach
4112c685d76SGreg Roach            case Header::RECORD_TYPE:
4122c685d76SGreg Roach                // Force HEAD records to have a creation date.
4132c685d76SGreg Roach                if (!str_contains($gedrec, "\n1 DATE ")) {
4142c685d76SGreg Roach                    $today = strtoupper(date('d M Y'));
4152c685d76SGreg Roach                    $gedrec .= "\n1 DATE " . $today;
4162c685d76SGreg Roach                }
4172c685d76SGreg Roach
4182c685d76SGreg Roach                DB::table('other')->insert([
4192c685d76SGreg Roach                    'o_id'     => $xref,
4202c685d76SGreg Roach                    'o_file'   => $tree_id,
4212c685d76SGreg Roach                    'o_type'   => Header::RECORD_TYPE,
4222c685d76SGreg Roach                    'o_gedcom' => $gedrec,
4232c685d76SGreg Roach                ]);
4242c685d76SGreg Roach                break;
4252c685d76SGreg Roach
4262c685d76SGreg Roach
4272c685d76SGreg Roach            case Media::RECORD_TYPE:
4282c685d76SGreg Roach                $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree);
4292c685d76SGreg Roach
4302c685d76SGreg Roach                DB::table('media')->insert([
4312c685d76SGreg Roach                    'm_id'     => $xref,
4322c685d76SGreg Roach                    'm_file'   => $tree_id,
4332c685d76SGreg Roach                    'm_gedcom' => $gedrec,
4342c685d76SGreg Roach                ]);
4352c685d76SGreg Roach
4362c685d76SGreg Roach                foreach ($record->mediaFiles() as $media_file) {
4372c685d76SGreg Roach                    DB::table('media_file')->insert([
4382c685d76SGreg Roach                        'm_id'                 => $xref,
4392c685d76SGreg Roach                        'm_file'               => $tree_id,
4402c685d76SGreg Roach                        'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
4412c685d76SGreg Roach                        'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
4422c685d76SGreg Roach                        'source_media_type'    => mb_substr($media_file->type(), 0, 15),
4432c685d76SGreg Roach                        'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
4442c685d76SGreg Roach                    ]);
4452c685d76SGreg Roach                }
4462c685d76SGreg Roach                break;
4472c685d76SGreg Roach
4482c685d76SGreg Roach            case '_PLAC ':
4492c685d76SGreg Roach                $this->importTNGPlac($gedrec);
4502c685d76SGreg Roach                return;
4512c685d76SGreg Roach
4522c685d76SGreg Roach            case '_PLAC_DEFN':
4532c685d76SGreg Roach                $this->importLegacyPlacDefn($gedrec);
4542c685d76SGreg Roach                return;
4552c685d76SGreg Roach
4562c685d76SGreg Roach            default: // Custom record types.
4572c685d76SGreg Roach                DB::table('other')->insert([
4582c685d76SGreg Roach                    'o_id'     => $xref,
4592c685d76SGreg Roach                    'o_file'   => $tree_id,
4602c685d76SGreg Roach                    'o_type'   => mb_substr($type, 0, 15),
4612c685d76SGreg Roach                    'o_gedcom' => $gedrec,
4622c685d76SGreg Roach                ]);
4632c685d76SGreg Roach                break;
4642c685d76SGreg Roach        }
4652c685d76SGreg Roach
4662c685d76SGreg Roach        // Update the cross-reference/index tables.
4672c685d76SGreg Roach        $this->updateLinks($xref, $tree_id, $gedrec);
4682c685d76SGreg Roach    }
4692c685d76SGreg Roach
4702c685d76SGreg Roach    /**
4712c685d76SGreg Roach     * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values
4722c685d76SGreg Roach     *
4732c685d76SGreg Roach     * @param string $gedcom
4742c685d76SGreg Roach     */
4752c685d76SGreg Roach    private function importLegacyPlacDefn(string $gedcom): void
4762c685d76SGreg Roach    {
4772c685d76SGreg Roach        $gedcom_service = new GedcomService();
4782c685d76SGreg Roach
4792c685d76SGreg Roach        if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match)) {
4802c685d76SGreg Roach            $place_name = $match[1];
4812c685d76SGreg Roach        } else {
4822c685d76SGreg Roach            return;
4832c685d76SGreg Roach        }
4842c685d76SGreg Roach
4852c685d76SGreg Roach        if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match)) {
4862c685d76SGreg Roach            $latitude = $gedcom_service->readLatitude($match[1]);
4872c685d76SGreg Roach        } else {
4882c685d76SGreg Roach            return;
4892c685d76SGreg Roach        }
4902c685d76SGreg Roach
4912c685d76SGreg Roach        if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match)) {
4922c685d76SGreg Roach            $longitude = $gedcom_service->readLongitude($match[1]);
4932c685d76SGreg Roach        } else {
4942c685d76SGreg Roach            return;
4952c685d76SGreg Roach        }
4962c685d76SGreg Roach
4972c685d76SGreg Roach        $location = new PlaceLocation($place_name);
4982c685d76SGreg Roach
4992c685d76SGreg Roach        if ($location->latitude() === null && $location->longitude() === null) {
5002c685d76SGreg Roach            DB::table('place_location')
5012c685d76SGreg Roach                ->where('id', '=', $location->id())
5022c685d76SGreg Roach                ->update([
5032c685d76SGreg Roach                    'latitude'  => $latitude,
5042c685d76SGreg Roach                    'longitude' => $longitude,
5052c685d76SGreg Roach                ]);
5062c685d76SGreg Roach        }
5072c685d76SGreg Roach    }
5082c685d76SGreg Roach
5092c685d76SGreg Roach    /**
5102c685d76SGreg Roach     * Legacy Family Tree software generates _PLAC records containing LAT/LONG values
5112c685d76SGreg Roach     *
5122c685d76SGreg Roach     * @param string $gedcom
5132c685d76SGreg Roach     */
5142c685d76SGreg Roach    private function importTNGPlac(string $gedcom): void
5152c685d76SGreg Roach    {
5162c685d76SGreg Roach        if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match)) {
5172c685d76SGreg Roach            $place_name = $match[1];
5182c685d76SGreg Roach        } else {
5192c685d76SGreg Roach            return;
5202c685d76SGreg Roach        }
5212c685d76SGreg Roach
5222c685d76SGreg Roach        if (preg_match('/\n2 LATI (.+)/', $gedcom, $match)) {
5232c685d76SGreg Roach            $latitude = (float) $match[1];
5242c685d76SGreg Roach        } else {
5252c685d76SGreg Roach            return;
5262c685d76SGreg Roach        }
5272c685d76SGreg Roach
5282c685d76SGreg Roach        if (preg_match('/\n2 LONG (.+)/', $gedcom, $match)) {
5292c685d76SGreg Roach            $longitude = (float) $match[1];
5302c685d76SGreg Roach        } else {
5312c685d76SGreg Roach            return;
5322c685d76SGreg Roach        }
5332c685d76SGreg Roach
5342c685d76SGreg Roach        $location = new PlaceLocation($place_name);
5352c685d76SGreg Roach
5362c685d76SGreg Roach        if ($location->latitude() === null && $location->longitude() === null) {
5372c685d76SGreg Roach            DB::table('place_location')
5382c685d76SGreg Roach                ->where('id', '=', $location->id())
5392c685d76SGreg Roach                ->update([
5402c685d76SGreg Roach                    'latitude'  => $latitude,
5412c685d76SGreg Roach                    'longitude' => $longitude,
5422c685d76SGreg Roach                ]);
5432c685d76SGreg Roach        }
5442c685d76SGreg Roach    }
5452c685d76SGreg Roach
5462c685d76SGreg Roach    /**
5472c685d76SGreg Roach     * Extract all level 2 places from the given record and insert them into the places table
5482c685d76SGreg Roach     *
5492c685d76SGreg Roach     * @param string $xref
5502c685d76SGreg Roach     * @param Tree   $tree
5512c685d76SGreg Roach     * @param string $gedrec
5522c685d76SGreg Roach     *
5532c685d76SGreg Roach     * @return void
5542c685d76SGreg Roach     */
5552c685d76SGreg Roach    public function updatePlaces(string $xref, Tree $tree, string $gedrec): void
5562c685d76SGreg Roach    {
5572c685d76SGreg Roach        // Insert all new rows together
5582c685d76SGreg Roach        $rows = [];
5592c685d76SGreg Roach
5602c685d76SGreg Roach        preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches);
5612c685d76SGreg Roach
5622c685d76SGreg Roach        $places = array_unique($matches[1]);
5632c685d76SGreg Roach
5642c685d76SGreg Roach        foreach ($places as $place_name) {
5652c685d76SGreg Roach            $place = new Place($place_name, $tree);
5662c685d76SGreg Roach
5672c685d76SGreg Roach            // Calling Place::id() will create the entry in the database, if it doesn't already exist.
5682c685d76SGreg Roach            while ($place->id() !== 0) {
5692c685d76SGreg Roach                $rows[] = [
5702c685d76SGreg Roach                    'pl_p_id' => $place->id(),
5712c685d76SGreg Roach                    'pl_gid'  => $xref,
5722c685d76SGreg Roach                    'pl_file' => $tree->id(),
5732c685d76SGreg Roach                ];
5742c685d76SGreg Roach
5752c685d76SGreg Roach                $place = $place->parent();
5762c685d76SGreg Roach            }
5772c685d76SGreg Roach        }
5782c685d76SGreg Roach
5792c685d76SGreg Roach        // array_unique doesn't work with arrays of arrays
5802c685d76SGreg Roach        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));
5812c685d76SGreg Roach
5822c685d76SGreg Roach        // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders.
5832c685d76SGreg Roach        foreach (array_chunk($rows, 20000) as $chunk) {
5842c685d76SGreg Roach            DB::table('placelinks')->insert($chunk);
5852c685d76SGreg Roach        }
5862c685d76SGreg Roach    }
5872c685d76SGreg Roach
5882c685d76SGreg Roach    /**
5892c685d76SGreg Roach     * Extract all the dates from the given record and insert them into the database.
5902c685d76SGreg Roach     *
5912c685d76SGreg Roach     * @param string $xref
5922c685d76SGreg Roach     * @param int    $ged_id
5932c685d76SGreg Roach     * @param string $gedrec
5942c685d76SGreg Roach     *
5952c685d76SGreg Roach     * @return void
5962c685d76SGreg Roach     */
5972c685d76SGreg Roach    private function updateDates(string $xref, int $ged_id, string $gedrec): void
5982c685d76SGreg Roach    {
5992c685d76SGreg Roach        // Insert all new rows together
6002c685d76SGreg Roach        $rows = [];
6012c685d76SGreg Roach
6022c685d76SGreg Roach        preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER);
6032c685d76SGreg Roach
6042c685d76SGreg Roach        foreach ($matches as $match) {
6052c685d76SGreg Roach            $fact = $match[1];
6062c685d76SGreg Roach            $date = new Date($match[2]);
6072c685d76SGreg Roach            $rows[] = [
6082c685d76SGreg Roach                'd_day'        => $date->minimumDate()->day,
6092c685d76SGreg Roach                'd_month'      => $date->minimumDate()->format('%O'),
6102c685d76SGreg Roach                'd_mon'        => $date->minimumDate()->month,
6112c685d76SGreg Roach                'd_year'       => $date->minimumDate()->year,
6122c685d76SGreg Roach                'd_julianday1' => $date->minimumDate()->minimumJulianDay(),
6132c685d76SGreg Roach                'd_julianday2' => $date->minimumDate()->maximumJulianDay(),
6142c685d76SGreg Roach                'd_fact'       => $fact,
6152c685d76SGreg Roach                'd_gid'        => $xref,
6162c685d76SGreg Roach                'd_file'       => $ged_id,
6172c685d76SGreg Roach                'd_type'       => $date->minimumDate()->format('%@'),
6182c685d76SGreg Roach            ];
6192c685d76SGreg Roach
6202c685d76SGreg Roach            $rows[] = [
6212c685d76SGreg Roach                'd_day'        => $date->maximumDate()->day,
6222c685d76SGreg Roach                'd_month'      => $date->maximumDate()->format('%O'),
6232c685d76SGreg Roach                'd_mon'        => $date->maximumDate()->month,
6242c685d76SGreg Roach                'd_year'       => $date->maximumDate()->year,
6252c685d76SGreg Roach                'd_julianday1' => $date->maximumDate()->minimumJulianDay(),
6262c685d76SGreg Roach                'd_julianday2' => $date->maximumDate()->maximumJulianDay(),
6272c685d76SGreg Roach                'd_fact'       => $fact,
6282c685d76SGreg Roach                'd_gid'        => $xref,
6292c685d76SGreg Roach                'd_file'       => $ged_id,
6302c685d76SGreg Roach                'd_type'       => $date->minimumDate()->format('%@'),
6312c685d76SGreg Roach            ];
6322c685d76SGreg Roach        }
6332c685d76SGreg Roach
6342c685d76SGreg Roach        // array_unique doesn't work with arrays of arrays
6352c685d76SGreg Roach        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));
6362c685d76SGreg Roach
6372c685d76SGreg Roach        DB::table('dates')->insert($rows);
6382c685d76SGreg Roach    }
6392c685d76SGreg Roach
6402c685d76SGreg Roach    /**
6412c685d76SGreg Roach     * Extract all the links from the given record and insert them into the database
6422c685d76SGreg Roach     *
6432c685d76SGreg Roach     * @param string $xref
6442c685d76SGreg Roach     * @param int    $ged_id
6452c685d76SGreg Roach     * @param string $gedrec
6462c685d76SGreg Roach     *
6472c685d76SGreg Roach     * @return void
6482c685d76SGreg Roach     */
6492c685d76SGreg Roach    private function updateLinks(string $xref, int $ged_id, string $gedrec): void
6502c685d76SGreg Roach    {
6512c685d76SGreg Roach        // Insert all new rows together
6522c685d76SGreg Roach        $rows = [];
6532c685d76SGreg Roach
6542c685d76SGreg Roach        preg_match_all('/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $matches, PREG_SET_ORDER);
6552c685d76SGreg Roach
6562c685d76SGreg Roach        foreach ($matches as $match) {
6572c685d76SGreg Roach            // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@"
6582c685d76SGreg Roach            $rows[$match[1] . strtoupper($match[2])] = [
6592c685d76SGreg Roach                'l_from' => $xref,
6602c685d76SGreg Roach                'l_to'   => $match[2],
6612c685d76SGreg Roach                'l_type' => $match[1],
6622c685d76SGreg Roach                'l_file' => $ged_id,
6632c685d76SGreg Roach            ];
6642c685d76SGreg Roach        }
6652c685d76SGreg Roach
6662c685d76SGreg Roach        DB::table('link')->insert($rows);
6672c685d76SGreg Roach    }
6682c685d76SGreg Roach
6692c685d76SGreg Roach    /**
6702c685d76SGreg Roach     * Extract all the names from the given record and insert them into the database.
6712c685d76SGreg Roach     *
6722c685d76SGreg Roach     * @param string     $xref
6732c685d76SGreg Roach     * @param int        $ged_id
6742c685d76SGreg Roach     * @param Individual $record
6752c685d76SGreg Roach     *
6762c685d76SGreg Roach     * @return void
6772c685d76SGreg Roach     */
6782c685d76SGreg Roach    private function updateNames(string $xref, int $ged_id, Individual $record): void
6792c685d76SGreg Roach    {
6802c685d76SGreg Roach        // Insert all new rows together
6812c685d76SGreg Roach        $rows = [];
6822c685d76SGreg Roach
6832c685d76SGreg Roach        foreach ($record->getAllNames() as $n => $name) {
6842c685d76SGreg Roach            if ($name['givn'] === Individual::PRAENOMEN_NESCIO) {
6852c685d76SGreg Roach                $soundex_givn_std = null;
6862c685d76SGreg Roach                $soundex_givn_dm  = null;
6872c685d76SGreg Roach            } else {
6882c685d76SGreg Roach                $soundex_givn_std = Soundex::russell($name['givn']);
6892c685d76SGreg Roach                $soundex_givn_dm  = Soundex::daitchMokotoff($name['givn']);
6902c685d76SGreg Roach            }
6912c685d76SGreg Roach
6922c685d76SGreg Roach            if ($name['surn'] === Individual::NOMEN_NESCIO) {
6932c685d76SGreg Roach                $soundex_surn_std = null;
6942c685d76SGreg Roach                $soundex_surn_dm  = null;
6952c685d76SGreg Roach            } else {
6962c685d76SGreg Roach                $soundex_surn_std = Soundex::russell($name['surname']);
6972c685d76SGreg Roach                $soundex_surn_dm  = Soundex::daitchMokotoff($name['surname']);
6982c685d76SGreg Roach            }
6992c685d76SGreg Roach
7002c685d76SGreg Roach            $rows[] = [
7012c685d76SGreg Roach                'n_file'             => $ged_id,
7022c685d76SGreg Roach                'n_id'               => $xref,
7032c685d76SGreg Roach                'n_num'              => $n,
7042c685d76SGreg Roach                'n_type'             => $name['type'],
7052c685d76SGreg Roach                'n_sort'             => mb_substr($name['sort'], 0, 255),
7062c685d76SGreg Roach                'n_full'             => mb_substr($name['fullNN'], 0, 255),
7072c685d76SGreg Roach                'n_surname'          => mb_substr($name['surname'], 0, 255),
7082c685d76SGreg Roach                'n_surn'             => mb_substr($name['surn'], 0, 255),
7092c685d76SGreg Roach                'n_givn'             => mb_substr($name['givn'], 0, 255),
7102c685d76SGreg Roach                'n_soundex_givn_std' => $soundex_givn_std,
7112c685d76SGreg Roach                'n_soundex_surn_std' => $soundex_surn_std,
7122c685d76SGreg Roach                'n_soundex_givn_dm'  => $soundex_givn_dm,
7132c685d76SGreg Roach                'n_soundex_surn_dm'  => $soundex_surn_dm,
7142c685d76SGreg Roach            ];
7152c685d76SGreg Roach        }
7162c685d76SGreg Roach
7172c685d76SGreg Roach        DB::table('name')->insert($rows);
7182c685d76SGreg Roach    }
7192c685d76SGreg Roach
7202c685d76SGreg Roach    /**
7212c685d76SGreg Roach     * Extract inline media data, and convert to media objects.
7222c685d76SGreg Roach     *
7232c685d76SGreg Roach     * @param Tree   $tree
7242c685d76SGreg Roach     * @param string $gedcom
7252c685d76SGreg Roach     *
7262c685d76SGreg Roach     * @return string
7272c685d76SGreg Roach     */
7282c685d76SGreg Roach    private function convertInlineMedia(Tree $tree, string $gedcom): string
7292c685d76SGreg Roach    {
7302c685d76SGreg Roach        while (preg_match('/\n1 OBJE(?:\n[2-9].+)+/', $gedcom, $match)) {
7312c685d76SGreg Roach            $xref   = $this->createMediaObject($match[0], $tree);
7322c685d76SGreg Roach            $gedcom = strtr($gedcom, [$match[0] =>  "\n1 OBJE @" . $xref . '@']);
7332c685d76SGreg Roach        }
7342c685d76SGreg Roach        while (preg_match('/\n2 OBJE(?:\n[3-9].+)+/', $gedcom, $match)) {
7352c685d76SGreg Roach            $xref   = $this->createMediaObject($match[0], $tree);
7362c685d76SGreg Roach            $gedcom = strtr($gedcom, [$match[0] =>  "\n2 OBJE @" . $xref . '@']);
7372c685d76SGreg Roach        }
7382c685d76SGreg Roach        while (preg_match('/\n3 OBJE(?:\n[4-9].+)+/', $gedcom, $match)) {
7392c685d76SGreg Roach            $xref   = $this->createMediaObject($match[0], $tree);
7402c685d76SGreg Roach            $gedcom = strtr($gedcom, [$match[0] =>  "\n3 OBJE @" . $xref . '@']);
7412c685d76SGreg Roach        }
7422c685d76SGreg Roach
7432c685d76SGreg Roach        return $gedcom;
7442c685d76SGreg Roach    }
7452c685d76SGreg Roach
7462c685d76SGreg Roach    /**
7472c685d76SGreg Roach     * Create a new media object, from inline media data.
7482c685d76SGreg Roach     *
7492c685d76SGreg Roach     * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL
7502c685d76SGreg Roach     * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL
7512c685d76SGreg Roach     * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies:  +1 FILE / +1 FORM / +2 MEDI
7522c685d76SGreg Roach     *
7532c685d76SGreg Roach     * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE
7542c685d76SGreg Roach     * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL
7552c685d76SGreg Roach     *
7562c685d76SGreg Roach     * @param string $gedcom
7572c685d76SGreg Roach     * @param Tree   $tree
7582c685d76SGreg Roach     *
7592c685d76SGreg Roach     * @return string
7602c685d76SGreg Roach     */
7612c685d76SGreg Roach    private function createMediaObject(string $gedcom, Tree $tree): string
7622c685d76SGreg Roach    {
7632c685d76SGreg Roach        preg_match('/\n\d FILE (.+)/', $gedcom, $match);
7642c685d76SGreg Roach        $file = $match[1] ?? '';
7652c685d76SGreg Roach
7662c685d76SGreg Roach        preg_match('/\n\d TITL (.+)/', $gedcom, $match);
7672c685d76SGreg Roach        $title = $match[1] ?? '';
7682c685d76SGreg Roach
7692c685d76SGreg Roach        preg_match('/\n\d FORM (.+)/', $gedcom, $match);
7702c685d76SGreg Roach        $format = $match[1] ?? '';
7712c685d76SGreg Roach
7722c685d76SGreg Roach        preg_match('/\n\d MEDI (.+)/', $gedcom, $match);
7732c685d76SGreg Roach        $media = $match[1] ?? '';
7742c685d76SGreg Roach
7752c685d76SGreg Roach        preg_match('/\n\d _SCBK (.+)/', $gedcom, $match);
7762c685d76SGreg Roach        $scrapbook = $match[1] ?? '';
7772c685d76SGreg Roach
7782c685d76SGreg Roach        preg_match('/\n\d _PRIM (.+)/', $gedcom, $match);
7792c685d76SGreg Roach        $primary = $match[1] ?? '';
7802c685d76SGreg Roach
7812c685d76SGreg Roach        preg_match('/\n\d _TYPE (.+)/', $gedcom, $match);
7822c685d76SGreg Roach        if ($media === '') {
7832c685d76SGreg Roach            // Legacy uses _TYPE instead of MEDI
7842c685d76SGreg Roach            $media = $match[1] ?? '';
7852c685d76SGreg Roach            $type  = '';
7862c685d76SGreg Roach        } else {
7872c685d76SGreg Roach            $type = $match[1] ?? '';
7882c685d76SGreg Roach        }
7892c685d76SGreg Roach
7902c685d76SGreg Roach        preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches);
7912c685d76SGreg Roach        $notes = $matches[1] ?? [];
7922c685d76SGreg Roach
7932c685d76SGreg Roach        // Have we already created a media object with the same title/filename?
7942c685d76SGreg Roach        $xref = DB::table('media_file')
7952c685d76SGreg Roach            ->where('m_file', '=', $tree->id())
7962c685d76SGreg Roach            ->where('descriptive_title', '=', mb_substr($title, 0, 248))
7972c685d76SGreg Roach            ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248))
7982c685d76SGreg Roach            ->value('m_id');
7992c685d76SGreg Roach
8002c685d76SGreg Roach        if ($xref === null) {
8012c685d76SGreg Roach            $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE);
8022c685d76SGreg Roach
8032c685d76SGreg Roach            // convert to a media-object
8042c685d76SGreg Roach            $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file;
8052c685d76SGreg Roach
8062c685d76SGreg Roach            if ($format !== '') {
8072c685d76SGreg Roach                $gedcom .= "\n2 FORM " . $format;
8082c685d76SGreg Roach
8092c685d76SGreg Roach                if ($media !== '') {
8102c685d76SGreg Roach                    $gedcom .= "\n3 TYPE " . $media;
8112c685d76SGreg Roach                }
8122c685d76SGreg Roach            }
8132c685d76SGreg Roach
8142c685d76SGreg Roach            if ($title !== '') {
8152c685d76SGreg Roach                $gedcom .= "\n3 TITL " . $title;
8162c685d76SGreg Roach            }
8172c685d76SGreg Roach
8182c685d76SGreg Roach            if ($scrapbook !== '') {
8192c685d76SGreg Roach                $gedcom .= "\n1 _SCBK " . $scrapbook;
8202c685d76SGreg Roach            }
8212c685d76SGreg Roach
8222c685d76SGreg Roach            if ($primary !== '') {
8232c685d76SGreg Roach                $gedcom .= "\n1 _PRIM " . $primary;
8242c685d76SGreg Roach            }
8252c685d76SGreg Roach
8262c685d76SGreg Roach            if ($type !== '') {
8272c685d76SGreg Roach                $gedcom .= "\n1 _TYPE " . $type;
8282c685d76SGreg Roach            }
8292c685d76SGreg Roach
8302c685d76SGreg Roach            foreach ($notes as $note) {
8312c685d76SGreg Roach                $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]);
8322c685d76SGreg Roach            }
8332c685d76SGreg Roach
8342c685d76SGreg Roach            DB::table('media')->insert([
8352c685d76SGreg Roach                'm_id'     => $xref,
8362c685d76SGreg Roach                'm_file'   => $tree->id(),
8372c685d76SGreg Roach                'm_gedcom' => $gedcom,
8382c685d76SGreg Roach            ]);
8392c685d76SGreg Roach
8402c685d76SGreg Roach            DB::table('media_file')->insert([
8412c685d76SGreg Roach                'm_id'                 => $xref,
8422c685d76SGreg Roach                'm_file'               => $tree->id(),
8432c685d76SGreg Roach                'multimedia_file_refn' => mb_substr($file, 0, 248),
8442c685d76SGreg Roach                'multimedia_format'    => mb_substr($format, 0, 4),
8452c685d76SGreg Roach                'source_media_type'    => mb_substr($media, 0, 15),
8462c685d76SGreg Roach                'descriptive_title'    => mb_substr($title, 0, 248),
8472c685d76SGreg Roach            ]);
8482c685d76SGreg Roach        }
8492c685d76SGreg Roach
8502c685d76SGreg Roach        return $xref;
8512c685d76SGreg Roach    }
8522c685d76SGreg Roach
8532c685d76SGreg Roach    /**
8542c685d76SGreg Roach     * update a record in the database
8552c685d76SGreg Roach     *
8562c685d76SGreg Roach     * @param string $gedrec
8572c685d76SGreg Roach     * @param Tree   $tree
8582c685d76SGreg Roach     * @param bool   $delete
8592c685d76SGreg Roach     *
8602c685d76SGreg Roach     * @return void
8612c685d76SGreg Roach     * @throws GedcomErrorException
8622c685d76SGreg Roach     */
8632c685d76SGreg Roach    public function updateRecord(string $gedrec, Tree $tree, bool $delete): void
8642c685d76SGreg Roach    {
8652c685d76SGreg Roach        if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) {
8662c685d76SGreg Roach            [, $gid, $type] = $match;
8672c685d76SGreg Roach        } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) {
8682c685d76SGreg Roach            // The HEAD record has no XREF.  Any others?
8692c685d76SGreg Roach            $gid  = $match[1];
8702c685d76SGreg Roach            $type = $match[1];
8712c685d76SGreg Roach        } else {
8722c685d76SGreg Roach            throw new GedcomErrorException($gedrec);
8732c685d76SGreg Roach        }
8742c685d76SGreg Roach
8752c685d76SGreg Roach        // Place links
8762c685d76SGreg Roach        DB::table('placelinks')
8772c685d76SGreg Roach            ->where('pl_gid', '=', $gid)
8782c685d76SGreg Roach            ->where('pl_file', '=', $tree->id())
8792c685d76SGreg Roach            ->delete();
8802c685d76SGreg Roach
8812c685d76SGreg Roach        // Orphaned places.  If we're deleting  "Westminster, London, England",
8822c685d76SGreg Roach        // then we may also need to delete "London, England" and "England".
8832c685d76SGreg Roach        do {
8842c685d76SGreg Roach            $affected = DB::table('places')
8852c685d76SGreg Roach                ->leftJoin('placelinks', function (JoinClause $join): void {
8862c685d76SGreg Roach                    $join
8872c685d76SGreg Roach                        ->on('p_id', '=', 'pl_p_id')
8882c685d76SGreg Roach                        ->on('p_file', '=', 'pl_file');
8892c685d76SGreg Roach                })
8902c685d76SGreg Roach                ->whereNull('pl_p_id')
8912c685d76SGreg Roach                ->delete();
8922c685d76SGreg Roach        } while ($affected > 0);
8932c685d76SGreg Roach
8942c685d76SGreg Roach        DB::table('dates')
8952c685d76SGreg Roach            ->where('d_gid', '=', $gid)
8962c685d76SGreg Roach            ->where('d_file', '=', $tree->id())
8972c685d76SGreg Roach            ->delete();
8982c685d76SGreg Roach
8992c685d76SGreg Roach        DB::table('name')
9002c685d76SGreg Roach            ->where('n_id', '=', $gid)
9012c685d76SGreg Roach            ->where('n_file', '=', $tree->id())
9022c685d76SGreg Roach            ->delete();
9032c685d76SGreg Roach
9042c685d76SGreg Roach        DB::table('link')
9052c685d76SGreg Roach            ->where('l_from', '=', $gid)
9062c685d76SGreg Roach            ->where('l_file', '=', $tree->id())
9072c685d76SGreg Roach            ->delete();
9082c685d76SGreg Roach
9092c685d76SGreg Roach        switch ($type) {
9102c685d76SGreg Roach            case Individual::RECORD_TYPE:
9112c685d76SGreg Roach                DB::table('individuals')
9122c685d76SGreg Roach                    ->where('i_id', '=', $gid)
9132c685d76SGreg Roach                    ->where('i_file', '=', $tree->id())
9142c685d76SGreg Roach                    ->delete();
9152c685d76SGreg Roach                break;
9162c685d76SGreg Roach
9172c685d76SGreg Roach            case Family::RECORD_TYPE:
9182c685d76SGreg Roach                DB::table('families')
9192c685d76SGreg Roach                    ->where('f_id', '=', $gid)
9202c685d76SGreg Roach                    ->where('f_file', '=', $tree->id())
9212c685d76SGreg Roach                    ->delete();
9222c685d76SGreg Roach                break;
9232c685d76SGreg Roach
9242c685d76SGreg Roach            case Source::RECORD_TYPE:
9252c685d76SGreg Roach                DB::table('sources')
9262c685d76SGreg Roach                    ->where('s_id', '=', $gid)
9272c685d76SGreg Roach                    ->where('s_file', '=', $tree->id())
9282c685d76SGreg Roach                    ->delete();
9292c685d76SGreg Roach                break;
9302c685d76SGreg Roach
9312c685d76SGreg Roach            case Media::RECORD_TYPE:
9322c685d76SGreg Roach                DB::table('media_file')
9332c685d76SGreg Roach                    ->where('m_id', '=', $gid)
9342c685d76SGreg Roach                    ->where('m_file', '=', $tree->id())
9352c685d76SGreg Roach                    ->delete();
9362c685d76SGreg Roach
9372c685d76SGreg Roach                DB::table('media')
9382c685d76SGreg Roach                    ->where('m_id', '=', $gid)
9392c685d76SGreg Roach                    ->where('m_file', '=', $tree->id())
9402c685d76SGreg Roach                    ->delete();
9412c685d76SGreg Roach                break;
9422c685d76SGreg Roach
9432c685d76SGreg Roach            default:
9442c685d76SGreg Roach                DB::table('other')
9452c685d76SGreg Roach                    ->where('o_id', '=', $gid)
9462c685d76SGreg Roach                    ->where('o_file', '=', $tree->id())
9472c685d76SGreg Roach                    ->delete();
9482c685d76SGreg Roach                break;
9492c685d76SGreg Roach        }
9502c685d76SGreg Roach
9512c685d76SGreg Roach        if (!$delete) {
9522c685d76SGreg Roach            $this->importRecord($gedrec, $tree, true);
9532c685d76SGreg Roach        }
9542c685d76SGreg Roach    }
9552c685d76SGreg Roach}
956