xref: /webtrees/app/Services/GedcomImportService.php (revision f077613912070df8f1da7e8a3a41f95474fd35b6)
12c685d76SGreg Roach<?php
22c685d76SGreg Roach
/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
172c685d76SGreg Roach
182c685d76SGreg Roachdeclare(strict_types=1);
192c685d76SGreg Roach
202c685d76SGreg Roachnamespace Fisharebest\Webtrees\Services;
212c685d76SGreg Roach
222c685d76SGreg Roachuse Fisharebest\Webtrees\Date;
236f4ec3caSGreg Roachuse Fisharebest\Webtrees\DB;
242c685d76SGreg Roachuse Fisharebest\Webtrees\Elements\UnknownElement;
252c685d76SGreg Roachuse Fisharebest\Webtrees\Exceptions\GedcomErrorException;
262c685d76SGreg Roachuse Fisharebest\Webtrees\Family;
272c685d76SGreg Roachuse Fisharebest\Webtrees\Gedcom;
282c685d76SGreg Roachuse Fisharebest\Webtrees\Header;
292c685d76SGreg Roachuse Fisharebest\Webtrees\Individual;
302c685d76SGreg Roachuse Fisharebest\Webtrees\Location;
312c685d76SGreg Roachuse Fisharebest\Webtrees\Media;
322c685d76SGreg Roachuse Fisharebest\Webtrees\Note;
332c685d76SGreg Roachuse Fisharebest\Webtrees\Place;
342c685d76SGreg Roachuse Fisharebest\Webtrees\PlaceLocation;
352c685d76SGreg Roachuse Fisharebest\Webtrees\Registry;
362c685d76SGreg Roachuse Fisharebest\Webtrees\Repository;
372c685d76SGreg Roachuse Fisharebest\Webtrees\Soundex;
382c685d76SGreg Roachuse Fisharebest\Webtrees\Source;
392c685d76SGreg Roachuse Fisharebest\Webtrees\Submission;
402c685d76SGreg Roachuse Fisharebest\Webtrees\Submitter;
412c685d76SGreg Roachuse Fisharebest\Webtrees\Tree;
422c685d76SGreg Roachuse Illuminate\Database\Query\JoinClause;
432c685d76SGreg Roach
442c685d76SGreg Roachuse function array_chunk;
452c685d76SGreg Roachuse function array_intersect_key;
462c685d76SGreg Roachuse function array_map;
472c685d76SGreg Roachuse function array_unique;
482c685d76SGreg Roachuse function date;
492c685d76SGreg Roachuse function explode;
502c685d76SGreg Roachuse function max;
512c685d76SGreg Roachuse function mb_substr;
522c685d76SGreg Roachuse function preg_match;
532c685d76SGreg Roachuse function preg_match_all;
542c685d76SGreg Roachuse function preg_replace;
552c685d76SGreg Roachuse function round;
562c685d76SGreg Roachuse function str_contains;
572c685d76SGreg Roachuse function str_replace;
582c685d76SGreg Roachuse function str_starts_with;
592c685d76SGreg Roachuse function strlen;
602c685d76SGreg Roachuse function strtoupper;
612c685d76SGreg Roachuse function strtr;
622c685d76SGreg Roachuse function substr;
632c685d76SGreg Roachuse function trim;
642c685d76SGreg Roach
652c685d76SGreg Roachuse const PREG_SET_ORDER;
662c685d76SGreg Roach
/**
 * Class GedcomImportService - import GEDCOM data
 */
702c685d76SGreg Roachclass GedcomImportService
712c685d76SGreg Roach{
/**
 * Tidy up a gedcom record on import, so that we can access it consistently/efficiently.
 *
 * The record is split into lines (level + optional xref + tag + optional data). Each tag is
 * canonicalised, the data of DATE, NAME, PLAC, SEX and FILE lines is normalised, CONC lines
 * are appended to the preceding line, and the remaining lines are joined with "\n".
 *
 * @param string $rec  The raw record text, with any style of line endings
 * @param Tree   $tree Supplies the GEDCOM_MEDIA_PATH and WORD_WRAPPED_NOTES preferences
 *
 * @return string
 */
private function reformatRecord(string $rec, Tree $tree): string
{
    $gedcom_service = Registry::container()->get(GedcomService::class);

    // Strip out mac/msdos line endings
    $rec = preg_replace("/[\r\n]+/", "\n", $rec);

    // Extract lines from the record; lines consist of: level + optional xref + tag + optional data
    $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER);

    // Process the record line-by-line
    $newrec = '';
    foreach ($matches as $n => $match) {
        [, $level, $xref, $tag, $data] = $match;

        $tag = $gedcom_service->canonicalTag($tag);

        switch ($tag) {
            case 'DATE':
                // Preserve text from INT dates
                if (str_contains($data, '(')) {
                    [$date, $text] = explode('(', $data, 2);
                    $text = ' (' . $text;
                } else {
                    $date = $data;
                    $text = '';
                }
                // Capitals
                $date = strtoupper($date);
                // Temporarily add leading/trailing spaces, to allow efficient matching below
                $date = ' ' . $date . ' ';
                // Ensure a space between digits and letters
                $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date);
                $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date);
                // Ensure space before/after calendar escapes
                $date = preg_replace('/@#[^@]+@/', ' $0 ', $date);
                // "BET." => "BET"
                $date = preg_replace('/(\w\w)\./', '$1', $date);
                // "CIR" and "APX" => "ABT"
                $date = str_replace(' CIR ', ' ABT ', $date);
                $date = str_replace(' APX ', ' ABT ', $date);
                // B.C. => BC (temporarily, to allow easier handling of ".")
                $date = str_replace(' B.C. ', ' BC ', $date);
                // TMG uses "EITHER X OR Y"
                $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date);
                // "BET X - Y " => "BET X AND Y"
                $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date);
                $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date);
                // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y"
                $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date);
                $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date);
                // "@#ESC@ AFT X" => "AFT @#ESC@ X"
                $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date);
                // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900"
                // (don't change "/" - it is used in NS/OS dates)
                $date = preg_replace('/[.,:;-]/', ' ', $date);
                // BC => B.C.
                $date = str_replace(' BC ', ' B.C. ', $date);
                // Append the "INT" text
                $data = $date . $text;
                break;
            case 'HEAD':
            case 'TRLR':
                // HEAD and TRLR records do not have an XREF or DATA
                if ($level === '0') {
                    $xref = '';
                    $data = '';
                }
                break;
            case 'NAME':
                // Trim the name and collapse runs of spaces
                $data = preg_replace('/  +/', ' ', trim($data));
                break;
            case 'PLAC':
                // Consistent commas: ASCII comma, fullwidth comma (U+FF0C) and Arabic comma (U+060C).
                // The code points are written explicitly because the glyphs are easily confused or lost.
                $data = preg_replace('/ *[,\x{FF0C}\x{060C}] */u', ', ', $data);
                // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W
                if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $coords) === 1) {
                    $degns = (int) $coords[2];
                    $minns = (int) $coords[3];
                    $secns = (int) $coords[4];
                    $degew = (int) $coords[6];
                    $minew = (int) $coords[7];
                    $secew = (int) $coords[8];
                    // Replace the suffix with MAP/LATI/LONG sub-lines, using decimal degrees
                    $data =
                        $coords[1] . "\n" .
                        (1 + (int) $level) . " MAP\n" .
                        (2 + (int) $level) . ' LATI ' . ($coords[5] . round($degns + $minns / 60 + $secns / 3600, 4)) . "\n" .
                        (2 + (int) $level) . ' LONG ' . ($coords[9] . round($degew + $minew / 60 + $secew / 3600, 4));
                }
                break;
            case 'SEX':
                $data = strtoupper($data);
                break;
        }

        // Suppress "Y", for facts/events with a DATE or PLAC
        if ($data === 'y') {
            $data = 'Y';
        }
        if ($level === '1' && $data === 'Y') {
            // Scan the sub-lines of this fact, up to (not including) the next level 1 line.
            // The final line of the record must be included in the scan.
            for ($i = $n + 1; $i < $num_matches && $matches[$i][1] !== '1'; ++$i) {
                if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') {
                    $data = '';
                    break;
                }
            }
        }

        if ($tag === 'CONC') {
            // Merge CONC lines, to simplify access later on.
            $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data;
            continue;
        }

        // Final, tag-specific clean-up of the data
        switch ($tag) {
            case 'NOTE':
            case 'TEXT':
            case 'DATA':
            case 'CONT':
                // The data of these lines is kept exactly as it was written
                break;
            case 'FILE':
                // Strip off the user-defined path prefix
                $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH');
                if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) {
                    $data = substr($data, strlen($GEDCOM_MEDIA_PATH));
                }
                // convert backslashes in filenames to forward slashes
                $data = preg_replace("/\\\\/", '/', $data);
                break;
            default:
                // Remove tabs and multiple/leading/trailing spaces
                $data = strtr($data, ["\t" => ' ']);
                $data = trim($data, ' ');
                while (str_contains($data, '  ')) {
                    $data = strtr($data, ['  ' => ' ']);
                }
                break;
        }

        // Reassemble components back into a single line. Only level 0 lines keep their xref,
        // and an empty NOTE keeps its trailing space.
        $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
    }

    return $newrec;
}
2242c685d76SGreg Roach
/**
 * Import a single GEDCOM record into the database.
 *
 * The record is normalised, classified by its level 0 line, and written to the table that
 * holds its record type. The place/date/name indexes are then updated for individuals and
 * families, and the link index is updated for every stored record.
 *
 * @param string $gedrec the raw gedcom record to parse
 * @param Tree   $tree   import the record into this tree
 * @param bool   $update whether this is an updated record that has been accepted
 *
 * @return void
 * @throws GedcomErrorException
 */
public function importRecord(string $gedrec, Tree $tree, bool $update): void
{
    $tree_id = $tree->id();

    // Escaped @ signs (only if importing from file)
    if ($update === false) {
        $gedrec = str_replace('@@', '@', $gedrec);
    }

    // Standardise gedcom format
    $gedrec = $this->reformatRecord($gedrec, $tree);

    // Work out the record type and XREF from the level 0 line
    $header_pattern = '/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/';

    if (preg_match($header_pattern, $gedrec, $header) === 1) {
        $xref = $header[1];
        $type = $header[2];
    } elseif (str_starts_with($gedrec, '0 HEAD')) {
        // For records without an XREF, use the type as a pseudo XREF.
        $xref = 'HEAD';
        $type = 'HEAD';
    } elseif (str_starts_with($gedrec, '0 TRLR')) {
        // Reaching the trailer flags the tree as imported.
        $tree->setPreference('imported', '1');
        // For records without an XREF, use the type as a pseudo XREF.
        $xref = 'TRLR';
        $type = 'TRLR';
    } elseif (str_starts_with($gedrec, '0 _PLAC_DEFN')) {
        $this->importLegacyPlacDefn($gedrec);

        return;
    } elseif (str_starts_with($gedrec, '0 _PLAC ')) {
        $this->importTNGPlac($gedrec);

        return;
    } else {
        // Known custom records without XREFs are silently skipped; anything else is an error.
        foreach (Gedcom::CUSTOM_RECORDS_WITHOUT_XREFS as $record_type) {
            if (preg_match('/^0 ' . $record_type . '\b/', $gedrec) === 1) {
                return;
            }
        }

        throw new GedcomErrorException($gedrec);
    }

    // Add a _UID, when the tree asks for one and this record type has a known _UID element
    $uids_wanted = $tree->getPreference('GENERATE_UIDS') === '1';

    if ($uids_wanted && !str_contains($gedrec, "\n1 _UID ")) {
        $uid_element = Registry::elementFactory()->make($type . ':_UID');

        if (!$uid_element instanceof UnknownElement) {
            $gedrec .= "\n1 _UID " . $uid_element->default($tree);
        }
    }

    // If the user has downloaded their GEDCOM data (containing media objects) and edited it
    // using an application which does not support (and deletes) media objects, then add them
    // back in.
    if ($tree->getPreference('keep_media') === '1') {
        // Builds a fresh query for this record's existing OBJE links.
        $media_links = static fn () => DB::table('link')
            ->where('l_from', '=', $xref)
            ->where('l_file', '=', $tree_id)
            ->where('l_type', '=', 'OBJE');

        $old_linked_media = $media_links()->pluck('l_to');

        // Delete these links - so that we do not insert them again in updateLinks()
        $media_links()->delete();

        foreach ($old_linked_media as $media_id) {
            $gedrec .= "\n1 OBJE @" . $media_id . '@';
        }
    }

    // Convert inline media into media objects
    $gedrec = $this->convertInlineMedia($tree, $gedrec);

    switch ($type) {
        case Individual::RECORD_TYPE:
            $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree);

            // Fall back to the XREF when there is no RIN line
            $rin = preg_match('/\n1 RIN (.+)/', $gedrec, $rin_match) === 1 ? $rin_match[1] : $xref;

            // The database can only store MFU, and many of the stats queries assume this.
            $sex = $record->sex();
            if ($sex !== 'M' && $sex !== 'F') {
                $sex = 'U';
            }

            DB::table('individuals')->insert([
                'i_id'     => $xref,
                'i_file'   => $tree_id,
                'i_rin'    => $rin,
                'i_sex'    => $sex,
                'i_gedcom' => $gedrec,
            ]);

            // Update the cross-reference/index tables.
            $this->updatePlaces($xref, $tree, $gedrec);
            $this->updateDates($xref, $tree_id, $gedrec);
            $this->updateNames($xref, $tree_id, $record);
            break;

        case Family::RECORD_TYPE:
            $husb = preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $husb_match) === 1 ? $husb_match[1] : '';
            $wife = preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $wife_match) === 1 ? $wife_match[1] : '';

            // Use whichever is larger: the number of CHIL links, or the stated NCHI value
            $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $chil_matches);
            if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $nchi_match) === 1) {
                $nchi = max($nchi, $nchi_match[1]);
            }

            DB::table('families')->insert([
                'f_id'      => $xref,
                'f_file'    => $tree_id,
                'f_husb'    => $husb,
                'f_wife'    => $wife,
                'f_gedcom'  => $gedrec,
                'f_numchil' => $nchi,
            ]);

            // Update the cross-reference/index tables.
            $this->updatePlaces($xref, $tree, $gedrec);
            $this->updateDates($xref, $tree_id, $gedrec);
            break;

        case Source::RECORD_TYPE:
            // Name the source after its TITL, else its ABBR, else its XREF
            $name = $xref;
            if (
                preg_match('/\n1 TITL (.+)/', $gedrec, $name_match) === 1 ||
                preg_match('/\n1 ABBR (.+)/', $gedrec, $name_match) === 1
            ) {
                $name = $name_match[1];
            }

            DB::table('sources')->insert([
                's_id'     => $xref,
                's_file'   => $tree_id,
                's_name'   => mb_substr($name, 0, 255),
                's_gedcom' => $gedrec,
            ]);
            break;

        case Repository::RECORD_TYPE:
        case Note::RECORD_TYPE:
        case Submission::RECORD_TYPE:
        case Submitter::RECORD_TYPE:
        case Location::RECORD_TYPE:
            DB::table('other')->insert([
                'o_id'     => $xref,
                'o_file'   => $tree_id,
                'o_type'   => $type,
                'o_gedcom' => $gedrec,
            ]);
            break;

        case Header::RECORD_TYPE:
            // Force HEAD records to have a creation date.
            if (!str_contains($gedrec, "\n1 DATE ")) {
                $gedrec .= "\n1 DATE " . strtoupper(date('d M Y'));
            }

            DB::table('other')->insert([
                'o_id'     => $xref,
                'o_file'   => $tree_id,
                'o_type'   => Header::RECORD_TYPE,
                'o_gedcom' => $gedrec,
            ]);
            break;

        case Media::RECORD_TYPE:
            $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree);

            DB::table('media')->insert([
                'm_id'     => $xref,
                'm_file'   => $tree_id,
                'm_gedcom' => $gedrec,
            ]);

            // One row per media file, each value truncated to the length used here
            foreach ($record->mediaFiles() as $media_file) {
                DB::table('media_file')->insert([
                    'm_id'                 => $xref,
                    'm_file'               => $tree_id,
                    'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
                    'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
                    'source_media_type'    => mb_substr($media_file->type(), 0, 15),
                    'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
                ]);
            }
            break;

        default: // Custom record types.
            DB::table('other')->insert([
                'o_id'     => $xref,
                'o_file'   => $tree_id,
                'o_type'   => mb_substr($type, 0, 15),
                'o_gedcom' => $gedrec,
            ]);
            break;
    }

    // Update the cross-reference/index tables.
    $this->updateLinks($xref, $tree_id, $gedrec);
}
4462c685d76SGreg Roach
/**
 * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values.
 *
 * The record is ignored unless it has a level 1 PLAC line, a level 3 LATI line starting
 * with N or S, and a level 3 LONG line starting with E or W. The coordinates are written
 * only when the place location currently has neither a latitude nor a longitude.
 *
 * @param string $gedcom
 */
private function importLegacyPlacDefn(string $gedcom): void
{
    $gedcom_service = new GedcomService();

    if (preg_match('/\n1 PLAC (.+)/', $gedcom, $plac) !== 1) {
        return;
    }

    if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $lati) !== 1) {
        return;
    }

    $latitude = $gedcom_service->readLatitude($lati[1]);

    if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $long) !== 1) {
        return;
    }

    $longitude = $gedcom_service->readLongitude($long[1]);

    $location = new PlaceLocation($plac[1]);

    // Leave the location alone if it already has either coordinate.
    if ($location->latitude() !== null || $location->longitude() !== null) {
        return;
    }

    DB::table('place_location')
        ->where('id', '=', $location->id())
        ->update([
            'latitude'  => $latitude,
            'longitude' => $longitude,
        ]);
}
4852c685d76SGreg Roach
/**
 * Import a TNG-style `0 _PLAC <place name>` record containing LAT/LONG values.
 *
 * The record is ignored unless it has a level 2 LATI line and a level 2 LONG line; both
 * values are read as plain decimal numbers. The coordinates are written only when the
 * place location currently has neither a latitude nor a longitude.
 *
 * @param string $gedcom
 */
private function importTNGPlac(string $gedcom): void
{
    if (preg_match('/^0 _PLAC (.+)/', $gedcom, $plac) !== 1) {
        return;
    }

    if (preg_match('/\n2 LATI (.+)/', $gedcom, $lati) !== 1) {
        return;
    }

    if (preg_match('/\n2 LONG (.+)/', $gedcom, $long) !== 1) {
        return;
    }

    $place_name = $plac[1];
    $latitude   = (float) $lati[1];
    $longitude  = (float) $long[1];

    $location = new PlaceLocation($place_name);

    // Leave the location alone if it already has either coordinate.
    if ($location->latitude() !== null || $location->longitude() !== null) {
        return;
    }

    DB::table('place_location')
        ->where('id', '=', $location->id())
        ->update([
            'latitude'  => $latitude,
            'longitude' => $longitude,
        ]);
}
5222c685d76SGreg Roach
5232c685d76SGreg Roach    /**
5242c685d76SGreg Roach     * Extract all level 2 places from the given record and insert them into the places table
5252c685d76SGreg Roach     *
5262c685d76SGreg Roach     * @param string $xref
5272c685d76SGreg Roach     * @param Tree   $tree
5282c685d76SGreg Roach     * @param string $gedrec
5292c685d76SGreg Roach     *
5302c685d76SGreg Roach     * @return void
5312c685d76SGreg Roach     */
5322c685d76SGreg Roach    public function updatePlaces(string $xref, Tree $tree, string $gedrec): void
5332c685d76SGreg Roach    {
5342c685d76SGreg Roach        // Insert all new rows together
5352c685d76SGreg Roach        $rows = [];
5362c685d76SGreg Roach
5372c685d76SGreg Roach        preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches);
5382c685d76SGreg Roach
5392c685d76SGreg Roach        $places = array_unique($matches[1]);
5402c685d76SGreg Roach
5412c685d76SGreg Roach        foreach ($places as $place_name) {
5422c685d76SGreg Roach            $place = new Place($place_name, $tree);
5432c685d76SGreg Roach
5442c685d76SGreg Roach            // Calling Place::id() will create the entry in the database, if it doesn't already exist.
5452c685d76SGreg Roach            while ($place->id() !== 0) {
5462c685d76SGreg Roach                $rows[] = [
5472c685d76SGreg Roach                    'pl_p_id' => $place->id(),
5482c685d76SGreg Roach                    'pl_gid'  => $xref,
5492c685d76SGreg Roach                    'pl_file' => $tree->id(),
5502c685d76SGreg Roach                ];
5512c685d76SGreg Roach
5522c685d76SGreg Roach                $place = $place->parent();
5532c685d76SGreg Roach            }
5542c685d76SGreg Roach        }
5552c685d76SGreg Roach
5562c685d76SGreg Roach        // array_unique doesn't work with arrays of arrays
5572c685d76SGreg Roach        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));
5582c685d76SGreg Roach
5592c685d76SGreg Roach        // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders.
5602c685d76SGreg Roach        foreach (array_chunk($rows, 20000) as $chunk) {
5612c685d76SGreg Roach            DB::table('placelinks')->insert($chunk);
5622c685d76SGreg Roach        }
5632c685d76SGreg Roach    }
5642c685d76SGreg Roach
5652c685d76SGreg Roach    /**
5662c685d76SGreg Roach     * Extract all the dates from the given record and insert them into the database.
5672c685d76SGreg Roach     *
5682c685d76SGreg Roach     * @param string $xref
5692c685d76SGreg Roach     * @param int    $ged_id
5702c685d76SGreg Roach     * @param string $gedrec
5712c685d76SGreg Roach     *
5722c685d76SGreg Roach     * @return void
5732c685d76SGreg Roach     */
5742c685d76SGreg Roach    private function updateDates(string $xref, int $ged_id, string $gedrec): void
5752c685d76SGreg Roach    {
5762c685d76SGreg Roach        // Insert all new rows together
5772c685d76SGreg Roach        $rows = [];
5782c685d76SGreg Roach
5792c685d76SGreg Roach        preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER);
5802c685d76SGreg Roach
5812c685d76SGreg Roach        foreach ($matches as $match) {
5822c685d76SGreg Roach            $fact = $match[1];
5832c685d76SGreg Roach            $date = new Date($match[2]);
5842c685d76SGreg Roach            $rows[] = [
5852c685d76SGreg Roach                'd_day'        => $date->minimumDate()->day,
5862c685d76SGreg Roach                'd_month'      => $date->minimumDate()->format('%O'),
5872c685d76SGreg Roach                'd_mon'        => $date->minimumDate()->month,
5882c685d76SGreg Roach                'd_year'       => $date->minimumDate()->year,
5892c685d76SGreg Roach                'd_julianday1' => $date->minimumDate()->minimumJulianDay(),
5902c685d76SGreg Roach                'd_julianday2' => $date->minimumDate()->maximumJulianDay(),
5912c685d76SGreg Roach                'd_fact'       => $fact,
5922c685d76SGreg Roach                'd_gid'        => $xref,
5932c685d76SGreg Roach                'd_file'       => $ged_id,
5942c685d76SGreg Roach                'd_type'       => $date->minimumDate()->format('%@'),
5952c685d76SGreg Roach            ];
5962c685d76SGreg Roach
5972c685d76SGreg Roach            $rows[] = [
5982c685d76SGreg Roach                'd_day'        => $date->maximumDate()->day,
5992c685d76SGreg Roach                'd_month'      => $date->maximumDate()->format('%O'),
6002c685d76SGreg Roach                'd_mon'        => $date->maximumDate()->month,
6012c685d76SGreg Roach                'd_year'       => $date->maximumDate()->year,
6022c685d76SGreg Roach                'd_julianday1' => $date->maximumDate()->minimumJulianDay(),
6032c685d76SGreg Roach                'd_julianday2' => $date->maximumDate()->maximumJulianDay(),
6042c685d76SGreg Roach                'd_fact'       => $fact,
6052c685d76SGreg Roach                'd_gid'        => $xref,
6062c685d76SGreg Roach                'd_file'       => $ged_id,
6072c685d76SGreg Roach                'd_type'       => $date->minimumDate()->format('%@'),
6082c685d76SGreg Roach            ];
6092c685d76SGreg Roach        }
6102c685d76SGreg Roach
6112c685d76SGreg Roach        // array_unique doesn't work with arrays of arrays
6122c685d76SGreg Roach        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));
6132c685d76SGreg Roach
6142c685d76SGreg Roach        DB::table('dates')->insert($rows);
6152c685d76SGreg Roach    }
6162c685d76SGreg Roach
6172c685d76SGreg Roach    /**
6182c685d76SGreg Roach     * Extract all the links from the given record and insert them into the database
6192c685d76SGreg Roach     *
6202c685d76SGreg Roach     * @param string $xref
6212c685d76SGreg Roach     * @param int    $ged_id
6222c685d76SGreg Roach     * @param string $gedrec
6232c685d76SGreg Roach     *
6242c685d76SGreg Roach     * @return void
6252c685d76SGreg Roach     */
6262c685d76SGreg Roach    private function updateLinks(string $xref, int $ged_id, string $gedrec): void
6272c685d76SGreg Roach    {
6282c685d76SGreg Roach        // Insert all new rows together
6292c685d76SGreg Roach        $rows = [];
6302c685d76SGreg Roach
6312c685d76SGreg Roach        preg_match_all('/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $matches, PREG_SET_ORDER);
6322c685d76SGreg Roach
6332c685d76SGreg Roach        foreach ($matches as $match) {
6343793e425SGreg Roach            // Some applications (e.g. GenoPro) create links longer than 15 characters.
635f6bc4f93SGreg Roach            $link = mb_substr($match[1], 0, 15);
6363793e425SGreg Roach
6372c685d76SGreg Roach            // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@"
6383793e425SGreg Roach            $rows[$link . strtoupper($match[2])] = [
6392c685d76SGreg Roach                'l_from' => $xref,
6402c685d76SGreg Roach                'l_to'   => $match[2],
6413793e425SGreg Roach                'l_type' => $link,
6422c685d76SGreg Roach                'l_file' => $ged_id,
6432c685d76SGreg Roach            ];
6442c685d76SGreg Roach        }
6452c685d76SGreg Roach
6462c685d76SGreg Roach        DB::table('link')->insert($rows);
6472c685d76SGreg Roach    }
6482c685d76SGreg Roach
6492c685d76SGreg Roach    /**
6502c685d76SGreg Roach     * Extract all the names from the given record and insert them into the database.
6512c685d76SGreg Roach     *
6522c685d76SGreg Roach     * @param string     $xref
6532c685d76SGreg Roach     * @param int        $ged_id
6542c685d76SGreg Roach     * @param Individual $record
6552c685d76SGreg Roach     *
6562c685d76SGreg Roach     * @return void
6572c685d76SGreg Roach     */
6582c685d76SGreg Roach    private function updateNames(string $xref, int $ged_id, Individual $record): void
6592c685d76SGreg Roach    {
6602c685d76SGreg Roach        // Insert all new rows together
6612c685d76SGreg Roach        $rows = [];
6622c685d76SGreg Roach
6632c685d76SGreg Roach        foreach ($record->getAllNames() as $n => $name) {
6642c685d76SGreg Roach            if ($name['givn'] === Individual::PRAENOMEN_NESCIO) {
6652c685d76SGreg Roach                $soundex_givn_std = null;
6662c685d76SGreg Roach                $soundex_givn_dm  = null;
6672c685d76SGreg Roach            } else {
6682c685d76SGreg Roach                $soundex_givn_std = Soundex::russell($name['givn']);
6692c685d76SGreg Roach                $soundex_givn_dm  = Soundex::daitchMokotoff($name['givn']);
6702c685d76SGreg Roach            }
6712c685d76SGreg Roach
6722c685d76SGreg Roach            if ($name['surn'] === Individual::NOMEN_NESCIO) {
6732c685d76SGreg Roach                $soundex_surn_std = null;
6742c685d76SGreg Roach                $soundex_surn_dm  = null;
6752c685d76SGreg Roach            } else {
6762c685d76SGreg Roach                $soundex_surn_std = Soundex::russell($name['surname']);
6772c685d76SGreg Roach                $soundex_surn_dm  = Soundex::daitchMokotoff($name['surname']);
6782c685d76SGreg Roach            }
6792c685d76SGreg Roach
6802c685d76SGreg Roach            $rows[] = [
6812c685d76SGreg Roach                'n_file'             => $ged_id,
6822c685d76SGreg Roach                'n_id'               => $xref,
6832c685d76SGreg Roach                'n_num'              => $n,
6842c685d76SGreg Roach                'n_type'             => $name['type'],
6852c685d76SGreg Roach                'n_sort'             => mb_substr($name['sort'], 0, 255),
6862c685d76SGreg Roach                'n_full'             => mb_substr($name['fullNN'], 0, 255),
6872c685d76SGreg Roach                'n_surname'          => mb_substr($name['surname'], 0, 255),
6882c685d76SGreg Roach                'n_surn'             => mb_substr($name['surn'], 0, 255),
6892c685d76SGreg Roach                'n_givn'             => mb_substr($name['givn'], 0, 255),
6902c685d76SGreg Roach                'n_soundex_givn_std' => $soundex_givn_std,
6912c685d76SGreg Roach                'n_soundex_surn_std' => $soundex_surn_std,
6922c685d76SGreg Roach                'n_soundex_givn_dm'  => $soundex_givn_dm,
6932c685d76SGreg Roach                'n_soundex_surn_dm'  => $soundex_surn_dm,
6942c685d76SGreg Roach            ];
6952c685d76SGreg Roach        }
6962c685d76SGreg Roach
6972c685d76SGreg Roach        DB::table('name')->insert($rows);
6982c685d76SGreg Roach    }
6992c685d76SGreg Roach
7002c685d76SGreg Roach    /**
7012c685d76SGreg Roach     * Extract inline media data, and convert to media objects.
7022c685d76SGreg Roach     *
7032c685d76SGreg Roach     * @param Tree   $tree
7042c685d76SGreg Roach     * @param string $gedcom
7052c685d76SGreg Roach     *
7062c685d76SGreg Roach     * @return string
7072c685d76SGreg Roach     */
7082c685d76SGreg Roach    private function convertInlineMedia(Tree $tree, string $gedcom): string
7092c685d76SGreg Roach    {
7102c685d76SGreg Roach        while (preg_match('/\n1 OBJE(?:\n[2-9].+)+/', $gedcom, $match)) {
7112c685d76SGreg Roach            $xref   = $this->createMediaObject($match[0], $tree);
7122c685d76SGreg Roach            $gedcom = strtr($gedcom, [$match[0] =>  "\n1 OBJE @" . $xref . '@']);
7132c685d76SGreg Roach        }
7142c685d76SGreg Roach        while (preg_match('/\n2 OBJE(?:\n[3-9].+)+/', $gedcom, $match)) {
7152c685d76SGreg Roach            $xref   = $this->createMediaObject($match[0], $tree);
7162c685d76SGreg Roach            $gedcom = strtr($gedcom, [$match[0] =>  "\n2 OBJE @" . $xref . '@']);
7172c685d76SGreg Roach        }
7182c685d76SGreg Roach        while (preg_match('/\n3 OBJE(?:\n[4-9].+)+/', $gedcom, $match)) {
7192c685d76SGreg Roach            $xref   = $this->createMediaObject($match[0], $tree);
7202c685d76SGreg Roach            $gedcom = strtr($gedcom, [$match[0] =>  "\n3 OBJE @" . $xref . '@']);
7212c685d76SGreg Roach        }
7222c685d76SGreg Roach
7232c685d76SGreg Roach        return $gedcom;
7242c685d76SGreg Roach    }
7252c685d76SGreg Roach
7262c685d76SGreg Roach    /**
7272c685d76SGreg Roach     * Create a new media object, from inline media data.
7282c685d76SGreg Roach     *
7292c685d76SGreg Roach     * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL
7302c685d76SGreg Roach     * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL
7312c685d76SGreg Roach     * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies:  +1 FILE / +1 FORM / +2 MEDI
7322c685d76SGreg Roach     *
7332c685d76SGreg Roach     * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE
7342c685d76SGreg Roach     * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL
7352c685d76SGreg Roach     *
7362c685d76SGreg Roach     * @param string $gedcom
7372c685d76SGreg Roach     * @param Tree   $tree
7382c685d76SGreg Roach     *
7392c685d76SGreg Roach     * @return string
7402c685d76SGreg Roach     */
7412c685d76SGreg Roach    private function createMediaObject(string $gedcom, Tree $tree): string
7422c685d76SGreg Roach    {
7432c685d76SGreg Roach        preg_match('/\n\d FILE (.+)/', $gedcom, $match);
7442c685d76SGreg Roach        $file = $match[1] ?? '';
7452c685d76SGreg Roach
7462c685d76SGreg Roach        preg_match('/\n\d TITL (.+)/', $gedcom, $match);
7472c685d76SGreg Roach        $title = $match[1] ?? '';
7482c685d76SGreg Roach
7492c685d76SGreg Roach        preg_match('/\n\d FORM (.+)/', $gedcom, $match);
7502c685d76SGreg Roach        $format = $match[1] ?? '';
7512c685d76SGreg Roach
7522c685d76SGreg Roach        preg_match('/\n\d MEDI (.+)/', $gedcom, $match);
7532c685d76SGreg Roach        $media = $match[1] ?? '';
7542c685d76SGreg Roach
7552c685d76SGreg Roach        preg_match('/\n\d _SCBK (.+)/', $gedcom, $match);
7562c685d76SGreg Roach        $scrapbook = $match[1] ?? '';
7572c685d76SGreg Roach
7582c685d76SGreg Roach        preg_match('/\n\d _PRIM (.+)/', $gedcom, $match);
7592c685d76SGreg Roach        $primary = $match[1] ?? '';
7602c685d76SGreg Roach
7612c685d76SGreg Roach        preg_match('/\n\d _TYPE (.+)/', $gedcom, $match);
7622c685d76SGreg Roach        if ($media === '') {
7632c685d76SGreg Roach            // Legacy uses _TYPE instead of MEDI
7642c685d76SGreg Roach            $media = $match[1] ?? '';
7652c685d76SGreg Roach            $type  = '';
7662c685d76SGreg Roach        } else {
7672c685d76SGreg Roach            $type = $match[1] ?? '';
7682c685d76SGreg Roach        }
7692c685d76SGreg Roach
7702c685d76SGreg Roach        preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches);
771*f0776139SGreg Roach        $notes = $matches[1];
7722c685d76SGreg Roach
7732c685d76SGreg Roach        // Have we already created a media object with the same title/filename?
7742c685d76SGreg Roach        $xref = DB::table('media_file')
7752c685d76SGreg Roach            ->where('m_file', '=', $tree->id())
7762c685d76SGreg Roach            ->where('descriptive_title', '=', mb_substr($title, 0, 248))
7772c685d76SGreg Roach            ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248))
7782c685d76SGreg Roach            ->value('m_id');
7792c685d76SGreg Roach
7802c685d76SGreg Roach        if ($xref === null) {
7812c685d76SGreg Roach            $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE);
7822c685d76SGreg Roach
7832c685d76SGreg Roach            // convert to a media-object
7842c685d76SGreg Roach            $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file;
7852c685d76SGreg Roach
7862c685d76SGreg Roach            if ($format !== '') {
7872c685d76SGreg Roach                $gedcom .= "\n2 FORM " . $format;
7882c685d76SGreg Roach
7892c685d76SGreg Roach                if ($media !== '') {
7902c685d76SGreg Roach                    $gedcom .= "\n3 TYPE " . $media;
7912c685d76SGreg Roach                }
7922c685d76SGreg Roach            }
7932c685d76SGreg Roach
7942c685d76SGreg Roach            if ($title !== '') {
7958a45bddeSGreg Roach                $gedcom .= "\n2 TITL " . $title;
7962c685d76SGreg Roach            }
7972c685d76SGreg Roach
7982c685d76SGreg Roach            if ($scrapbook !== '') {
7992c685d76SGreg Roach                $gedcom .= "\n1 _SCBK " . $scrapbook;
8002c685d76SGreg Roach            }
8012c685d76SGreg Roach
8022c685d76SGreg Roach            if ($primary !== '') {
8032c685d76SGreg Roach                $gedcom .= "\n1 _PRIM " . $primary;
8042c685d76SGreg Roach            }
8052c685d76SGreg Roach
8062c685d76SGreg Roach            if ($type !== '') {
8072c685d76SGreg Roach                $gedcom .= "\n1 _TYPE " . $type;
8082c685d76SGreg Roach            }
8092c685d76SGreg Roach
8102c685d76SGreg Roach            foreach ($notes as $note) {
8112c685d76SGreg Roach                $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]);
8122c685d76SGreg Roach            }
8132c685d76SGreg Roach
8142c685d76SGreg Roach            DB::table('media')->insert([
8152c685d76SGreg Roach                'm_id'     => $xref,
8162c685d76SGreg Roach                'm_file'   => $tree->id(),
8172c685d76SGreg Roach                'm_gedcom' => $gedcom,
8182c685d76SGreg Roach            ]);
8192c685d76SGreg Roach
8202c685d76SGreg Roach            DB::table('media_file')->insert([
8212c685d76SGreg Roach                'm_id'                 => $xref,
8222c685d76SGreg Roach                'm_file'               => $tree->id(),
8232c685d76SGreg Roach                'multimedia_file_refn' => mb_substr($file, 0, 248),
8242c685d76SGreg Roach                'multimedia_format'    => mb_substr($format, 0, 4),
8252c685d76SGreg Roach                'source_media_type'    => mb_substr($media, 0, 15),
8262c685d76SGreg Roach                'descriptive_title'    => mb_substr($title, 0, 248),
8272c685d76SGreg Roach            ]);
8282c685d76SGreg Roach        }
8292c685d76SGreg Roach
8302c685d76SGreg Roach        return $xref;
8312c685d76SGreg Roach    }
8322c685d76SGreg Roach
8332c685d76SGreg Roach    /**
8342c685d76SGreg Roach     * update a record in the database
8352c685d76SGreg Roach     *
8362c685d76SGreg Roach     * @param string $gedrec
8372c685d76SGreg Roach     * @param Tree   $tree
8382c685d76SGreg Roach     * @param bool   $delete
8392c685d76SGreg Roach     *
8402c685d76SGreg Roach     * @return void
8412c685d76SGreg Roach     * @throws GedcomErrorException
8422c685d76SGreg Roach     */
8432c685d76SGreg Roach    public function updateRecord(string $gedrec, Tree $tree, bool $delete): void
8442c685d76SGreg Roach    {
8452c685d76SGreg Roach        if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) {
8462c685d76SGreg Roach            [, $gid, $type] = $match;
8472c685d76SGreg Roach        } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) {
8482c685d76SGreg Roach            // The HEAD record has no XREF.  Any others?
8492c685d76SGreg Roach            $gid  = $match[1];
8502c685d76SGreg Roach            $type = $match[1];
8512c685d76SGreg Roach        } else {
8522c685d76SGreg Roach            throw new GedcomErrorException($gedrec);
8532c685d76SGreg Roach        }
8542c685d76SGreg Roach
8552c685d76SGreg Roach        // Place links
8562c685d76SGreg Roach        DB::table('placelinks')
8572c685d76SGreg Roach            ->where('pl_gid', '=', $gid)
8582c685d76SGreg Roach            ->where('pl_file', '=', $tree->id())
8592c685d76SGreg Roach            ->delete();
8602c685d76SGreg Roach
8612c685d76SGreg Roach        // Orphaned places.  If we're deleting  "Westminster, London, England",
8622c685d76SGreg Roach        // then we may also need to delete "London, England" and "England".
8632c685d76SGreg Roach        do {
8642c685d76SGreg Roach            $affected = DB::table('places')
8652c685d76SGreg Roach                ->leftJoin('placelinks', function (JoinClause $join): void {
8662c685d76SGreg Roach                    $join
8672c685d76SGreg Roach                        ->on('p_id', '=', 'pl_p_id')
8682c685d76SGreg Roach                        ->on('p_file', '=', 'pl_file');
8692c685d76SGreg Roach                })
8702c685d76SGreg Roach                ->whereNull('pl_p_id')
8712c685d76SGreg Roach                ->delete();
8722c685d76SGreg Roach        } while ($affected > 0);
8732c685d76SGreg Roach
8742c685d76SGreg Roach        DB::table('dates')
8752c685d76SGreg Roach            ->where('d_gid', '=', $gid)
8762c685d76SGreg Roach            ->where('d_file', '=', $tree->id())
8772c685d76SGreg Roach            ->delete();
8782c685d76SGreg Roach
8792c685d76SGreg Roach        DB::table('name')
8802c685d76SGreg Roach            ->where('n_id', '=', $gid)
8812c685d76SGreg Roach            ->where('n_file', '=', $tree->id())
8822c685d76SGreg Roach            ->delete();
8832c685d76SGreg Roach
8842c685d76SGreg Roach        DB::table('link')
8852c685d76SGreg Roach            ->where('l_from', '=', $gid)
8862c685d76SGreg Roach            ->where('l_file', '=', $tree->id())
8872c685d76SGreg Roach            ->delete();
8882c685d76SGreg Roach
8892c685d76SGreg Roach        switch ($type) {
8902c685d76SGreg Roach            case Individual::RECORD_TYPE:
8912c685d76SGreg Roach                DB::table('individuals')
8922c685d76SGreg Roach                    ->where('i_id', '=', $gid)
8932c685d76SGreg Roach                    ->where('i_file', '=', $tree->id())
8942c685d76SGreg Roach                    ->delete();
8952c685d76SGreg Roach                break;
8962c685d76SGreg Roach
8972c685d76SGreg Roach            case Family::RECORD_TYPE:
8982c685d76SGreg Roach                DB::table('families')
8992c685d76SGreg Roach                    ->where('f_id', '=', $gid)
9002c685d76SGreg Roach                    ->where('f_file', '=', $tree->id())
9012c685d76SGreg Roach                    ->delete();
9022c685d76SGreg Roach                break;
9032c685d76SGreg Roach
9042c685d76SGreg Roach            case Source::RECORD_TYPE:
9052c685d76SGreg Roach                DB::table('sources')
9062c685d76SGreg Roach                    ->where('s_id', '=', $gid)
9072c685d76SGreg Roach                    ->where('s_file', '=', $tree->id())
9082c685d76SGreg Roach                    ->delete();
9092c685d76SGreg Roach                break;
9102c685d76SGreg Roach
9112c685d76SGreg Roach            case Media::RECORD_TYPE:
9122c685d76SGreg Roach                DB::table('media_file')
9132c685d76SGreg Roach                    ->where('m_id', '=', $gid)
9142c685d76SGreg Roach                    ->where('m_file', '=', $tree->id())
9152c685d76SGreg Roach                    ->delete();
9162c685d76SGreg Roach
9172c685d76SGreg Roach                DB::table('media')
9182c685d76SGreg Roach                    ->where('m_id', '=', $gid)
9192c685d76SGreg Roach                    ->where('m_file', '=', $tree->id())
9202c685d76SGreg Roach                    ->delete();
9212c685d76SGreg Roach                break;
9222c685d76SGreg Roach
9232c685d76SGreg Roach            default:
9242c685d76SGreg Roach                DB::table('other')
9252c685d76SGreg Roach                    ->where('o_id', '=', $gid)
9262c685d76SGreg Roach                    ->where('o_file', '=', $tree->id())
9272c685d76SGreg Roach                    ->delete();
9282c685d76SGreg Roach                break;
9292c685d76SGreg Roach        }
9302c685d76SGreg Roach
9312c685d76SGreg Roach        if (!$delete) {
9322c685d76SGreg Roach            $this->importRecord($gedrec, $tree, true);
9332c685d76SGreg Roach        }
9342c685d76SGreg Roach    }
9352c685d76SGreg Roach}
936