<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2022 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Date;
use Fisharebest\Webtrees\Elements\UnknownElement;
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Place;
use Fisharebest\Webtrees\PlaceLocation;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Soundex;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submission;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\JoinClause;

use function app;
use function array_chunk;
use function array_intersect_key;
use function array_map;
use function array_unique;
use function assert;
use function date;
use function explode;
use function max;
use function mb_substr;
use function preg_match;
use function preg_match_all;
use function preg_replace;
use function round;
use function str_contains;
use function str_replace;
use function str_starts_with;
use function strlen;
use function strtolower;
use function strtoupper;
use function strtr;
use function substr;
use function trim;

use const PREG_SET_ORDER;

/**
 * Class GedcomImportService - import GEDCOM data
 */
class GedcomImportService
{
    /**
     * Tidy up a gedcom record on import, so that we can access it consistently/efficiently.
     *
     * Each line is split into level, optional xref, tag and data.  The tag is
     * canonicalised, tag-specific clean-up is applied to the data, and the
     * lines are joined back together with "\n".  CONC lines are merged into
     * the preceding line.
     *
     * @param string $rec
     * @param Tree   $tree
     *
     * @return string
     */
    private function reformatRecord(string $rec, Tree $tree): string
    {
        $gedcom_service = app(GedcomService::class);
        assert($gedcom_service instanceof GedcomService);

        // Strip out mac/msdos line endings
        $rec = preg_replace("/[\r\n]+/", "\n", $rec);

        // Extract lines from the record; lines consist of: level + optional xref + tag + optional data
        $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER);

        // Process the record line-by-line
        $newrec = '';
        foreach ($matches as $n => $match) {
            [, $level, $xref, $tag, $data] = $match;

            $tag = $gedcom_service->canonicalTag($tag);

            switch ($tag) {
                case 'AFN':
                    // AFN values are upper case
                    $data = strtoupper($data);
                    break;
                case 'DATE':
                    // Preserve text from INT dates
                    if (str_contains($data, '(')) {
                        [$date, $text] = explode('(', $data, 2);
                        $text = ' (' . $text;
                    } else {
                        $date = $data;
                        $text = '';
                    }
                    // Capitals
                    $date = strtoupper($date);
                    // Temporarily add leading/trailing spaces, to allow efficient matching below
                    $date = ' ' . $date . ' ';
                    // Ensure space between digits and letters
                    $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date);
                    $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date);
                    // Ensure space before/after calendar escapes
                    $date = preg_replace('/@#[^@]+@/', ' $0 ', $date);
                    // "BET." => "BET"
                    $date = preg_replace('/(\w\w)\./', '$1', $date);
                    // "CIR" => "ABT"
                    $date = str_replace(' CIR ', ' ABT ', $date);
                    $date = str_replace(' APX ', ' ABT ', $date);
                    // B.C. => BC (temporarily, to allow easier handling of ".")
                    $date = str_replace(' B.C. ', ' BC ', $date);
                    // TMG uses "EITHER X OR Y"
                    $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date);
                    // "BET X - Y " => "BET X AND Y"
                    $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date);
                    $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date);
                    // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y"
                    $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date);
                    $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date);
                    // "@#ESC@ AFT X" => "AFT @#ESC@ X"
                    $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date);
                    // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900"
                    // (don't change "/" - it is used in NS/OS dates)
                    $date = preg_replace('/[.,:;-]/', ' ', $date);
                    // BC => B.C.
                    $date = str_replace(' BC ', ' B.C. ', $date);
                    // Append the "INT" text
                    $data = $date . $text;
                    break;
                case '_FILE':
                    $tag = 'FILE';
                    break;
                case 'FORM':
                    // Consistent commas
                    $data = preg_replace('/ *, */', ', ', $data);
                    break;
                case 'HEAD':
                    // HEAD records don't have an XREF or DATA
                    if ($level === '0') {
                        $xref = '';
                        $data = '';
                    }
                    break;
                case 'NAME':
                    // Trim the name and collapse runs of spaces
                    $data = preg_replace('/ +/', ' ', trim($data));
                    break;
                case 'PEDI':
                    // PEDI values are lower case
                    $data = strtolower($data);
                    break;
                case 'PLAC':
                    // Consistent commas
                    // NOTE(review): the ASCII comma appears twice in this character class; the second
                    // one was presumably a different (e.g. full-width) comma - confirm against upstream.
                    $data = preg_replace('/ *[,,،] */u', ', ', $data);
                    // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W
                    if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $match)) {
                        // Convert degrees/minutes/seconds to decimal degrees, and emit a MAP sub-record.
                        $data =
                            $match[1] . "\n" .
                            ($level + 1) . " MAP\n" .
                            ($level + 2) . ' LATI ' . ($match[5] . round($match[2] + ($match[3] / 60) + ($match[4] / 3600), 4)) . "\n" .
                            ($level + 2) . ' LONG ' . ($match[9] . round($match[6] + ($match[7] / 60) + ($match[8] / 3600), 4));
                    }
                    break;
                case 'RESN':
                    // RESN values are lower case (confidential, privacy, locked, none)
                    $data = strtolower($data);
                    if ($data === 'invisible') {
                        $data = 'confidential'; // From old versions of Legacy.
                    }
                    break;
                case 'SEX':
                    $data = strtoupper($data);
                    break;
                case 'STAT':
                    if ($data === 'CANCELLED') {
                        // PhpGedView mis-spells this tag - correct it.
                        $data = 'CANCELED';
                    }
                    break;
                case 'TEMP':
                    // Temple codes are upper case
                    $data = strtoupper($data);
                    break;
                case 'TRLR':
                    // TRLR records don't have an XREF or DATA
                    if ($level === '0') {
                        $xref = '';
                        $data = '';
                    }
                    break;
            }
            // Suppress "Y", for facts/events with a DATE or PLAC
            if ($data === 'y') {
                $data = 'Y';
            }
            if ($level === '1' && $data === 'Y') {
                // Scan the subordinate lines of this fact, up to (and including) the last line
                // of the record.  The bound must be $num_matches, not $num_matches - 1, otherwise
                // a DATE/PLAC on the final line of the record is never seen.
                for ($i = $n + 1; $i < $num_matches && $matches[$i][1] !== '1'; ++$i) {
                    if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') {
                        $data = '';
                        break;
                    }
                }
            }
            // Reassemble components back into a single line
            switch ($tag) {
                default:
                    // Remove tabs and multiple/leading/trailing spaces
                    $data = strtr($data, ["\t" => ' ']);
                    $data = trim($data, ' ');
                    // Collapse every run of two or more spaces into one.
                    $data = preg_replace('/ {2,}/', ' ', $data);
                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
                    break;
                case 'NOTE':
                case 'TEXT':
                case 'DATA':
                case 'CONT':
                    // Free-text tags: keep the data exactly as supplied.
                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
                    break;
                case 'FILE':
                    // Strip off the user-defined path prefix
                    $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH');
                    if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) {
                        $data = substr($data, strlen($GEDCOM_MEDIA_PATH));
                    }
                    // convert backslashes in filenames to forward slashes
                    $data = preg_replace("/\\\\/", '/', $data);

                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
                    break;
                case 'CONC':
                    // Merge CONC lines, to simplify access later on.
                    $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data;
                    break;
            }
        }

        return $newrec;
    }

    /**
     * import record into database
     * this function will parse the given gedcom record and add it to the database
     *
     * Records are dispatched on their level-0 tag.  HEAD and TRLR use their tag as a
     * pseudo XREF.  Place-definition records are handed to dedicated importers, and a
     * fixed list of vendor-specific records without an XREF is silently skipped.
     * Any other record without an XREF is rejected.
     *
     * @param string $gedrec the raw gedcom record to parse
     * @param Tree   $tree   import the record into this tree
     * @param bool   $update whether this is an updated record that has been accepted
     *
     * @return void
     * @throws GedcomErrorException
     */
    public function importRecord(string $gedrec, Tree $tree, bool $update): void
    {
        $tree_id = $tree->id();

        // Escaped @ signs (only if importing from file)
        if (!$update) {
            $gedrec = str_replace('@@', '@', $gedrec);
        }

        // Standardise gedcom format
        $gedrec = $this->reformatRecord($gedrec, $tree);

        // import different types of records
        if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) {
            [, $xref, $type] = $match;
        } elseif (str_starts_with($gedrec, '0 HEAD')) {
            $type = 'HEAD';
            $xref = 'HEAD'; // For records without an XREF, use the type as a pseudo XREF.
        } elseif (str_starts_with($gedrec, '0 TRLR')) {
            $tree->setPreference('imported', '1');
            $type = 'TRLR';
            $xref = 'TRLR'; // For records without an XREF, use the type as a pseudo XREF.
        } elseif (str_starts_with($gedrec, '0 _PLAC_DEFN')) {
            $this->importLegacyPlacDefn($gedrec);

            return;
        } elseif (str_starts_with($gedrec, '0 _PLAC ')) {
            $this->importTNGPlac($gedrec);

            return;
        } elseif (str_starts_with($gedrec, '0 _EVDEF')) {
            // Created by RootsMagic. We cannot process these records without an XREF.
            return;
        } elseif (str_starts_with($gedrec, '0 _EVENT_DEFN')) {
            // Created by PAF and Legacy. We cannot process these records without an XREF.
            return;
        } elseif (str_starts_with($gedrec, '0 PEDIGREELINK')) {
            // Created by GenoPro. We cannot process these records without an XREF.
            return;
        } elseif (str_starts_with($gedrec, '0 GLOBAL')) {
            // Created by GenoPro. We cannot process these records without an XREF.
            return;
        } elseif (str_starts_with($gedrec, '0 GENOMAP')) {
            // Created by GenoPro. We cannot process these records without an XREF.
            return;
        } elseif (str_starts_with($gedrec, '0 EMOTIONALRELATIONSHIP')) {
            // Created by GenoPro. We cannot process these records without an XREF.
            return;
        } elseif (str_starts_with($gedrec, '0 SOCIALRELATIONSHIP')) {
            // Created by GenoPro. We cannot process these records without an XREF.
            return;
        } elseif (str_starts_with($gedrec, '0 LABEL')) {
            // Created by GenoPro. We cannot process these records without an XREF.
            return;
        } else {
            throw new GedcomErrorException($gedrec);
        }

        // Add a _UID
        if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) {
            $element = Registry::elementFactory()->make($type . ':_UID');
            if (!$element instanceof UnknownElement) {
                $gedrec .= "\n1 _UID " . $element->default($tree);
            }
        }

        // If the user has downloaded their GEDCOM data (containing media objects) and edited it
        // using an application which does not support (and deletes) media objects, then add them
        // back in.
        if ($tree->getPreference('keep_media')) {
            $old_linked_media = DB::table('link')
                ->where('l_from', '=', $xref)
                ->where('l_file', '=', $tree_id)
                ->where('l_type', '=', 'OBJE')
                ->pluck('l_to');

            // Delete these links - so that we do not insert them again in updateLinks()
            DB::table('link')
                ->where('l_from', '=', $xref)
                ->where('l_file', '=', $tree_id)
                ->where('l_type', '=', 'OBJE')
                ->delete();

            foreach ($old_linked_media as $media_id) {
                $gedrec .= "\n1 OBJE @" . $media_id . '@';
            }
        }

        // Convert inline media into media objects
        $gedrec = $this->convertInlineMedia($tree, $gedrec);

        switch ($type) {
            case Individual::RECORD_TYPE:
                $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree);

                if (preg_match('/\n1 RIN (.+)/', $gedrec, $match)) {
                    $rin = $match[1];
                } else {
                    $rin = $xref;
                }

                DB::table('individuals')->insert([
                    'i_id'     => $xref,
                    'i_file'   => $tree_id,
                    'i_rin'    => $rin,
                    'i_sex'    => $record->sex(),
                    'i_gedcom' => $gedrec,
                ]);

                // Update the cross-reference/index tables.
                $this->updatePlaces($xref, $tree, $gedrec);
                $this->updateDates($xref, $tree_id, $gedrec);
                $this->updateNames($xref, $tree_id, $record);
                break;

            case Family::RECORD_TYPE:
                if (preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
                    $husb = $match[1];
                } else {
                    $husb = '';
                }
                if (preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
                    $wife = $match[1];
                } else {
                    $wife = '';
                }
                // Number of children: the larger of the CHIL link count and any explicit NCHI value.
                $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match);
                if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) {
                    // Cast the captured digits, so that max() compares two integers.
                    $nchi = max($nchi, (int) $match[1]);
                }

                DB::table('families')->insert([
                    'f_id'      => $xref,
                    'f_file'    => $tree_id,
                    'f_husb'    => $husb,
                    'f_wife'    => $wife,
                    'f_gedcom'  => $gedrec,
                    'f_numchil' => $nchi,
                ]);

                // Update the cross-reference/index tables.
                $this->updatePlaces($xref, $tree, $gedrec);
                $this->updateDates($xref, $tree_id, $gedrec);
                break;

            case Source::RECORD_TYPE:
                // Use the title, falling back to the abbreviation, then the XREF.
                if (preg_match('/\n1 TITL (.+)/', $gedrec, $match)) {
                    $name = $match[1];
                } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) {
                    $name = $match[1];
                } else {
                    $name = $xref;
                }

                DB::table('sources')->insert([
                    's_id'     => $xref,
                    's_file'   => $tree_id,
                    's_name'   => mb_substr($name, 0, 255),
                    's_gedcom' => $gedrec,
                ]);
                break;

            case Repository::RECORD_TYPE:
            case Note::RECORD_TYPE:
            case Submission::RECORD_TYPE:
            case Submitter::RECORD_TYPE:
            case Location::RECORD_TYPE:
                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => $type,
                    'o_gedcom' => $gedrec,
                ]);
                break;

            case Header::RECORD_TYPE:
                // Force HEAD records to have a creation date.
                if (!str_contains($gedrec, "\n1 DATE ")) {
                    $today  = strtoupper(date('d M Y'));
                    $gedrec .= "\n1 DATE " . $today;
                }

                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => Header::RECORD_TYPE,
                    'o_gedcom' => $gedrec,
                ]);
                break;

            case Media::RECORD_TYPE:
                $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree);

                DB::table('media')->insert([
                    'm_id'     => $xref,
                    'm_file'   => $tree_id,
                    'm_gedcom' => $gedrec,
                ]);

                foreach ($record->mediaFiles() as $media_file) {
                    DB::table('media_file')->insert([
                        'm_id'                 => $xref,
                        'm_file'               => $tree_id,
                        'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
                        'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
                        'source_media_type'    => mb_substr($media_file->type(), 0, 15),
                        'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
                    ]);
                }
                break;

            default: // Custom record types.
                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => mb_substr($type, 0, 15),
                    'o_gedcom' => $gedrec,
                ]);
                break;
        }

        // Update the cross-reference/index tables.
        $this->updateLinks($xref, $tree_id, $gedrec);
    }

    /**
     * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values
     *
     * The coordinates are only stored when the matching place location has neither
     * a latitude nor a longitude.  Records missing the place name or either
     * coordinate are ignored.
     *
     * @param string $gedcom
     *
     * @return void
     */
    private function importLegacyPlacDefn(string $gedcom): void
    {
        $gedcom_service = new GedcomService();

        if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match)) {
            $place_name = $match[1];
        } else {
            return;
        }

        if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match)) {
            $latitude = $gedcom_service->readLatitude($match[1]);
        } else {
            return;
        }

        if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match)) {
            $longitude = $gedcom_service->readLongitude($match[1]);
        } else {
            return;
        }

        $location = new PlaceLocation($place_name);

        if ($location->latitude() === null && $location->longitude() === null) {
            DB::table('place_location')
                ->where('id', '=', $location->id())
                ->update([
                    'latitude'  => $latitude,
                    'longitude' => $longitude,
                ]);
        }
    }

    /**
     * TNG generates level-0 _PLAC records containing LAT/LONG values
     *
     * The place name is the data of the level-0 line, and the coordinates are read
     * from level-2 LATI/LONG lines and cast to float.  They are only stored when the
     * matching place location has neither a latitude nor a longitude.
     *
     * @param string $gedcom
     *
     * @return void
     */
    private function importTNGPlac(string $gedcom): void
    {
        if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match)) {
            $place_name = $match[1];
        } else {
            return;
        }

        if (preg_match('/\n2 LATI (.+)/', $gedcom, $match)) {
            $latitude = (float) $match[1];
        } else {
            return;
        }

        if (preg_match('/\n2 LONG (.+)/', $gedcom, $match)) {
            $longitude = (float) $match[1];
        } else {
            return;
        }

        $location = new PlaceLocation($place_name);

        if ($location->latitude() === null && $location->longitude() === null) {
            DB::table('place_location')
                ->where('id', '=', $location->id())
                ->update([
                    'latitude'  => $latitude,
                    'longitude' => $longitude,
                ]);
        }
    }

    /**
     * Extract all level 2 places from the given record and insert them into the places table
     *
     * A link row is created for each place and for each of its parents.
     *
     * @param string $xref
     * @param Tree   $tree
     * @param string $gedrec
     *
     * @return void
     */
    public function updatePlaces(string $xref, Tree $tree, string $gedrec): void
    {
        // Insert all new rows together
        $rows = [];

        preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches);

        $places = array_unique($matches[1]);

        foreach ($places as $place_name) {
            $place = new Place($place_name, $tree);

            // Calling Place::id() will create the entry in the database, if it doesn't already exist.
            while ($place->id() !== 0) {
                $rows[] = [
                    'pl_p_id' => $place->id(),
                    'pl_gid'  => $xref,
                    'pl_file' => $tree->id(),
                ];

                $place = $place->parent();
            }
        }

        // array_unique doesn't work with arrays of arrays
        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));

        // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders.
        foreach (array_chunk($rows, 20000) as $chunk) {
            DB::table('placelinks')->insert($chunk);
        }
    }

    /**
     * Extract all the dates from the given record and insert them into the database.
     *
     * Two rows are generated for every level-2 DATE: one for the minimum date and
     * one for the maximum date.  Identical rows are removed before inserting.
     *
     * @param string $xref
     * @param int    $ged_id
     * @param string $gedrec
     *
     * @return void
     */
    private function updateDates(string $xref, int $ged_id, string $gedrec): void
    {
        // Insert all new rows together
        $rows = [];

        preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {
            $fact = $match[1];
            $date = new Date($match[2]);
            $rows[] = [
                'd_day'        => $date->minimumDate()->day,
                'd_month'      => $date->minimumDate()->format('%O'),
                'd_mon'        => $date->minimumDate()->month,
                'd_year'       => $date->minimumDate()->year,
                'd_julianday1' => $date->minimumDate()->minimumJulianDay(),
                'd_julianday2' => $date->minimumDate()->maximumJulianDay(),
                'd_fact'       => $fact,
                'd_gid'        => $xref,
                'd_file'       => $ged_id,
                'd_type'       => $date->minimumDate()->format('%@'),
            ];

            $rows[] = [
                'd_day'        => $date->maximumDate()->day,
                'd_month'      => $date->maximumDate()->format('%O'),
                'd_mon'        => $date->maximumDate()->month,
                'd_year'       => $date->maximumDate()->year,
                'd_julianday1' => $date->maximumDate()->minimumJulianDay(),
                'd_julianday2' => $date->maximumDate()->maximumJulianDay(),
                'd_fact'       => $fact,
                'd_gid'        => $xref,
                'd_file'       => $ged_id,
                // NOTE(review): this row describes the maximum date, but d_type is taken from
                // minimumDate() - possibly a copy/paste slip; confirm before changing.
                'd_type'       => $date->minimumDate()->format('%@'),
            ];
        }

        // array_unique doesn't work with arrays of arrays
        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));

        DB::table('dates')->insert($rows);
    }

    /**
     * Extract all the links from the given record and insert them into the database
     *
     * @param string $xref
     * @param int    $ged_id
     * @param string $gedrec
     *
     * @return void
     */
    private function updateLinks(string $xref, int $ged_id, string $gedrec): void
    {
        // Insert all new rows together
        $rows = [];

        preg_match_all('/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {
            // Some applications (e.g. GenoPro) create links longer than 15 characters.
            // Keep the FIRST 15 characters: the second argument of mb_substr() is the start
            // offset, so the length must be passed as the third argument.
            $link = mb_substr($match[1], 0, 15);

            // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@"
            $rows[$link . strtoupper($match[2])] = [
                'l_from' => $xref,
                'l_to'   => $match[2],
                'l_type' => $link,
                'l_file' => $ged_id,
            ];
        }

        DB::table('link')->insert($rows);
    }

    /**
     * Extract all the names from the given record and insert them into the database.
     *
     * @param string     $xref
     * @param int        $ged_id
     * @param Individual $record
     *
     * @return void
     */
    private function updateNames(string $xref, int $ged_id, Individual $record): void
    {
        // Insert all new rows together
        $rows = [];

        foreach ($record->getAllNames() as $n => $name) {
            if ($name['givn'] === Individual::PRAENOMEN_NESCIO) {
                $soundex_givn_std = null;
                $soundex_givn_dm  = null;
            } else {
                $soundex_givn_std = Soundex::russell($name['givn']);
                $soundex_givn_dm  = Soundex::daitchMokotoff($name['givn']);
            }

            if ($name['surn'] === Individual::NOMEN_NESCIO) {
                $soundex_surn_std = null;
                $soundex_surn_dm  = null;
            } else {
                $soundex_surn_std = Soundex::russell($name['surname']);
                $soundex_surn_dm  = Soundex::daitchMokotoff($name['surname']);
            }

            $rows[] = [
                'n_file' => $ged_id,
7332c685d76SGreg Roach 'n_id' => $xref, 7342c685d76SGreg Roach 'n_num' => $n, 7352c685d76SGreg Roach 'n_type' => $name['type'], 7362c685d76SGreg Roach 'n_sort' => mb_substr($name['sort'], 0, 255), 7372c685d76SGreg Roach 'n_full' => mb_substr($name['fullNN'], 0, 255), 7382c685d76SGreg Roach 'n_surname' => mb_substr($name['surname'], 0, 255), 7392c685d76SGreg Roach 'n_surn' => mb_substr($name['surn'], 0, 255), 7402c685d76SGreg Roach 'n_givn' => mb_substr($name['givn'], 0, 255), 7412c685d76SGreg Roach 'n_soundex_givn_std' => $soundex_givn_std, 7422c685d76SGreg Roach 'n_soundex_surn_std' => $soundex_surn_std, 7432c685d76SGreg Roach 'n_soundex_givn_dm' => $soundex_givn_dm, 7442c685d76SGreg Roach 'n_soundex_surn_dm' => $soundex_surn_dm, 7452c685d76SGreg Roach ]; 7462c685d76SGreg Roach } 7472c685d76SGreg Roach 7482c685d76SGreg Roach DB::table('name')->insert($rows); 7492c685d76SGreg Roach } 7502c685d76SGreg Roach 7512c685d76SGreg Roach /** 7522c685d76SGreg Roach * Extract inline media data, and convert to media objects. 7532c685d76SGreg Roach * 7542c685d76SGreg Roach * @param Tree $tree 7552c685d76SGreg Roach * @param string $gedcom 7562c685d76SGreg Roach * 7572c685d76SGreg Roach * @return string 7582c685d76SGreg Roach */ 7592c685d76SGreg Roach private function convertInlineMedia(Tree $tree, string $gedcom): string 7602c685d76SGreg Roach { 7612c685d76SGreg Roach while (preg_match('/\n1 OBJE(?:\n[2-9].+)+/', $gedcom, $match)) { 7622c685d76SGreg Roach $xref = $this->createMediaObject($match[0], $tree); 7632c685d76SGreg Roach $gedcom = strtr($gedcom, [$match[0] => "\n1 OBJE @" . $xref . '@']); 7642c685d76SGreg Roach } 7652c685d76SGreg Roach while (preg_match('/\n2 OBJE(?:\n[3-9].+)+/', $gedcom, $match)) { 7662c685d76SGreg Roach $xref = $this->createMediaObject($match[0], $tree); 7672c685d76SGreg Roach $gedcom = strtr($gedcom, [$match[0] => "\n2 OBJE @" . $xref . 
'@']); 7682c685d76SGreg Roach } 7692c685d76SGreg Roach while (preg_match('/\n3 OBJE(?:\n[4-9].+)+/', $gedcom, $match)) { 7702c685d76SGreg Roach $xref = $this->createMediaObject($match[0], $tree); 7712c685d76SGreg Roach $gedcom = strtr($gedcom, [$match[0] => "\n3 OBJE @" . $xref . '@']); 7722c685d76SGreg Roach } 7732c685d76SGreg Roach 7742c685d76SGreg Roach return $gedcom; 7752c685d76SGreg Roach } 7762c685d76SGreg Roach 7772c685d76SGreg Roach /** 7782c685d76SGreg Roach * Create a new media object, from inline media data. 7792c685d76SGreg Roach * 7802c685d76SGreg Roach * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL 7812c685d76SGreg Roach * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL 7822c685d76SGreg Roach * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +2 MEDI 7832c685d76SGreg Roach * 7842c685d76SGreg Roach * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE 7852c685d76SGreg Roach * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL 7862c685d76SGreg Roach * 7872c685d76SGreg Roach * @param string $gedcom 7882c685d76SGreg Roach * @param Tree $tree 7892c685d76SGreg Roach * 7902c685d76SGreg Roach * @return string 7912c685d76SGreg Roach */ 7922c685d76SGreg Roach private function createMediaObject(string $gedcom, Tree $tree): string 7932c685d76SGreg Roach { 7942c685d76SGreg Roach preg_match('/\n\d FILE (.+)/', $gedcom, $match); 7952c685d76SGreg Roach $file = $match[1] ?? ''; 7962c685d76SGreg Roach 7972c685d76SGreg Roach preg_match('/\n\d TITL (.+)/', $gedcom, $match); 7982c685d76SGreg Roach $title = $match[1] ?? ''; 7992c685d76SGreg Roach 8002c685d76SGreg Roach preg_match('/\n\d FORM (.+)/', $gedcom, $match); 8012c685d76SGreg Roach $format = $match[1] ?? ''; 8022c685d76SGreg Roach 8032c685d76SGreg Roach preg_match('/\n\d MEDI (.+)/', $gedcom, $match); 8042c685d76SGreg Roach $media = $match[1] ?? 
''; 8052c685d76SGreg Roach 8062c685d76SGreg Roach preg_match('/\n\d _SCBK (.+)/', $gedcom, $match); 8072c685d76SGreg Roach $scrapbook = $match[1] ?? ''; 8082c685d76SGreg Roach 8092c685d76SGreg Roach preg_match('/\n\d _PRIM (.+)/', $gedcom, $match); 8102c685d76SGreg Roach $primary = $match[1] ?? ''; 8112c685d76SGreg Roach 8122c685d76SGreg Roach preg_match('/\n\d _TYPE (.+)/', $gedcom, $match); 8132c685d76SGreg Roach if ($media === '') { 8142c685d76SGreg Roach // Legacy uses _TYPE instead of MEDI 8152c685d76SGreg Roach $media = $match[1] ?? ''; 8162c685d76SGreg Roach $type = ''; 8172c685d76SGreg Roach } else { 8182c685d76SGreg Roach $type = $match[1] ?? ''; 8192c685d76SGreg Roach } 8202c685d76SGreg Roach 8212c685d76SGreg Roach preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches); 8222c685d76SGreg Roach $notes = $matches[1] ?? []; 8232c685d76SGreg Roach 8242c685d76SGreg Roach // Have we already created a media object with the same title/filename? 8252c685d76SGreg Roach $xref = DB::table('media_file') 8262c685d76SGreg Roach ->where('m_file', '=', $tree->id()) 8272c685d76SGreg Roach ->where('descriptive_title', '=', mb_substr($title, 0, 248)) 8282c685d76SGreg Roach ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248)) 8292c685d76SGreg Roach ->value('m_id'); 8302c685d76SGreg Roach 8312c685d76SGreg Roach if ($xref === null) { 8322c685d76SGreg Roach $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE); 8332c685d76SGreg Roach 8342c685d76SGreg Roach // convert to a media-object 8352c685d76SGreg Roach $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file; 8362c685d76SGreg Roach 8372c685d76SGreg Roach if ($format !== '') { 8382c685d76SGreg Roach $gedcom .= "\n2 FORM " . $format; 8392c685d76SGreg Roach 8402c685d76SGreg Roach if ($media !== '') { 8412c685d76SGreg Roach $gedcom .= "\n3 TYPE " . 
$media; 8422c685d76SGreg Roach } 8432c685d76SGreg Roach } 8442c685d76SGreg Roach 8452c685d76SGreg Roach if ($title !== '') { 8462c685d76SGreg Roach $gedcom .= "\n3 TITL " . $title; 8472c685d76SGreg Roach } 8482c685d76SGreg Roach 8492c685d76SGreg Roach if ($scrapbook !== '') { 8502c685d76SGreg Roach $gedcom .= "\n1 _SCBK " . $scrapbook; 8512c685d76SGreg Roach } 8522c685d76SGreg Roach 8532c685d76SGreg Roach if ($primary !== '') { 8542c685d76SGreg Roach $gedcom .= "\n1 _PRIM " . $primary; 8552c685d76SGreg Roach } 8562c685d76SGreg Roach 8572c685d76SGreg Roach if ($type !== '') { 8582c685d76SGreg Roach $gedcom .= "\n1 _TYPE " . $type; 8592c685d76SGreg Roach } 8602c685d76SGreg Roach 8612c685d76SGreg Roach foreach ($notes as $note) { 8622c685d76SGreg Roach $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]); 8632c685d76SGreg Roach } 8642c685d76SGreg Roach 8652c685d76SGreg Roach DB::table('media')->insert([ 8662c685d76SGreg Roach 'm_id' => $xref, 8672c685d76SGreg Roach 'm_file' => $tree->id(), 8682c685d76SGreg Roach 'm_gedcom' => $gedcom, 8692c685d76SGreg Roach ]); 8702c685d76SGreg Roach 8712c685d76SGreg Roach DB::table('media_file')->insert([ 8722c685d76SGreg Roach 'm_id' => $xref, 8732c685d76SGreg Roach 'm_file' => $tree->id(), 8742c685d76SGreg Roach 'multimedia_file_refn' => mb_substr($file, 0, 248), 8752c685d76SGreg Roach 'multimedia_format' => mb_substr($format, 0, 4), 8762c685d76SGreg Roach 'source_media_type' => mb_substr($media, 0, 15), 8772c685d76SGreg Roach 'descriptive_title' => mb_substr($title, 0, 248), 8782c685d76SGreg Roach ]); 8792c685d76SGreg Roach } 8802c685d76SGreg Roach 8812c685d76SGreg Roach return $xref; 8822c685d76SGreg Roach } 8832c685d76SGreg Roach 8842c685d76SGreg Roach /** 8852c685d76SGreg Roach * update a record in the database 8862c685d76SGreg Roach * 8872c685d76SGreg Roach * @param string $gedrec 8882c685d76SGreg Roach * @param Tree $tree 8892c685d76SGreg Roach * @param bool $delete 8902c685d76SGreg Roach * 
8912c685d76SGreg Roach * @return void 8922c685d76SGreg Roach * @throws GedcomErrorException 8932c685d76SGreg Roach */ 8942c685d76SGreg Roach public function updateRecord(string $gedrec, Tree $tree, bool $delete): void 8952c685d76SGreg Roach { 8962c685d76SGreg Roach if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 8972c685d76SGreg Roach [, $gid, $type] = $match; 8982c685d76SGreg Roach } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) { 8992c685d76SGreg Roach // The HEAD record has no XREF. Any others? 9002c685d76SGreg Roach $gid = $match[1]; 9012c685d76SGreg Roach $type = $match[1]; 9022c685d76SGreg Roach } else { 9032c685d76SGreg Roach throw new GedcomErrorException($gedrec); 9042c685d76SGreg Roach } 9052c685d76SGreg Roach 9062c685d76SGreg Roach // Place links 9072c685d76SGreg Roach DB::table('placelinks') 9082c685d76SGreg Roach ->where('pl_gid', '=', $gid) 9092c685d76SGreg Roach ->where('pl_file', '=', $tree->id()) 9102c685d76SGreg Roach ->delete(); 9112c685d76SGreg Roach 9122c685d76SGreg Roach // Orphaned places. If we're deleting "Westminster, London, England", 9132c685d76SGreg Roach // then we may also need to delete "London, England" and "England". 
9142c685d76SGreg Roach do { 9152c685d76SGreg Roach $affected = DB::table('places') 9162c685d76SGreg Roach ->leftJoin('placelinks', function (JoinClause $join): void { 9172c685d76SGreg Roach $join 9182c685d76SGreg Roach ->on('p_id', '=', 'pl_p_id') 9192c685d76SGreg Roach ->on('p_file', '=', 'pl_file'); 9202c685d76SGreg Roach }) 9212c685d76SGreg Roach ->whereNull('pl_p_id') 9222c685d76SGreg Roach ->delete(); 9232c685d76SGreg Roach } while ($affected > 0); 9242c685d76SGreg Roach 9252c685d76SGreg Roach DB::table('dates') 9262c685d76SGreg Roach ->where('d_gid', '=', $gid) 9272c685d76SGreg Roach ->where('d_file', '=', $tree->id()) 9282c685d76SGreg Roach ->delete(); 9292c685d76SGreg Roach 9302c685d76SGreg Roach DB::table('name') 9312c685d76SGreg Roach ->where('n_id', '=', $gid) 9322c685d76SGreg Roach ->where('n_file', '=', $tree->id()) 9332c685d76SGreg Roach ->delete(); 9342c685d76SGreg Roach 9352c685d76SGreg Roach DB::table('link') 9362c685d76SGreg Roach ->where('l_from', '=', $gid) 9372c685d76SGreg Roach ->where('l_file', '=', $tree->id()) 9382c685d76SGreg Roach ->delete(); 9392c685d76SGreg Roach 9402c685d76SGreg Roach switch ($type) { 9412c685d76SGreg Roach case Individual::RECORD_TYPE: 9422c685d76SGreg Roach DB::table('individuals') 9432c685d76SGreg Roach ->where('i_id', '=', $gid) 9442c685d76SGreg Roach ->where('i_file', '=', $tree->id()) 9452c685d76SGreg Roach ->delete(); 9462c685d76SGreg Roach break; 9472c685d76SGreg Roach 9482c685d76SGreg Roach case Family::RECORD_TYPE: 9492c685d76SGreg Roach DB::table('families') 9502c685d76SGreg Roach ->where('f_id', '=', $gid) 9512c685d76SGreg Roach ->where('f_file', '=', $tree->id()) 9522c685d76SGreg Roach ->delete(); 9532c685d76SGreg Roach break; 9542c685d76SGreg Roach 9552c685d76SGreg Roach case Source::RECORD_TYPE: 9562c685d76SGreg Roach DB::table('sources') 9572c685d76SGreg Roach ->where('s_id', '=', $gid) 9582c685d76SGreg Roach ->where('s_file', '=', $tree->id()) 9592c685d76SGreg Roach ->delete(); 9602c685d76SGreg Roach 
break; 9612c685d76SGreg Roach 9622c685d76SGreg Roach case Media::RECORD_TYPE: 9632c685d76SGreg Roach DB::table('media_file') 9642c685d76SGreg Roach ->where('m_id', '=', $gid) 9652c685d76SGreg Roach ->where('m_file', '=', $tree->id()) 9662c685d76SGreg Roach ->delete(); 9672c685d76SGreg Roach 9682c685d76SGreg Roach DB::table('media') 9692c685d76SGreg Roach ->where('m_id', '=', $gid) 9702c685d76SGreg Roach ->where('m_file', '=', $tree->id()) 9712c685d76SGreg Roach ->delete(); 9722c685d76SGreg Roach break; 9732c685d76SGreg Roach 9742c685d76SGreg Roach default: 9752c685d76SGreg Roach DB::table('other') 9762c685d76SGreg Roach ->where('o_id', '=', $gid) 9772c685d76SGreg Roach ->where('o_file', '=', $tree->id()) 9782c685d76SGreg Roach ->delete(); 9792c685d76SGreg Roach break; 9802c685d76SGreg Roach } 9812c685d76SGreg Roach 9822c685d76SGreg Roach if (!$delete) { 9832c685d76SGreg Roach $this->importRecord($gedrec, $tree, true); 9842c685d76SGreg Roach } 9852c685d76SGreg Roach } 9862c685d76SGreg Roach} 987