<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2022 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Date;
use Fisharebest\Webtrees\Elements\UnknownElement;
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Place;
use Fisharebest\Webtrees\PlaceLocation;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Soundex;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submission;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\JoinClause;

use function app;
use function array_chunk;
use function array_intersect_key;
use function array_map;
use function array_unique;
use function assert;
use function date;
use function explode;
use function max;
use function mb_substr;
use function preg_match;
use function preg_match_all;
use function preg_replace;
use function round;
use function str_contains;
use function str_replace;
use function str_starts_with;
use function strlen;
use function strtolower;
use function strtoupper;
use function strtr;
use function substr;
use function trim;

use const PREG_SET_ORDER;

/**
 * Class GedcomImportService - import GEDCOM data
 */
class GedcomImportService
{
    /**
     * Tidy up a gedcom record on import, so that we can access it consistently/efficiently.
     *
     * Each line is split into level, optional xref, tag and data. The tag is made
     * canonical, tag-specific clean-ups are applied to the data, and the lines are
     * joined back together with "\n". CONC lines are merged into the preceding line.
     *
     * @param string $rec  the raw record, with any style of line endings
     * @param Tree   $tree supplies the GEDCOM_MEDIA_PATH and WORD_WRAPPED_NOTES preferences
     *
     * @return string the normalised record
     */
    private function reformatRecord(string $rec, Tree $tree): string
    {
        $gedcom_service = app(GedcomService::class);
        assert($gedcom_service instanceof GedcomService);

        // Strip out mac/msdos line endings
        $rec = preg_replace("/[\r\n]+/", "\n", $rec);

        // Extract lines from the record; lines consist of: level + optional xref + tag + optional data
        $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER);

        // Process the record line-by-line
        $newrec = '';
        foreach ($matches as $n => $match) {
            [, $level, $xref, $tag, $data] = $match;

            $tag = $gedcom_service->canonicalTag($tag);

            switch ($tag) {
                case 'AFN':
                    // AFN values are upper case
                    $data = strtoupper($data);
                    break;
                case 'DATE':
                    // Preserve text from INT dates
                    if (str_contains($data, '(')) {
                        [$date, $text] = explode('(', $data, 2);
                        $text = ' (' . $text;
                    } else {
                        $date = $data;
                        $text = '';
                    }
                    // Capitals
                    $date = strtoupper($date);
                    // Temporarily add leading/trailing spaces, to allow efficient matching below
                    $date = ' ' . $date . ' ';
                    // Ensure space between digits and letters
                    $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date);
                    $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date);
                    // Ensure space before/after calendar escapes
                    $date = preg_replace('/@#[^@]+@/', ' $0 ', $date);
                    // "BET." => "BET"
                    $date = preg_replace('/(\w\w)\./', '$1', $date);
                    // "CIR" => "ABT"
                    $date = str_replace(' CIR ', ' ABT ', $date);
                    $date = str_replace(' APX ', ' ABT ', $date);
                    // B.C. => BC (temporarily, to allow easier handling of ".")
                    $date = str_replace(' B.C. ', ' BC ', $date);
                    // TMG uses "EITHER X OR Y"
                    $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date);
                    // "BET X - Y " => "BET X AND Y"
                    $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date);
                    $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date);
                    // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y"
                    $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date);
                    $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date);
                    // "@#ESC@ AFT X" => "AFT @#ESC@ X"
                    $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date);
                    // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900"
                    // (don't change "/" - it is used in NS/OS dates)
                    $date = preg_replace('/[.,:;-]/', ' ', $date);
                    // BC => B.C.
                    $date = str_replace(' BC ', ' B.C. ', $date);
                    // Append the "INT" text
                    $data = $date . $text;
                    break;
                case '_FILE':
                    $tag = 'FILE';
                    break;
                case 'FORM':
                    // Consistent commas
                    $data = preg_replace('/ *, */', ', ', $data);
                    break;
                case 'HEAD':
                    // HEAD records don't have an XREF or DATA
                    if ($level === '0') {
                        $xref = '';
                        $data = '';
                    }
                    break;
                case 'NAME':
                    // Trim, and collapse runs of spaces into one
                    $data = preg_replace('/ +/', ' ', trim($data));
                    break;
                case 'PEDI':
                    // PEDI values are lower case
                    $data = strtolower($data);
                    break;
                case 'PLAC':
                    // Consistent commas
                    // NOTE(review): the duplicated "," in this character class looks like a
                    // full-width comma (U+FF0C) that was normalised somewhere - confirm against upstream.
                    $data = preg_replace('/ *[,,،] */u', ', ', $data);
                    // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W
                    if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $match)) {
                        // Convert degrees/minutes/seconds to decimal degrees, and emit MAP/LATI/LONG sub-lines.
                        $data =
                            $match[1] . "\n" .
                            ($level + 1) . " MAP\n" .
                            ($level + 2) . ' LATI ' . ($match[5] . round($match[2] + ($match[3] / 60) + ($match[4] / 3600), 4)) . "\n" .
                            ($level + 2) . ' LONG ' . ($match[9] . round($match[6] + ($match[7] / 60) + ($match[8] / 3600), 4));
                    }
                    break;
                case 'RESN':
                    // RESN values are lower case (confidential, privacy, locked, none)
                    $data = strtolower($data);
                    if ($data === 'invisible') {
                        $data = 'confidential'; // From old versions of Legacy.
                    }
                    break;
                case 'SEX':
                    $data = strtoupper($data);
                    break;
                case 'STAT':
                    if ($data === 'CANCELLED') {
                        // PhpGedView mis-spells this tag - correct it.
                        $data = 'CANCELED';
                    }
                    break;
                case 'TEMP':
                    // Temple codes are upper case
                    $data = strtoupper($data);
                    break;
                case 'TRLR':
                    // TRLR records don't have an XREF or DATA
                    if ($level === '0') {
                        $xref = '';
                        $data = '';
                    }
                    break;
            }

            // Suppress "Y", for facts/events with a DATE or PLAC
            if ($data === 'y') {
                $data = 'Y';
            }
            if ($level === '1' && $data === 'Y') {
                // Scan the sub-lines of this fact, up to the next level 1 line or the end of
                // the record. The last line of the record must be included in the scan.
                for ($i = $n + 1; $i < $num_matches && $matches[$i][1] !== '1'; ++$i) {
                    if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') {
                        $data = '';
                        break;
                    }
                }
            }

            // Merge CONC lines into the previous line, to simplify access later on.
            if ($tag === 'CONC') {
                $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data;
                continue;
            }

            switch ($tag) {
                case 'NOTE':
                case 'TEXT':
                case 'DATA':
                case 'CONT':
                    // The data of these tags is kept exactly as it was read.
                    break;
                case 'FILE':
                    // Strip off the user-defined path prefix
                    $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH');
                    if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) {
                        $data = substr($data, strlen($GEDCOM_MEDIA_PATH));
                    }
                    // convert backslashes in filenames to forward slashes
                    $data = preg_replace("/\\\\/", '/', $data);
                    break;
                default:
                    // Remove tabs and multiple/leading/trailing spaces
                    $data = strtr($data, ["\t" => ' ']);
                    $data = trim($data, ' ');
                    $data = preg_replace('/ {2,}/', ' ', $data);
                    break;
            }

            // Reassemble components back into a single line.
            // Only level 0 lines keep their XREF; an empty NOTE keeps its trailing space.
            $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
        }

        return $newrec;
    }

    /**
     * import record into database
     * this function will parse the given gedcom record and add it to the database
     *
     * The record is normalised, optionally given a _UID and its previous media links,
     * has inline media converted to media objects, and is then inserted into the table
     * that matches its record type. Finally, its links to other records are indexed.
     *
     * @param string $gedrec the raw gedcom record to parse
     * @param Tree   $tree   import the record into this tree
     * @param bool   $update whether this is an updated record that has been accepted
     *
     * @return void
     * @throws GedcomErrorException if the record does not start with a recognisable level 0 line
     */
    public function importRecord(string $gedrec, Tree $tree, bool $update): void
    {
        $tree_id = $tree->id();

        // Escaped @ signs (only if importing from file)
        if (!$update) {
            $gedrec = str_replace('@@', '@', $gedrec);
        }

        // Standardise gedcom format
        $gedrec = $this->reformatRecord($gedrec, $tree);

        // import different types of records
        if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) {
            [, $xref, $type] = $match;
        } elseif (preg_match('/0 (HEAD|TRLR|_PLAC |_PLAC_DEFN)/', $gedrec, $match)) {
            $type = $match[1];
            $xref = $type; // For records without an XREF, use the type as a pseudo XREF.
        } else {
            throw new GedcomErrorException($gedrec);
        }

        // Add a _UID
        if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) {
            $element = Registry::elementFactory()->make($type . ':_UID');
            if (!$element instanceof UnknownElement) {
                $gedrec .= "\n1 _UID " . $element->default($tree);
            }
        }

        // If the user has downloaded their GEDCOM data (containing media objects) and edited it
        // using an application which does not support (and deletes) media objects, then add them
        // back in.
        if ($tree->getPreference('keep_media')) {
            $old_linked_media = DB::table('link')
                ->where('l_from', '=', $xref)
                ->where('l_file', '=', $tree_id)
                ->where('l_type', '=', 'OBJE')
                ->pluck('l_to');

            // Delete these links - so that we do not insert them again in updateLinks()
            DB::table('link')
                ->where('l_from', '=', $xref)
                ->where('l_file', '=', $tree_id)
                ->where('l_type', '=', 'OBJE')
                ->delete();

            foreach ($old_linked_media as $media_id) {
                $gedrec .= "\n1 OBJE @" . $media_id . '@';
            }
        }

        // Convert inline media into media objects
        $gedrec = $this->convertInlineMedia($tree, $gedrec);

        switch ($type) {
            case Individual::RECORD_TYPE:
                $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree);

                // Fall back to the XREF when the record has no RIN.
                if (preg_match('/\n1 RIN (.+)/', $gedrec, $match)) {
                    $rin = $match[1];
                } else {
                    $rin = $xref;
                }

                DB::table('individuals')->insert([
                    'i_id'     => $xref,
                    'i_file'   => $tree_id,
                    'i_rin'    => $rin,
                    'i_sex'    => $record->sex(),
                    'i_gedcom' => $gedrec,
                ]);

                // Update the cross-reference/index tables.
                $this->updatePlaces($xref, $tree, $gedrec);
                $this->updateDates($xref, $tree_id, $gedrec);
                $this->updateNames($xref, $tree_id, $record);
                break;

            case Family::RECORD_TYPE:
                if (preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
                    $husb = $match[1];
                } else {
                    $husb = '';
                }
                if (preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
                    $wife = $match[1];
                } else {
                    $wife = '';
                }
                // Number of children: the larger of the CHIL count and any explicit NCHI value.
                $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match);
                if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) {
                    // Compare integers, rather than an integer and a numeric string.
                    $nchi = max($nchi, (int) $match[1]);
                }

                DB::table('families')->insert([
                    'f_id'      => $xref,
                    'f_file'    => $tree_id,
                    'f_husb'    => $husb,
                    'f_wife'    => $wife,
                    'f_gedcom'  => $gedrec,
                    'f_numchil' => $nchi,
                ]);

                // Update the cross-reference/index tables.
                $this->updatePlaces($xref, $tree, $gedrec);
                $this->updateDates($xref, $tree_id, $gedrec);
                break;

            case Source::RECORD_TYPE:
                // Source name: TITL, else ABBR, else the XREF.
                if (preg_match('/\n1 TITL (.+)/', $gedrec, $match)) {
                    $name = $match[1];
                } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) {
                    $name = $match[1];
                } else {
                    $name = $xref;
                }

                DB::table('sources')->insert([
                    's_id'     => $xref,
                    's_file'   => $tree_id,
                    's_name'   => mb_substr($name, 0, 255),
                    's_gedcom' => $gedrec,
                ]);
                break;

            case Repository::RECORD_TYPE:
            case Note::RECORD_TYPE:
            case Submission::RECORD_TYPE:
            case Submitter::RECORD_TYPE:
            case Location::RECORD_TYPE:
                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => $type,
                    'o_gedcom' => $gedrec,
                ]);
                break;

            case Header::RECORD_TYPE:
                // Force HEAD records to have a creation date.
                if (!str_contains($gedrec, "\n1 DATE ")) {
                    $today  = strtoupper(date('d M Y'));
                    $gedrec .= "\n1 DATE " . $today;
                }

                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => Header::RECORD_TYPE,
                    'o_gedcom' => $gedrec,
                ]);
                break;


            case Media::RECORD_TYPE:
                $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree);

                DB::table('media')->insert([
                    'm_id'     => $xref,
                    'm_file'   => $tree_id,
                    'm_gedcom' => $gedrec,
                ]);

                // One row per media file; each value is truncated to the length given here.
                foreach ($record->mediaFiles() as $media_file) {
                    DB::table('media_file')->insert([
                        'm_id'                 => $xref,
                        'm_file'               => $tree_id,
                        'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
                        'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
                        'source_media_type'    => mb_substr($media_file->type(), 0, 15),
                        'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
                    ]);
                }
                break;

            case '_PLAC ':
                // These records only update place locations; they are not stored or linked.
                $this->importTNGPlac($gedrec);
                return;

            case '_PLAC_DEFN':
                // These records only update place locations; they are not stored or linked.
                $this->importLegacyPlacDefn($gedrec);
                return;

            default: // Custom record types.
                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => mb_substr($type, 0, 15),
                    'o_gedcom' => $gedrec,
                ]);
                break;
        }

        // Update the cross-reference/index tables.
        $this->updateLinks($xref, $tree_id, $gedrec);
    }

    /**
     * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values
     *
     * The coordinates are only written when the place location has neither a latitude
     * nor a longitude. Records lacking a PLAC, LATI or LONG line are ignored.
     *
     * @param string $gedcom
     *
     * @return void
     */
    private function importLegacyPlacDefn(string $gedcom): void
    {
        // Resolve the service in the same way as reformatRecord().
        $gedcom_service = app(GedcomService::class);
        assert($gedcom_service instanceof GedcomService);

        if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match)) {
            $place_name = $match[1];
        } else {
            return;
        }

        if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match)) {
            $latitude = $gedcom_service->readLatitude($match[1]);
        } else {
            return;
        }

        if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match)) {
            $longitude = $gedcom_service->readLongitude($match[1]);
        } else {
            return;
        }

        $location = new PlaceLocation($place_name);

        if ($location->latitude() === null && $location->longitude() === null) {
            DB::table('place_location')
                ->where('id', '=', $location->id())
                ->update([
                    'latitude'  => $latitude,
                    'longitude' => $longitude,
                ]);
        }
    }

    /**
     * TNG software generates _PLAC records containing LAT/LONG values
     *
     * The coordinates are only written when the place location has neither a latitude
     * nor a longitude. Records lacking a place name, LATI or LONG line are ignored.
     *
     * @param string $gedcom
     *
     * @return void
     */
    private function importTNGPlac(string $gedcom): void
    {
        if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match)) {
            $place_name = $match[1];
        } else {
            return;
        }

        if (preg_match('/\n2 LATI (.+)/', $gedcom, $match)) {
            $latitude = (float) $match[1];
        } else {
            return;
        }

        if (preg_match('/\n2 LONG (.+)/', $gedcom, $match)) {
            $longitude = (float) $match[1];
        } else {
            return;
        }

        $location = new PlaceLocation($place_name);

        if ($location->latitude() === null && $location->longitude() === null) {
            DB::table('place_location')
                ->where('id', '=', $location->id())
                ->update([
                    'latitude'  => $latitude,
                    'longitude' => $longitude,
                ]);
        }
    }

    /**
     * Extract all level 2 places from the given record and insert them into the places table
     *
     * Each place is linked to the record together with all of its parent places.
     *
     * @param string $xref
     * @param Tree   $tree
     * @param string $gedrec
     *
     * @return void
     */
    public function updatePlaces(string $xref, Tree $tree, string $gedrec): void
    {
        // Insert all new rows together
        $rows = [];

        preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches);

        $places = array_unique($matches[1]);

        foreach ($places as $place_name) {
            $place = new Place($place_name, $tree);

            // Calling Place::id() will create the entry in the database, if it doesn't already exist.
            // Walk up through the parent places until a place with ID 0 is reached.
            while ($place->id() !== 0) {
                $rows[] = [
                    'pl_p_id' => $place->id(),
                    'pl_gid'  => $xref,
                    'pl_file' => $tree->id(),
                ];

                $place = $place->parent();
            }
        }

        // array_unique doesn't work with arrays of arrays
        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));

        // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders.
        foreach (array_chunk($rows, 20000) as $chunk) {
            DB::table('placelinks')->insert($chunk);
        }
    }

    /**
     * Extract all the dates from the given record and insert them into the database.
     *
     * Two rows are created for each dated level 1 fact: one for the minimum date
     * and one for the maximum date. Identical rows are only inserted once.
     *
     * @param string $xref
     * @param int    $ged_id
     * @param string $gedrec
     *
     * @return void
     */
    private function updateDates(string $xref, int $ged_id, string $gedrec): void
    {
        // Insert all new rows together
        $rows = [];

        // Match each level 1 fact that has a level 2 DATE amongst its sub-lines.
        preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {
            $fact = $match[1];
            $date = new Date($match[2]);

            $minimum = $date->minimumDate();
            $maximum = $date->maximumDate();

            $rows[] = [
                'd_day'        => $minimum->day,
                'd_month'      => $minimum->format('%O'),
                'd_mon'        => $minimum->month,
                'd_year'       => $minimum->year,
                'd_julianday1' => $minimum->minimumJulianDay(),
                'd_julianday2' => $minimum->maximumJulianDay(),
                'd_fact'       => $fact,
                'd_gid'        => $xref,
                'd_file'       => $ged_id,
                'd_type'       => $minimum->format('%@'),
            ];

            $rows[] = [
                'd_day'        => $maximum->day,
                'd_month'      => $maximum->format('%O'),
                'd_mon'        => $maximum->month,
                'd_year'       => $maximum->year,
                'd_julianday1' => $maximum->minimumJulianDay(),
                'd_julianday2' => $maximum->maximumJulianDay(),
                'd_fact'       => $fact,
                'd_gid'        => $xref,
                'd_file'       => $ged_id,
                // Every column of this row, including the type, describes the maximum date.
                'd_type'       => $maximum->format('%@'),
            ];
        }

        // array_unique doesn't work with arrays of arrays
        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));

        DB::table('dates')->insert($rows);
    }

    /**
     * Extract all the links from the given record and insert them into the database
     *
     * @param string $xref
     * @param int    $ged_id
     * @param string $gedrec
     *
     * @return void
     */
    private function updateLinks(string $xref, int $ged_id, string $gedrec): void
    {
        // Insert all new rows together
        $rows = [];

        preg_match_all('/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {
            // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@"
            // Rows are keyed by tag + upper-case XREF, so that a later duplicate replaces an earlier one.
            $rows[$match[1] . strtoupper($match[2])] = [
                'l_from' => $xref,
                'l_to'   => $match[2],
                'l_type' => $match[1],
                'l_file' => $ged_id,
            ];
        }

        DB::table('link')->insert($rows);
    }

    /**
     * Extract all the names from the given record and insert them into the database.
     *
     * Soundex codes are stored as null for a given name of Individual::PRAENOMEN_NESCIO
     * and for a surname ("surn") of Individual::NOMEN_NESCIO.
     *
     * @param string     $xref
     * @param int        $ged_id
     * @param Individual $record
     *
     * @return void
     */
    private function updateNames(string $xref, int $ged_id, Individual $record): void
    {
        // Insert all new rows together
        $rows = [];

        foreach ($record->getAllNames() as $n => $name) {
            if ($name['givn'] === Individual::PRAENOMEN_NESCIO) {
                $soundex_givn_std = null;
                $soundex_givn_dm  = null;
            } else {
                $soundex_givn_std = Soundex::russell($name['givn']);
                $soundex_givn_dm  = Soundex::daitchMokotoff($name['givn']);
            }

            if ($name['surn'] === Individual::NOMEN_NESCIO) {
                $soundex_surn_std = null;
                $soundex_surn_dm  = null;
            } else {
                $soundex_surn_std = Soundex::russell($name['surname']);
                $soundex_surn_dm  = Soundex::daitchMokotoff($name['surname']);
            }

            $rows[] = [
                'n_file'             => $ged_id,
                'n_id'               => $xref,
                'n_num'              => $n,
                'n_type'             => $name['type'],
                'n_sort'             => mb_substr($name['sort'], 0, 255),
                'n_full'             => mb_substr($name['fullNN'], 0, 255),
                'n_surname'          => mb_substr($name['surname'], 0, 255),
                'n_surn'             => mb_substr($name['surn'], 0, 255),
                'n_givn'             => mb_substr($name['givn'], 0, 255),
                'n_soundex_givn_std' => $soundex_givn_std,
                'n_soundex_surn_std' => $soundex_surn_std,
                'n_soundex_givn_dm'  => $soundex_givn_dm,
                'n_soundex_surn_dm'  => $soundex_surn_dm,
            ];
        }

        DB::table('name')->insert($rows);
    }

    /**
     * Extract inline media data, and convert to media objects.
     *
     * @param Tree   $tree
     * @param string $gedcom
     *
     * @return string
     */
    private function convertInlineMedia(Tree $tree, string $gedcom): string
    {
        while (preg_match('/\n1 OBJE(?:\n[2-9].+)+/', $gedcom, $match)) {
            $xref   = $this->createMediaObject($match[0], $tree);
            $gedcom = strtr($gedcom, [$match[0] => "\n1 OBJE @" . $xref . '@']);
        }
        while (preg_match('/\n2 OBJE(?:\n[3-9].+)+/', $gedcom, $match)) {
            $xref   = $this->createMediaObject($match[0], $tree);
            $gedcom = strtr($gedcom, [$match[0] => "\n2 OBJE @" . $xref .
'@']); 7372c685d76SGreg Roach } 7382c685d76SGreg Roach while (preg_match('/\n3 OBJE(?:\n[4-9].+)+/', $gedcom, $match)) { 7392c685d76SGreg Roach $xref = $this->createMediaObject($match[0], $tree); 7402c685d76SGreg Roach $gedcom = strtr($gedcom, [$match[0] => "\n3 OBJE @" . $xref . '@']); 7412c685d76SGreg Roach } 7422c685d76SGreg Roach 7432c685d76SGreg Roach return $gedcom; 7442c685d76SGreg Roach } 7452c685d76SGreg Roach 7462c685d76SGreg Roach /** 7472c685d76SGreg Roach * Create a new media object, from inline media data. 7482c685d76SGreg Roach * 7492c685d76SGreg Roach * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL 7502c685d76SGreg Roach * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL 7512c685d76SGreg Roach * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +2 MEDI 7522c685d76SGreg Roach * 7532c685d76SGreg Roach * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE 7542c685d76SGreg Roach * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL 7552c685d76SGreg Roach * 7562c685d76SGreg Roach * @param string $gedcom 7572c685d76SGreg Roach * @param Tree $tree 7582c685d76SGreg Roach * 7592c685d76SGreg Roach * @return string 7602c685d76SGreg Roach */ 7612c685d76SGreg Roach private function createMediaObject(string $gedcom, Tree $tree): string 7622c685d76SGreg Roach { 7632c685d76SGreg Roach preg_match('/\n\d FILE (.+)/', $gedcom, $match); 7642c685d76SGreg Roach $file = $match[1] ?? ''; 7652c685d76SGreg Roach 7662c685d76SGreg Roach preg_match('/\n\d TITL (.+)/', $gedcom, $match); 7672c685d76SGreg Roach $title = $match[1] ?? ''; 7682c685d76SGreg Roach 7692c685d76SGreg Roach preg_match('/\n\d FORM (.+)/', $gedcom, $match); 7702c685d76SGreg Roach $format = $match[1] ?? ''; 7712c685d76SGreg Roach 7722c685d76SGreg Roach preg_match('/\n\d MEDI (.+)/', $gedcom, $match); 7732c685d76SGreg Roach $media = $match[1] ?? 
''; 7742c685d76SGreg Roach 7752c685d76SGreg Roach preg_match('/\n\d _SCBK (.+)/', $gedcom, $match); 7762c685d76SGreg Roach $scrapbook = $match[1] ?? ''; 7772c685d76SGreg Roach 7782c685d76SGreg Roach preg_match('/\n\d _PRIM (.+)/', $gedcom, $match); 7792c685d76SGreg Roach $primary = $match[1] ?? ''; 7802c685d76SGreg Roach 7812c685d76SGreg Roach preg_match('/\n\d _TYPE (.+)/', $gedcom, $match); 7822c685d76SGreg Roach if ($media === '') { 7832c685d76SGreg Roach // Legacy uses _TYPE instead of MEDI 7842c685d76SGreg Roach $media = $match[1] ?? ''; 7852c685d76SGreg Roach $type = ''; 7862c685d76SGreg Roach } else { 7872c685d76SGreg Roach $type = $match[1] ?? ''; 7882c685d76SGreg Roach } 7892c685d76SGreg Roach 7902c685d76SGreg Roach preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches); 7912c685d76SGreg Roach $notes = $matches[1] ?? []; 7922c685d76SGreg Roach 7932c685d76SGreg Roach // Have we already created a media object with the same title/filename? 7942c685d76SGreg Roach $xref = DB::table('media_file') 7952c685d76SGreg Roach ->where('m_file', '=', $tree->id()) 7962c685d76SGreg Roach ->where('descriptive_title', '=', mb_substr($title, 0, 248)) 7972c685d76SGreg Roach ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248)) 7982c685d76SGreg Roach ->value('m_id'); 7992c685d76SGreg Roach 8002c685d76SGreg Roach if ($xref === null) { 8012c685d76SGreg Roach $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE); 8022c685d76SGreg Roach 8032c685d76SGreg Roach // convert to a media-object 8042c685d76SGreg Roach $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file; 8052c685d76SGreg Roach 8062c685d76SGreg Roach if ($format !== '') { 8072c685d76SGreg Roach $gedcom .= "\n2 FORM " . $format; 8082c685d76SGreg Roach 8092c685d76SGreg Roach if ($media !== '') { 8102c685d76SGreg Roach $gedcom .= "\n3 TYPE " . 
$media; 8112c685d76SGreg Roach } 8122c685d76SGreg Roach } 8132c685d76SGreg Roach 8142c685d76SGreg Roach if ($title !== '') { 8152c685d76SGreg Roach $gedcom .= "\n3 TITL " . $title; 8162c685d76SGreg Roach } 8172c685d76SGreg Roach 8182c685d76SGreg Roach if ($scrapbook !== '') { 8192c685d76SGreg Roach $gedcom .= "\n1 _SCBK " . $scrapbook; 8202c685d76SGreg Roach } 8212c685d76SGreg Roach 8222c685d76SGreg Roach if ($primary !== '') { 8232c685d76SGreg Roach $gedcom .= "\n1 _PRIM " . $primary; 8242c685d76SGreg Roach } 8252c685d76SGreg Roach 8262c685d76SGreg Roach if ($type !== '') { 8272c685d76SGreg Roach $gedcom .= "\n1 _TYPE " . $type; 8282c685d76SGreg Roach } 8292c685d76SGreg Roach 8302c685d76SGreg Roach foreach ($notes as $note) { 8312c685d76SGreg Roach $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]); 8322c685d76SGreg Roach } 8332c685d76SGreg Roach 8342c685d76SGreg Roach DB::table('media')->insert([ 8352c685d76SGreg Roach 'm_id' => $xref, 8362c685d76SGreg Roach 'm_file' => $tree->id(), 8372c685d76SGreg Roach 'm_gedcom' => $gedcom, 8382c685d76SGreg Roach ]); 8392c685d76SGreg Roach 8402c685d76SGreg Roach DB::table('media_file')->insert([ 8412c685d76SGreg Roach 'm_id' => $xref, 8422c685d76SGreg Roach 'm_file' => $tree->id(), 8432c685d76SGreg Roach 'multimedia_file_refn' => mb_substr($file, 0, 248), 8442c685d76SGreg Roach 'multimedia_format' => mb_substr($format, 0, 4), 8452c685d76SGreg Roach 'source_media_type' => mb_substr($media, 0, 15), 8462c685d76SGreg Roach 'descriptive_title' => mb_substr($title, 0, 248), 8472c685d76SGreg Roach ]); 8482c685d76SGreg Roach } 8492c685d76SGreg Roach 8502c685d76SGreg Roach return $xref; 8512c685d76SGreg Roach } 8522c685d76SGreg Roach 8532c685d76SGreg Roach /** 8542c685d76SGreg Roach * update a record in the database 8552c685d76SGreg Roach * 8562c685d76SGreg Roach * @param string $gedrec 8572c685d76SGreg Roach * @param Tree $tree 8582c685d76SGreg Roach * @param bool $delete 8592c685d76SGreg Roach * 
8602c685d76SGreg Roach * @return void 8612c685d76SGreg Roach * @throws GedcomErrorException 8622c685d76SGreg Roach */ 8632c685d76SGreg Roach public function updateRecord(string $gedrec, Tree $tree, bool $delete): void 8642c685d76SGreg Roach { 8652c685d76SGreg Roach if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 8662c685d76SGreg Roach [, $gid, $type] = $match; 8672c685d76SGreg Roach } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) { 8682c685d76SGreg Roach // The HEAD record has no XREF. Any others? 8692c685d76SGreg Roach $gid = $match[1]; 8702c685d76SGreg Roach $type = $match[1]; 8712c685d76SGreg Roach } else { 8722c685d76SGreg Roach throw new GedcomErrorException($gedrec); 8732c685d76SGreg Roach } 8742c685d76SGreg Roach 8752c685d76SGreg Roach // Place links 8762c685d76SGreg Roach DB::table('placelinks') 8772c685d76SGreg Roach ->where('pl_gid', '=', $gid) 8782c685d76SGreg Roach ->where('pl_file', '=', $tree->id()) 8792c685d76SGreg Roach ->delete(); 8802c685d76SGreg Roach 8812c685d76SGreg Roach // Orphaned places. If we're deleting "Westminster, London, England", 8822c685d76SGreg Roach // then we may also need to delete "London, England" and "England". 
8832c685d76SGreg Roach do { 8842c685d76SGreg Roach $affected = DB::table('places') 8852c685d76SGreg Roach ->leftJoin('placelinks', function (JoinClause $join): void { 8862c685d76SGreg Roach $join 8872c685d76SGreg Roach ->on('p_id', '=', 'pl_p_id') 8882c685d76SGreg Roach ->on('p_file', '=', 'pl_file'); 8892c685d76SGreg Roach }) 8902c685d76SGreg Roach ->whereNull('pl_p_id') 8912c685d76SGreg Roach ->delete(); 8922c685d76SGreg Roach } while ($affected > 0); 8932c685d76SGreg Roach 8942c685d76SGreg Roach DB::table('dates') 8952c685d76SGreg Roach ->where('d_gid', '=', $gid) 8962c685d76SGreg Roach ->where('d_file', '=', $tree->id()) 8972c685d76SGreg Roach ->delete(); 8982c685d76SGreg Roach 8992c685d76SGreg Roach DB::table('name') 9002c685d76SGreg Roach ->where('n_id', '=', $gid) 9012c685d76SGreg Roach ->where('n_file', '=', $tree->id()) 9022c685d76SGreg Roach ->delete(); 9032c685d76SGreg Roach 9042c685d76SGreg Roach DB::table('link') 9052c685d76SGreg Roach ->where('l_from', '=', $gid) 9062c685d76SGreg Roach ->where('l_file', '=', $tree->id()) 9072c685d76SGreg Roach ->delete(); 9082c685d76SGreg Roach 9092c685d76SGreg Roach switch ($type) { 9102c685d76SGreg Roach case Individual::RECORD_TYPE: 9112c685d76SGreg Roach DB::table('individuals') 9122c685d76SGreg Roach ->where('i_id', '=', $gid) 9132c685d76SGreg Roach ->where('i_file', '=', $tree->id()) 9142c685d76SGreg Roach ->delete(); 9152c685d76SGreg Roach break; 9162c685d76SGreg Roach 9172c685d76SGreg Roach case Family::RECORD_TYPE: 9182c685d76SGreg Roach DB::table('families') 9192c685d76SGreg Roach ->where('f_id', '=', $gid) 9202c685d76SGreg Roach ->where('f_file', '=', $tree->id()) 9212c685d76SGreg Roach ->delete(); 9222c685d76SGreg Roach break; 9232c685d76SGreg Roach 9242c685d76SGreg Roach case Source::RECORD_TYPE: 9252c685d76SGreg Roach DB::table('sources') 9262c685d76SGreg Roach ->where('s_id', '=', $gid) 9272c685d76SGreg Roach ->where('s_file', '=', $tree->id()) 9282c685d76SGreg Roach ->delete(); 9292c685d76SGreg Roach 
break; 9302c685d76SGreg Roach 9312c685d76SGreg Roach case Media::RECORD_TYPE: 9322c685d76SGreg Roach DB::table('media_file') 9332c685d76SGreg Roach ->where('m_id', '=', $gid) 9342c685d76SGreg Roach ->where('m_file', '=', $tree->id()) 9352c685d76SGreg Roach ->delete(); 9362c685d76SGreg Roach 9372c685d76SGreg Roach DB::table('media') 9382c685d76SGreg Roach ->where('m_id', '=', $gid) 9392c685d76SGreg Roach ->where('m_file', '=', $tree->id()) 9402c685d76SGreg Roach ->delete(); 9412c685d76SGreg Roach break; 9422c685d76SGreg Roach 9432c685d76SGreg Roach default: 9442c685d76SGreg Roach DB::table('other') 9452c685d76SGreg Roach ->where('o_id', '=', $gid) 9462c685d76SGreg Roach ->where('o_file', '=', $tree->id()) 9472c685d76SGreg Roach ->delete(); 9482c685d76SGreg Roach break; 9492c685d76SGreg Roach } 9502c685d76SGreg Roach 9512c685d76SGreg Roach if (!$delete) { 9522c685d76SGreg Roach $this->importRecord($gedrec, $tree, true); 9532c685d76SGreg Roach } 9542c685d76SGreg Roach } 9552c685d76SGreg Roach} 956