<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Services;

use Fisharebest\Webtrees\Date;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Elements\UnknownElement;
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Place;
use Fisharebest\Webtrees\PlaceLocation;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Soundex;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submission;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Query\JoinClause;

use function array_chunk;
use function array_intersect_key;
use function array_map;
use function array_unique;
use function date;
use function explode;
use function max;
use function mb_substr;
use function preg_match;
use function preg_match_all;
use function preg_replace;
use function round;
use function str_contains;
use function str_replace;
use function str_starts_with;
use function strlen;
use function strtoupper;
use function strtr;
use function substr;
use function trim;

use const PREG_SET_ORDER;

/**
 * Class GedcomImportService - import GEDCOM data
 */
class GedcomImportService
{
    /**
     * Tidy up a gedcom record on import, so that we can access it consistently/efficiently.
     *
     * Each line is split into level, optional xref, tag and data. The tag is
     * canonicalised, selected tags (DATE, HEAD, TRLR, NAME, PLAC, SEX) have their
     * data normalised, and the lines are then reassembled. CONC lines are merged
     * into the preceding line.
     *
     * @param string $rec  The raw record
     * @param Tree   $tree Supplies the GEDCOM_MEDIA_PATH and WORD_WRAPPED_NOTES preferences
     *
     * @return string The reformatted record, lines separated by "\n"
     */
    private function reformatRecord(string $rec, Tree $tree): string
    {
        $gedcom_service = Registry::container()->get(GedcomService::class);

        // Strip out mac/msdos line endings
        $rec = preg_replace("/[\r\n]+/", "\n", $rec);

        // Extract lines from the record; lines consist of: level + optional xref + tag + optional data
        $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER);

        // Process the record line-by-line
        $newrec = '';
        foreach ($matches as $n => $match) {
            [, $level, $xref, $tag, $data] = $match;

            $tag = $gedcom_service->canonicalTag($tag);

            switch ($tag) {
                case 'DATE':
                    // Preserve text from INT dates
                    if (str_contains($data, '(')) {
                        [$date, $text] = explode('(', $data, 2);
                        $text = ' (' . $text;
                    } else {
                        $date = $data;
                        $text = '';
                    }
                    // Capitals
                    $date = strtoupper($date);
                    // Temporarily add leading/trailing spaces, to allow efficient matching below
                    $date = ' ' . $date . ' ';
                    // Ensure space between digits and letters
                    $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date);
                    $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date);
                    // Ensure space before/after calendar escapes
                    $date = preg_replace('/@#[^@]+@/', ' $0 ', $date);
                    // "BET." => "BET"
                    $date = preg_replace('/(\w\w)\./', '$1', $date);
                    // "CIR" => "ABT"
                    $date = str_replace(' CIR ', ' ABT ', $date);
                    $date = str_replace(' APX ', ' ABT ', $date);
                    // B.C. => BC (temporarily, to allow easier handling of ".")
                    $date = str_replace(' B.C. ', ' BC ', $date);
                    // TMG uses "EITHER X OR Y"
                    $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date);
                    // "BET X - Y " => "BET X AND Y"
                    $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date);
                    $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date);
                    // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y"
                    $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date);
                    $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date);
                    // "@#ESC@ AFT X" => "AFT @#ESC@ X"
                    $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date);
                    // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900"
                    // (don't change "/" - it is used in NS/OS dates)
                    $date = preg_replace('/[.,:;-]/', ' ', $date);
                    // BC => B.C.
                    $date = str_replace(' BC ', ' B.C. ', $date);
                    // Append the "INT" text
                    $data = $date . $text;
                    break;
                case 'HEAD':
                case 'TRLR':
                    // HEAD and TRLR records do not have an XREF or DATA
                    if ($level === '0') {
                        $xref = '';
                        $data = '';
                    }
                    break;
                case 'NAME':
                    // Trim, and collapse runs of spaces into a single space
                    $data = preg_replace('/ +/', ' ', trim($data));
                    break;
                case 'PLAC':
                    // Consistent commas: ASCII, full-width (U+FF0C) and Arabic (U+060C).
                    // NOTE(review): the full-width comma is restored from what appeared to be
                    // a normalised duplicate ASCII comma - confirm against the canonical source.
                    $data = preg_replace('/ *[,\x{FF0C}،] */u', ', ', $data);
                    // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W
                    if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $coords) === 1) {
                        $degns = (int) $coords[2];
                        $minns = (int) $coords[3];
                        $secns = (int) $coords[4];
                        $degew = (int) $coords[6];
                        $minew = (int) $coords[7];
                        $secew = (int) $coords[8];
                        // Move the coordinates into subordinate MAP/LATI/LONG lines, as decimal degrees.
                        $data =
                            $coords[1] . "\n" .
                            (1 + (int) $level) . " MAP\n" .
                            (2 + (int) $level) . ' LATI ' . ($coords[5] . round($degns + $minns / 60 + $secns / 3600, 4)) . "\n" .
                            (2 + (int) $level) . ' LONG ' . ($coords[9] . round($degew + $minew / 60 + $secew / 3600, 4));
                    }
                    break;
                case 'SEX':
                    $data = strtoupper($data);
                    break;
            }

            // Suppress "Y", for facts/events with a DATE or PLAC
            if ($data === 'y') {
                $data = 'Y';
            }
            if ($level === '1' && $data === 'Y') {
                // Scan every subordinate line up to the next level 1 line, *including*
                // the final line of the record.
                for ($i = $n + 1; $i < $num_matches && $matches[$i][1] !== '1'; ++$i) {
                    if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') {
                        $data = '';
                        break;
                    }
                }
            }

            // Reassemble components back into a single line
            switch ($tag) {
                case 'NOTE':
                case 'TEXT':
                case 'DATA':
                case 'CONT':
                    // Free-text tags: whitespace is significant, so leave the data untouched.
                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
                    break;
                case 'FILE':
                    // Strip off the user-defined path prefix
                    $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH');
                    if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) {
                        $data = substr($data, strlen($GEDCOM_MEDIA_PATH));
                    }
                    // convert backslashes in filenames to forward slashes
                    $data = preg_replace("/\\\\/", '/', $data);

                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
                    break;
                case 'CONC':
                    // Merge CONC lines, to simplify access later on.
                    $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data;
                    break;
                default:
                    // Remove tabs and multiple/leading/trailing spaces
                    $data = strtr($data, ["\t" => ' ']);
                    $data = trim($data, ' ');
                    // A quantified pattern is used (rather than a literal run of spaces)
                    // so that the intent survives whitespace-normalising tools.
                    $data = preg_replace('/ {2,}/', ' ', $data);

                    $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data);
                    break;
            }
        }

        return $newrec;
    }

    /**
     * import record into database
     * this function will parse the given gedcom record and add it to the database
     *
     * @param string $gedrec the raw gedcom record to parse
     * @param Tree   $tree   import the record into this tree
     * @param bool   $update whether this is an updated record that has been accepted
     *
     * @return void
     * @throws GedcomErrorException
     */
    public function importRecord(string $gedrec, Tree $tree, bool $update): void
    {
        $tree_id = $tree->id();

        // Escaped @ signs (only if importing from file)
        if (!$update) {
            $gedrec = str_replace('@@', '@', $gedrec);
        }

        // Standardise gedcom format
        $gedrec = $this->reformatRecord($gedrec, $tree);

        // import different types of records
        if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) {
            [, $xref, $type] = $match;
        } elseif (str_starts_with($gedrec, '0 HEAD')) {
            $type = 'HEAD';
            $xref = 'HEAD'; // For records without an XREF, use the type as a pseudo XREF.
        } elseif (str_starts_with($gedrec, '0 TRLR')) {
            $tree->setPreference('imported', '1');
            $type = 'TRLR';
            $xref = 'TRLR'; // For records without an XREF, use the type as a pseudo XREF.
        } elseif (str_starts_with($gedrec, '0 _PLAC_DEFN')) {
            $this->importLegacyPlacDefn($gedrec);

            return;
        } elseif (str_starts_with($gedrec, '0 _PLAC ')) {
            $this->importTNGPlac($gedrec);

            return;
        } else {
            // Known custom records without XREFs are silently skipped.
            foreach (Gedcom::CUSTOM_RECORDS_WITHOUT_XREFS as $record_type) {
                if (preg_match('/^0 ' . $record_type . '\b/', $gedrec) === 1) {
                    return;
                }
            }

            throw new GedcomErrorException($gedrec);
        }

        // Add a _UID
        if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) {
            $element = Registry::elementFactory()->make($type . ':_UID');
            if (!$element instanceof UnknownElement) {
                $gedrec .= "\n1 _UID " . $element->default($tree);
            }
        }

        // If the user has downloaded their GEDCOM data (containing media objects) and edited it
        // using an application which does not support (and deletes) media objects, then add them
        // back in.
        if ($tree->getPreference('keep_media') === '1') {
            $old_linked_media = DB::table('link')
                ->where('l_from', '=', $xref)
                ->where('l_file', '=', $tree_id)
                ->where('l_type', '=', 'OBJE')
                ->pluck('l_to');

            // Delete these links - so that we do not insert them again in updateLinks()
            DB::table('link')
                ->where('l_from', '=', $xref)
                ->where('l_file', '=', $tree_id)
                ->where('l_type', '=', 'OBJE')
                ->delete();

            foreach ($old_linked_media as $media_id) {
                $gedrec .= "\n1 OBJE @" . $media_id . '@';
            }
        }

        // Convert inline media into media objects
        $gedrec = $this->convertInlineMedia($tree, $gedrec);

        switch ($type) {
            case Individual::RECORD_TYPE:
                $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree);

                if (preg_match('/\n1 RIN (.+)/', $gedrec, $match)) {
                    $rin = $match[1];
                } else {
                    $rin = $xref;
                }

                // The database can only store MFU, and many of the stats queries assume this.
                $sex = $record->sex();
                $sex = $sex === 'M' || $sex === 'F' ? $sex : 'U';

                DB::table('individuals')->insert([
                    'i_id'     => $xref,
                    'i_file'   => $tree_id,
                    'i_rin'    => $rin,
                    'i_sex'    => $sex,
                    'i_gedcom' => $gedrec,
                ]);

                // Update the cross-reference/index tables.
                $this->updatePlaces($xref, $tree, $gedrec);
                $this->updateDates($xref, $tree_id, $gedrec);
                $this->updateNames($xref, $tree_id, $record);
                break;

            case Family::RECORD_TYPE:
                if (preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
                    $husb = $match[1];
                } else {
                    $husb = '';
                }
                if (preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) {
                    $wife = $match[1];
                } else {
                    $wife = '';
                }
                // Use the larger of the number of CHIL links and any explicit NCHI value.
                $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match);
                if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) {
                    // Cast, so that we compare (and store) integers rather than int vs. numeric string.
                    $nchi = max($nchi, (int) $match[1]);
                }

                DB::table('families')->insert([
                    'f_id'      => $xref,
                    'f_file'    => $tree_id,
                    'f_husb'    => $husb,
                    'f_wife'    => $wife,
                    'f_gedcom'  => $gedrec,
                    'f_numchil' => $nchi,
                ]);

                // Update the cross-reference/index tables.
                $this->updatePlaces($xref, $tree, $gedrec);
                $this->updateDates($xref, $tree_id, $gedrec);
                break;

            case Source::RECORD_TYPE:
                if (preg_match('/\n1 TITL (.+)/', $gedrec, $match)) {
                    $name = $match[1];
                } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) {
                    $name = $match[1];
                } else {
                    $name = $xref;
                }

                DB::table('sources')->insert([
                    's_id'     => $xref,
                    's_file'   => $tree_id,
                    's_name'   => mb_substr($name, 0, 255),
                    's_gedcom' => $gedrec,
                ]);
                break;

            case Repository::RECORD_TYPE:
            case Note::RECORD_TYPE:
            case Submission::RECORD_TYPE:
            case Submitter::RECORD_TYPE:
            case Location::RECORD_TYPE:
                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => $type,
                    'o_gedcom' => $gedrec,
                ]);
                break;

            case Header::RECORD_TYPE:
                // Force HEAD records to have a creation date.
                if (!str_contains($gedrec, "\n1 DATE ")) {
                    $today = strtoupper(date('d M Y'));
                    $gedrec .= "\n1 DATE " . $today;
                }

                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => Header::RECORD_TYPE,
                    'o_gedcom' => $gedrec,
                ]);
                break;

            case Media::RECORD_TYPE:
                $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree);

                DB::table('media')->insert([
                    'm_id'     => $xref,
                    'm_file'   => $tree_id,
                    'm_gedcom' => $gedrec,
                ]);

                foreach ($record->mediaFiles() as $media_file) {
                    DB::table('media_file')->insert([
                        'm_id'                 => $xref,
                        'm_file'               => $tree_id,
                        'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248),
                        'multimedia_format'    => mb_substr($media_file->format(), 0, 4),
                        'source_media_type'    => mb_substr($media_file->type(), 0, 15),
                        'descriptive_title'    => mb_substr($media_file->title(), 0, 248),
                    ]);
                }
                break;

            default: // Custom record types.
                DB::table('other')->insert([
                    'o_id'     => $xref,
                    'o_file'   => $tree_id,
                    'o_type'   => mb_substr($type, 0, 15),
                    'o_gedcom' => $gedrec,
                ]);
                break;
        }

        // Update the cross-reference/index tables.
        $this->updateLinks($xref, $tree_id, $gedrec);
    }

    /**
     * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values
     *
     * The coordinates are only stored when the matching place location has
     * neither a latitude nor a longitude already.
     *
     * @param string $gedcom
     */
    private function importLegacyPlacDefn(string $gedcom): void
    {
        $gedcom_service = new GedcomService();

        if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match)) {
            $place_name = $match[1];
        } else {
            return;
        }

        if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match)) {
            $latitude = $gedcom_service->readLatitude($match[1]);
        } else {
            return;
        }

        if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match)) {
            $longitude = $gedcom_service->readLongitude($match[1]);
        } else {
            return;
        }

        $location = new PlaceLocation($place_name);

        if ($location->latitude() === null && $location->longitude() === null) {
            DB::table('place_location')
                ->where('id', '=', $location->id())
                ->update([
                    'latitude'  => $latitude,
                    'longitude' => $longitude,
                ]);
        }
    }

    /**
     * TNG generates _PLAC records containing LAT/LONG values
     *
     * The place name is taken from the level 0 line, and the coordinates from
     * level 2 LATI/LONG lines, read as plain decimal numbers. The coordinates are
     * only stored when the matching place location has neither a latitude nor a
     * longitude already.
     *
     * @param string $gedcom
     */
    private function importTNGPlac(string $gedcom): void
    {
        if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match)) {
            $place_name = $match[1];
        } else {
            return;
        }

        if (preg_match('/\n2 LATI (.+)/', $gedcom, $match)) {
            $latitude = (float) $match[1];
        } else {
            return;
        }

        if (preg_match('/\n2 LONG (.+)/', $gedcom, $match)) {
            $longitude = (float) $match[1];
        } else {
            return;
        }

        $location = new PlaceLocation($place_name);

        if ($location->latitude() === null && $location->longitude() === null) {
            DB::table('place_location')
                ->where('id', '=', $location->id())
                ->update([
                    'latitude'  => $latitude,
                    'longitude' => $longitude,
                ]);
        }
    }

    /**
     * Extract all level 2 places from the given record and insert them into the places table
     *
     * @param string $xref
     * @param Tree   $tree
     * @param string $gedrec
     *
     * @return void
     */
    public function updatePlaces(string $xref, Tree $tree, string $gedrec): void
    {
        // Insert all new rows together
        $rows = [];

        preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches);

        $places = array_unique($matches[1]);

        foreach ($places as $place_name) {
            $place = new Place($place_name, $tree);

            // Calling Place::id() will create the entry in the database, if it doesn't already exist.
            while ($place->id() !== 0) {
                $rows[] = [
                    'pl_p_id' => $place->id(),
                    'pl_gid'  => $xref,
                    'pl_file' => $tree->id(),
                ];

                $place = $place->parent();
            }
        }

        // array_unique doesn't work with arrays of arrays
        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));

        // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders.
        foreach (array_chunk($rows, 20000) as $chunk) {
            DB::table('placelinks')->insert($chunk);
        }
    }

    /**
     * Extract all the dates from the given record and insert them into the database.
     *
     * Two rows are written for each dated level 1 fact: one describing the
     * minimum date and one describing the maximum date.
     *
     * @param string $xref
     * @param int    $ged_id
     * @param string $gedrec
     *
     * @return void
     */
    private function updateDates(string $xref, int $ged_id, string $gedrec): void
    {
        // Insert all new rows together
        $rows = [];

        preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {
            $fact    = $match[1];
            $date    = new Date($match[2]);
            $minimum = $date->minimumDate();
            $maximum = $date->maximumDate();

            $rows[] = [
                'd_day'        => $minimum->day,
                'd_month'      => $minimum->format('%O'),
                'd_mon'        => $minimum->month,
                'd_year'       => $minimum->year,
                'd_julianday1' => $minimum->minimumJulianDay(),
                'd_julianday2' => $minimum->maximumJulianDay(),
                'd_fact'       => $fact,
                'd_gid'        => $xref,
                'd_file'       => $ged_id,
                'd_type'       => $minimum->format('%@'),
            ];

            $rows[] = [
                'd_day'        => $maximum->day,
                'd_month'      => $maximum->format('%O'),
                'd_mon'        => $maximum->month,
                'd_year'       => $maximum->year,
                'd_julianday1' => $maximum->minimumJulianDay(),
                'd_julianday2' => $maximum->maximumJulianDay(),
                'd_fact'       => $fact,
                'd_gid'        => $xref,
                'd_file'       => $ged_id,
                // The calendar must describe *this* row's day/month/year, so take it
                // from the maximum date (the two ends of a range may differ).
                'd_type'       => $maximum->format('%@'),
            ];
        }

        // array_unique doesn't work with arrays of arrays
        $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows)));

        DB::table('dates')->insert($rows);
    }

    /**
     * Extract all the links from the given record and insert them into the database
     *
     * @param string $xref
     * @param int    $ged_id
     * @param string $gedrec
     *
     * @return void
     */
    private function updateLinks(string $xref, int $ged_id, string $gedrec): void
    {
        // Insert all new rows together
        $rows = [];

        preg_match_all('/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $matches, PREG_SET_ORDER);

        foreach ($matches as $match) {
            // Some applications (e.g. GenoPro) create links longer than 15 characters.
            $link = mb_substr($match[1], 0, 15);

            // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@"
            $rows[$link . strtoupper($match[2])] = [
                'l_from' => $xref,
                'l_to'   => $match[2],
                'l_type' => $link,
                'l_file' => $ged_id,
            ];
        }

        DB::table('link')->insert($rows);
    }

    /**
     * Extract all the names from the given record and insert them into the database.
     *
     * @param string     $xref
     * @param int        $ged_id
     * @param Individual $record
     *
     * @return void
     */
    private function updateNames(string $xref, int $ged_id, Individual $record): void
    {
        // Insert all new rows together
        $rows = [];

        foreach ($record->getAllNames() as $n => $name) {
            // Unknown given names / surnames have no soundex codes.
            if ($name['givn'] === Individual::PRAENOMEN_NESCIO) {
                $soundex_givn_std = null;
                $soundex_givn_dm  = null;
            } else {
                $soundex_givn_std = Soundex::russell($name['givn']);
                $soundex_givn_dm  = Soundex::daitchMokotoff($name['givn']);
            }

            if ($name['surn'] === Individual::NOMEN_NESCIO) {
                $soundex_surn_std = null;
                $soundex_surn_dm  = null;
            } else {
                $soundex_surn_std = Soundex::russell($name['surname']);
                $soundex_surn_dm  = Soundex::daitchMokotoff($name['surname']);
            }

            $rows[] = [
                'n_file'             => $ged_id,
                'n_id'               => $xref,
                'n_num'              => $n,
                'n_type'             => $name['type'],
                'n_sort'             => mb_substr($name['sort'], 0, 255),
                'n_full'             => mb_substr($name['fullNN'], 0, 255),
                'n_surname'          => mb_substr($name['surname'], 0, 255),
                'n_surn'             => mb_substr($name['surn'], 0, 255),
                'n_givn'             => mb_substr($name['givn'], 0, 255),
                'n_soundex_givn_std' => $soundex_givn_std,
                'n_soundex_surn_std' => $soundex_surn_std,
                'n_soundex_givn_dm'  => $soundex_givn_dm,
                'n_soundex_surn_dm'  => $soundex_surn_dm,
            ];
        }

        DB::table('name')->insert($rows);
    }

    /**
     * Extract inline media data, and convert to media objects.
     *
     * @param Tree   $tree
     * @param string $gedcom
     *
     * @return string
     */
    private function convertInlineMedia(Tree $tree, string $gedcom): string
    {
        while (preg_match('/\n1 OBJE(?:\n[2-9].+)+/', $gedcom, $match)) {
            $xref   = $this->createMediaObject($match[0], $tree);
            $gedcom = strtr($gedcom, [$match[0] => "\n1 OBJE @" . $xref . '@']);
        }
        while (preg_match('/\n2 OBJE(?:\n[3-9].+)+/', $gedcom, $match)) {
            $xref   = $this->createMediaObject($match[0], $tree);
            $gedcom = strtr($gedcom, [$match[0] => "\n2 OBJE @" . $xref .
'@']); 7172c685d76SGreg Roach } 7182c685d76SGreg Roach while (preg_match('/\n3 OBJE(?:\n[4-9].+)+/', $gedcom, $match)) { 7192c685d76SGreg Roach $xref = $this->createMediaObject($match[0], $tree); 7202c685d76SGreg Roach $gedcom = strtr($gedcom, [$match[0] => "\n3 OBJE @" . $xref . '@']); 7212c685d76SGreg Roach } 7222c685d76SGreg Roach 7232c685d76SGreg Roach return $gedcom; 7242c685d76SGreg Roach } 7252c685d76SGreg Roach 7262c685d76SGreg Roach /** 7272c685d76SGreg Roach * Create a new media object, from inline media data. 7282c685d76SGreg Roach * 7292c685d76SGreg Roach * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL 7302c685d76SGreg Roach * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL 7312c685d76SGreg Roach * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +2 MEDI 7322c685d76SGreg Roach * 7332c685d76SGreg Roach * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE 7342c685d76SGreg Roach * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL 7352c685d76SGreg Roach * 7362c685d76SGreg Roach * @param string $gedcom 7372c685d76SGreg Roach * @param Tree $tree 7382c685d76SGreg Roach * 7392c685d76SGreg Roach * @return string 7402c685d76SGreg Roach */ 7412c685d76SGreg Roach private function createMediaObject(string $gedcom, Tree $tree): string 7422c685d76SGreg Roach { 7432c685d76SGreg Roach preg_match('/\n\d FILE (.+)/', $gedcom, $match); 7442c685d76SGreg Roach $file = $match[1] ?? ''; 7452c685d76SGreg Roach 7462c685d76SGreg Roach preg_match('/\n\d TITL (.+)/', $gedcom, $match); 7472c685d76SGreg Roach $title = $match[1] ?? ''; 7482c685d76SGreg Roach 7492c685d76SGreg Roach preg_match('/\n\d FORM (.+)/', $gedcom, $match); 7502c685d76SGreg Roach $format = $match[1] ?? ''; 7512c685d76SGreg Roach 7522c685d76SGreg Roach preg_match('/\n\d MEDI (.+)/', $gedcom, $match); 7532c685d76SGreg Roach $media = $match[1] ?? 
''; 7542c685d76SGreg Roach 7552c685d76SGreg Roach preg_match('/\n\d _SCBK (.+)/', $gedcom, $match); 7562c685d76SGreg Roach $scrapbook = $match[1] ?? ''; 7572c685d76SGreg Roach 7582c685d76SGreg Roach preg_match('/\n\d _PRIM (.+)/', $gedcom, $match); 7592c685d76SGreg Roach $primary = $match[1] ?? ''; 7602c685d76SGreg Roach 7612c685d76SGreg Roach preg_match('/\n\d _TYPE (.+)/', $gedcom, $match); 7622c685d76SGreg Roach if ($media === '') { 7632c685d76SGreg Roach // Legacy uses _TYPE instead of MEDI 7642c685d76SGreg Roach $media = $match[1] ?? ''; 7652c685d76SGreg Roach $type = ''; 7662c685d76SGreg Roach } else { 7672c685d76SGreg Roach $type = $match[1] ?? ''; 7682c685d76SGreg Roach } 7692c685d76SGreg Roach 7702c685d76SGreg Roach preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches); 771*f0776139SGreg Roach $notes = $matches[1]; 7722c685d76SGreg Roach 7732c685d76SGreg Roach // Have we already created a media object with the same title/filename? 7742c685d76SGreg Roach $xref = DB::table('media_file') 7752c685d76SGreg Roach ->where('m_file', '=', $tree->id()) 7762c685d76SGreg Roach ->where('descriptive_title', '=', mb_substr($title, 0, 248)) 7772c685d76SGreg Roach ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248)) 7782c685d76SGreg Roach ->value('m_id'); 7792c685d76SGreg Roach 7802c685d76SGreg Roach if ($xref === null) { 7812c685d76SGreg Roach $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE); 7822c685d76SGreg Roach 7832c685d76SGreg Roach // convert to a media-object 7842c685d76SGreg Roach $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file; 7852c685d76SGreg Roach 7862c685d76SGreg Roach if ($format !== '') { 7872c685d76SGreg Roach $gedcom .= "\n2 FORM " . $format; 7882c685d76SGreg Roach 7892c685d76SGreg Roach if ($media !== '') { 7902c685d76SGreg Roach $gedcom .= "\n3 TYPE " . 
$media; 7912c685d76SGreg Roach } 7922c685d76SGreg Roach } 7932c685d76SGreg Roach 7942c685d76SGreg Roach if ($title !== '') { 7958a45bddeSGreg Roach $gedcom .= "\n2 TITL " . $title; 7962c685d76SGreg Roach } 7972c685d76SGreg Roach 7982c685d76SGreg Roach if ($scrapbook !== '') { 7992c685d76SGreg Roach $gedcom .= "\n1 _SCBK " . $scrapbook; 8002c685d76SGreg Roach } 8012c685d76SGreg Roach 8022c685d76SGreg Roach if ($primary !== '') { 8032c685d76SGreg Roach $gedcom .= "\n1 _PRIM " . $primary; 8042c685d76SGreg Roach } 8052c685d76SGreg Roach 8062c685d76SGreg Roach if ($type !== '') { 8072c685d76SGreg Roach $gedcom .= "\n1 _TYPE " . $type; 8082c685d76SGreg Roach } 8092c685d76SGreg Roach 8102c685d76SGreg Roach foreach ($notes as $note) { 8112c685d76SGreg Roach $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]); 8122c685d76SGreg Roach } 8132c685d76SGreg Roach 8142c685d76SGreg Roach DB::table('media')->insert([ 8152c685d76SGreg Roach 'm_id' => $xref, 8162c685d76SGreg Roach 'm_file' => $tree->id(), 8172c685d76SGreg Roach 'm_gedcom' => $gedcom, 8182c685d76SGreg Roach ]); 8192c685d76SGreg Roach 8202c685d76SGreg Roach DB::table('media_file')->insert([ 8212c685d76SGreg Roach 'm_id' => $xref, 8222c685d76SGreg Roach 'm_file' => $tree->id(), 8232c685d76SGreg Roach 'multimedia_file_refn' => mb_substr($file, 0, 248), 8242c685d76SGreg Roach 'multimedia_format' => mb_substr($format, 0, 4), 8252c685d76SGreg Roach 'source_media_type' => mb_substr($media, 0, 15), 8262c685d76SGreg Roach 'descriptive_title' => mb_substr($title, 0, 248), 8272c685d76SGreg Roach ]); 8282c685d76SGreg Roach } 8292c685d76SGreg Roach 8302c685d76SGreg Roach return $xref; 8312c685d76SGreg Roach } 8322c685d76SGreg Roach 8332c685d76SGreg Roach /** 8342c685d76SGreg Roach * update a record in the database 8352c685d76SGreg Roach * 8362c685d76SGreg Roach * @param string $gedrec 8372c685d76SGreg Roach * @param Tree $tree 8382c685d76SGreg Roach * @param bool $delete 8392c685d76SGreg Roach * 
8402c685d76SGreg Roach * @return void 8412c685d76SGreg Roach * @throws GedcomErrorException 8422c685d76SGreg Roach */ 8432c685d76SGreg Roach public function updateRecord(string $gedrec, Tree $tree, bool $delete): void 8442c685d76SGreg Roach { 8452c685d76SGreg Roach if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 8462c685d76SGreg Roach [, $gid, $type] = $match; 8472c685d76SGreg Roach } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) { 8482c685d76SGreg Roach // The HEAD record has no XREF. Any others? 8492c685d76SGreg Roach $gid = $match[1]; 8502c685d76SGreg Roach $type = $match[1]; 8512c685d76SGreg Roach } else { 8522c685d76SGreg Roach throw new GedcomErrorException($gedrec); 8532c685d76SGreg Roach } 8542c685d76SGreg Roach 8552c685d76SGreg Roach // Place links 8562c685d76SGreg Roach DB::table('placelinks') 8572c685d76SGreg Roach ->where('pl_gid', '=', $gid) 8582c685d76SGreg Roach ->where('pl_file', '=', $tree->id()) 8592c685d76SGreg Roach ->delete(); 8602c685d76SGreg Roach 8612c685d76SGreg Roach // Orphaned places. If we're deleting "Westminster, London, England", 8622c685d76SGreg Roach // then we may also need to delete "London, England" and "England". 
8632c685d76SGreg Roach do { 8642c685d76SGreg Roach $affected = DB::table('places') 8652c685d76SGreg Roach ->leftJoin('placelinks', function (JoinClause $join): void { 8662c685d76SGreg Roach $join 8672c685d76SGreg Roach ->on('p_id', '=', 'pl_p_id') 8682c685d76SGreg Roach ->on('p_file', '=', 'pl_file'); 8692c685d76SGreg Roach }) 8702c685d76SGreg Roach ->whereNull('pl_p_id') 8712c685d76SGreg Roach ->delete(); 8722c685d76SGreg Roach } while ($affected > 0); 8732c685d76SGreg Roach 8742c685d76SGreg Roach DB::table('dates') 8752c685d76SGreg Roach ->where('d_gid', '=', $gid) 8762c685d76SGreg Roach ->where('d_file', '=', $tree->id()) 8772c685d76SGreg Roach ->delete(); 8782c685d76SGreg Roach 8792c685d76SGreg Roach DB::table('name') 8802c685d76SGreg Roach ->where('n_id', '=', $gid) 8812c685d76SGreg Roach ->where('n_file', '=', $tree->id()) 8822c685d76SGreg Roach ->delete(); 8832c685d76SGreg Roach 8842c685d76SGreg Roach DB::table('link') 8852c685d76SGreg Roach ->where('l_from', '=', $gid) 8862c685d76SGreg Roach ->where('l_file', '=', $tree->id()) 8872c685d76SGreg Roach ->delete(); 8882c685d76SGreg Roach 8892c685d76SGreg Roach switch ($type) { 8902c685d76SGreg Roach case Individual::RECORD_TYPE: 8912c685d76SGreg Roach DB::table('individuals') 8922c685d76SGreg Roach ->where('i_id', '=', $gid) 8932c685d76SGreg Roach ->where('i_file', '=', $tree->id()) 8942c685d76SGreg Roach ->delete(); 8952c685d76SGreg Roach break; 8962c685d76SGreg Roach 8972c685d76SGreg Roach case Family::RECORD_TYPE: 8982c685d76SGreg Roach DB::table('families') 8992c685d76SGreg Roach ->where('f_id', '=', $gid) 9002c685d76SGreg Roach ->where('f_file', '=', $tree->id()) 9012c685d76SGreg Roach ->delete(); 9022c685d76SGreg Roach break; 9032c685d76SGreg Roach 9042c685d76SGreg Roach case Source::RECORD_TYPE: 9052c685d76SGreg Roach DB::table('sources') 9062c685d76SGreg Roach ->where('s_id', '=', $gid) 9072c685d76SGreg Roach ->where('s_file', '=', $tree->id()) 9082c685d76SGreg Roach ->delete(); 9092c685d76SGreg Roach 
break; 9102c685d76SGreg Roach 9112c685d76SGreg Roach case Media::RECORD_TYPE: 9122c685d76SGreg Roach DB::table('media_file') 9132c685d76SGreg Roach ->where('m_id', '=', $gid) 9142c685d76SGreg Roach ->where('m_file', '=', $tree->id()) 9152c685d76SGreg Roach ->delete(); 9162c685d76SGreg Roach 9172c685d76SGreg Roach DB::table('media') 9182c685d76SGreg Roach ->where('m_id', '=', $gid) 9192c685d76SGreg Roach ->where('m_file', '=', $tree->id()) 9202c685d76SGreg Roach ->delete(); 9212c685d76SGreg Roach break; 9222c685d76SGreg Roach 9232c685d76SGreg Roach default: 9242c685d76SGreg Roach DB::table('other') 9252c685d76SGreg Roach ->where('o_id', '=', $gid) 9262c685d76SGreg Roach ->where('o_file', '=', $tree->id()) 9272c685d76SGreg Roach ->delete(); 9282c685d76SGreg Roach break; 9292c685d76SGreg Roach } 9302c685d76SGreg Roach 9312c685d76SGreg Roach if (!$delete) { 9322c685d76SGreg Roach $this->importRecord($gedrec, $tree, true); 9332c685d76SGreg Roach } 9342c685d76SGreg Roach } 9352c685d76SGreg Roach} 936