1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2023 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Services; 21 22use Fisharebest\Webtrees\Date; 23use Fisharebest\Webtrees\DB; 24use Fisharebest\Webtrees\Elements\UnknownElement; 25use Fisharebest\Webtrees\Exceptions\GedcomErrorException; 26use Fisharebest\Webtrees\Family; 27use Fisharebest\Webtrees\Gedcom; 28use Fisharebest\Webtrees\Header; 29use Fisharebest\Webtrees\Individual; 30use Fisharebest\Webtrees\Location; 31use Fisharebest\Webtrees\Media; 32use Fisharebest\Webtrees\Note; 33use Fisharebest\Webtrees\Place; 34use Fisharebest\Webtrees\PlaceLocation; 35use Fisharebest\Webtrees\Registry; 36use Fisharebest\Webtrees\Repository; 37use Fisharebest\Webtrees\Soundex; 38use Fisharebest\Webtrees\Source; 39use Fisharebest\Webtrees\Submission; 40use Fisharebest\Webtrees\Submitter; 41use Fisharebest\Webtrees\Tree; 42use Illuminate\Database\Query\JoinClause; 43 44use function array_chunk; 45use function array_intersect_key; 46use function array_map; 47use function array_unique; 48use function date; 49use function explode; 50use function max; 51use function mb_substr; 52use function preg_match; 53use function preg_match_all; 54use function preg_replace; 55use function round; 56use function str_contains; 57use function str_replace; 58use function str_starts_with; 59use function strlen; 60use function strtoupper; 61use function strtr; 62use function substr; 63use function trim; 64 65use const PREG_SET_ORDER; 66 67/** 68 * Class GedcomImportService - import GEDCOM data 69 */ 70class GedcomImportService 71{ 72 /** 73 * Tidy up a gedcom record on import, so that we can access it consistently/efficiently. 74 * 75 * @param string $rec 76 * @param Tree $tree 77 * 78 * @return string 79 */ 80 private function reformatRecord(string $rec, Tree $tree): string 81 { 82 $gedcom_service = Registry::container()->get(GedcomService::class); 83 84 // Strip out mac/msdos line endings 85 $rec = preg_replace("/[\r\n]+/", "\n", $rec); 86 87 // Extract lines from the record; lines consist of: level + optional xref + tag + optional data 88 $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER); 89 90 // Process the record line-by-line 91 $newrec = ''; 92 foreach ($matches as $n => $match) { 93 [, $level, $xref, $tag, $data] = $match; 94 95 $tag = $gedcom_service->canonicalTag($tag); 96 97 switch ($tag) { 98 case 'DATE': 99 // Preserve text from INT dates 100 if (str_contains($data, '(')) { 101 [$date, $text] = explode('(', $data, 2); 102 $text = ' (' . $text; 103 } else { 104 $date = $data; 105 $text = ''; 106 } 107 // Capitals 108 $date = strtoupper($date); 109 // Temporarily add leading/trailing spaces, to allow efficient matching below 110 $date = ' ' . $date . ' '; 111 // Ensure space digits and letters 112 $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date); 113 $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date); 114 // Ensure space before/after calendar escapes 115 $date = preg_replace('/@#[^@]+@/', ' $0 ', $date); 116 // "BET." => "BET" 117 $date = preg_replace('/(\w\w)\./', '$1', $date); 118 // "CIR" => "ABT" 119 $date = str_replace(' CIR ', ' ABT ', $date); 120 $date = str_replace(' APX ', ' ABT ', $date); 121 // B.C. => BC (temporarily, to allow easier handling of ".") 122 $date = str_replace(' B.C. ', ' BC ', $date); 123 // TMG uses "EITHER X OR Y" 124 $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date); 125 // "BET X - Y " => "BET X AND Y" 126 $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date); 127 $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date); 128 // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y" 129 $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date); 130 $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date); 131 // "@#ESC@ AFT X" => "AFT @#ESC@ X" 132 $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date); 133 // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900" 134 // (don't change "/" - it is used in NS/OS dates) 135 $date = preg_replace('/[.,:;-]/', ' ', $date); 136 // BC => B.C. 137 $date = str_replace(' BC ', ' B.C. ', $date); 138 // Append the "INT" text 139 $data = $date . $text; 140 break; 141 case 'HEAD': 142 case 'TRLR': 143 // HEAD and TRLR records do not have an XREF or DATA 144 if ($level === '0') { 145 $xref = ''; 146 $data = ''; 147 } 148 break; 149 case 'NAME': 150 // Tidy up non-printing characters 151 $data = preg_replace('/ +/', ' ', trim($data)); 152 break; 153 case 'PLAC': 154 // Consistent commas 155 $data = preg_replace('/ *[,,،] */u', ', ', $data); 156 // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W 157 if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $match) === 1) { 158 $degns = (int) $match[2]; 159 $minns = (int) $match[3]; 160 $secns = (int) $match[4]; 161 $degew = (int) $match[6]; 162 $minew = (int) $match[7]; 163 $secew = (int) $match[8]; 164 $data = 165 $match[1] . "\n" . 166 (1 + (int) $level) . " MAP\n" . 167 (2 + (int) $level) . ' LATI ' . ($match[5] . round($degns + $minns / 60 + $secns / 3600, 4)) . "\n" . 168 (2 + (int) $level) . ' LONG ' . ($match[9] . round($degew + $minew / 60 + $secew / 3600, 4)); 169 } 170 break; 171 case 'SEX': 172 $data = strtoupper($data); 173 break; 174 } 175 // Suppress "Y", for facts/events with a DATE or PLAC 176 if ($data === 'y') { 177 $data = 'Y'; 178 } 179 if ($level === '1' && $data === 'Y') { 180 for ($i = $n + 1; $i < $num_matches - 1 && $matches[$i][1] !== '1'; ++$i) { 181 if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') { 182 $data = ''; 183 break; 184 } 185 } 186 } 187 // Reassemble components back into a single line 188 switch ($tag) { 189 default: 190 // Remove tabs and multiple/leading/trailing spaces 191 $data = strtr($data, ["\t" => ' ']); 192 $data = trim($data, ' '); 193 while (str_contains($data, ' ')) { 194 $data = strtr($data, [' ' => ' ']); 195 } 196 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 197 break; 198 case 'NOTE': 199 case 'TEXT': 200 case 'DATA': 201 case 'CONT': 202 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 203 break; 204 case 'FILE': 205 // Strip off the user-defined path prefix 206 $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH'); 207 if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) { 208 $data = substr($data, strlen($GEDCOM_MEDIA_PATH)); 209 } 210 // convert backslashes in filenames to forward slashes 211 $data = preg_replace("/\\\\/", '/', $data); 212 213 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 214 break; 215 case 'CONC': 216 // Merge CONC lines, to simplify access later on. 217 $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data; 218 break; 219 } 220 } 221 222 return $newrec; 223 } 224 225 /** 226 * import record into database 227 * this function will parse the given gedcom record and add it to the database 228 * 229 * @param string $gedrec the raw gedcom record to parse 230 * @param Tree $tree import the record into this tree 231 * @param bool $update whether this is an updated record that has been accepted 232 * 233 * @return void 234 * @throws GedcomErrorException 235 */ 236 public function importRecord(string $gedrec, Tree $tree, bool $update): void 237 { 238 $tree_id = $tree->id(); 239 240 // Escaped @ signs (only if importing from file) 241 if (!$update) { 242 $gedrec = str_replace('@@', '@', $gedrec); 243 } 244 245 // Standardise gedcom format 246 $gedrec = $this->reformatRecord($gedrec, $tree); 247 248 // import different types of records 249 if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 250 [, $xref, $type] = $match; 251 } elseif (str_starts_with($gedrec, '0 HEAD')) { 252 $type = 'HEAD'; 253 $xref = 'HEAD'; // For records without an XREF, use the type as a pseudo XREF. 254 } elseif (str_starts_with($gedrec, '0 TRLR')) { 255 $tree->setPreference('imported', '1'); 256 $type = 'TRLR'; 257 $xref = 'TRLR'; // For records without an XREF, use the type as a pseudo XREF. 258 } elseif (str_starts_with($gedrec, '0 _PLAC_DEFN')) { 259 $this->importLegacyPlacDefn($gedrec); 260 261 return; 262 } elseif (str_starts_with($gedrec, '0 _PLAC ')) { 263 $this->importTNGPlac($gedrec); 264 265 return; 266 } else { 267 foreach (Gedcom::CUSTOM_RECORDS_WITHOUT_XREFS as $record_type) { 268 if (preg_match('/^0 ' . $record_type . '\b/', $gedrec) === 1) { 269 return; 270 } 271 } 272 273 throw new GedcomErrorException($gedrec); 274 } 275 276 // Add a _UID 277 if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) { 278 $element = Registry::elementFactory()->make($type . ':_UID'); 279 if (!$element instanceof UnknownElement) { 280 $gedrec .= "\n1 _UID " . $element->default($tree); 281 } 282 } 283 284 // If the user has downloaded their GEDCOM data (containing media objects) and edited it 285 // using an application which does not support (and deletes) media objects, then add them 286 // back in. 287 if ($tree->getPreference('keep_media') === '1') { 288 $old_linked_media = DB::table('link') 289 ->where('l_from', '=', $xref) 290 ->where('l_file', '=', $tree_id) 291 ->where('l_type', '=', 'OBJE') 292 ->pluck('l_to'); 293 294 // Delete these links - so that we do not insert them again in updateLinks() 295 DB::table('link') 296 ->where('l_from', '=', $xref) 297 ->where('l_file', '=', $tree_id) 298 ->where('l_type', '=', 'OBJE') 299 ->delete(); 300 301 foreach ($old_linked_media as $media_id) { 302 $gedrec .= "\n1 OBJE @" . $media_id . '@'; 303 } 304 } 305 306 // Convert inline media into media objects 307 $gedrec = $this->convertInlineMedia($tree, $gedrec); 308 309 switch ($type) { 310 case Individual::RECORD_TYPE: 311 $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree); 312 313 if (preg_match('/\n1 RIN (.+)/', $gedrec, $match)) { 314 $rin = $match[1]; 315 } else { 316 $rin = $xref; 317 } 318 319 // The database can only store MFU, and many of the stats queries assume this. 320 $sex = $record->sex(); 321 $sex = $sex === 'M' || $sex === 'F' ? $sex : 'U'; 322 323 DB::table('individuals')->insert([ 324 'i_id' => $xref, 325 'i_file' => $tree_id, 326 'i_rin' => $rin, 327 'i_sex' => $sex, 328 'i_gedcom' => $gedrec, 329 ]); 330 331 // Update the cross-reference/index tables. 332 $this->updatePlaces($xref, $tree, $gedrec); 333 $this->updateDates($xref, $tree_id, $gedrec); 334 $this->updateNames($xref, $tree_id, $record); 335 break; 336 337 case Family::RECORD_TYPE: 338 if (preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) { 339 $husb = $match[1]; 340 } else { 341 $husb = ''; 342 } 343 if (preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) { 344 $wife = $match[1]; 345 } else { 346 $wife = ''; 347 } 348 $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match); 349 if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) { 350 $nchi = max($nchi, $match[1]); 351 } 352 353 DB::table('families')->insert([ 354 'f_id' => $xref, 355 'f_file' => $tree_id, 356 'f_husb' => $husb, 357 'f_wife' => $wife, 358 'f_gedcom' => $gedrec, 359 'f_numchil' => $nchi, 360 ]); 361 362 // Update the cross-reference/index tables. 363 $this->updatePlaces($xref, $tree, $gedrec); 364 $this->updateDates($xref, $tree_id, $gedrec); 365 break; 366 367 case Source::RECORD_TYPE: 368 if (preg_match('/\n1 TITL (.+)/', $gedrec, $match)) { 369 $name = $match[1]; 370 } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) { 371 $name = $match[1]; 372 } else { 373 $name = $xref; 374 } 375 376 DB::table('sources')->insert([ 377 's_id' => $xref, 378 's_file' => $tree_id, 379 's_name' => mb_substr($name, 0, 255), 380 's_gedcom' => $gedrec, 381 ]); 382 break; 383 384 case Repository::RECORD_TYPE: 385 case Note::RECORD_TYPE: 386 case Submission::RECORD_TYPE: 387 case Submitter::RECORD_TYPE: 388 case Location::RECORD_TYPE: 389 DB::table('other')->insert([ 390 'o_id' => $xref, 391 'o_file' => $tree_id, 392 'o_type' => $type, 393 'o_gedcom' => $gedrec, 394 ]); 395 break; 396 397 case Header::RECORD_TYPE: 398 // Force HEAD records to have a creation date. 399 if (!str_contains($gedrec, "\n1 DATE ")) { 400 $today = strtoupper(date('d M Y')); 401 $gedrec .= "\n1 DATE " . $today; 402 } 403 404 DB::table('other')->insert([ 405 'o_id' => $xref, 406 'o_file' => $tree_id, 407 'o_type' => Header::RECORD_TYPE, 408 'o_gedcom' => $gedrec, 409 ]); 410 break; 411 412 413 case Media::RECORD_TYPE: 414 $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree); 415 416 DB::table('media')->insert([ 417 'm_id' => $xref, 418 'm_file' => $tree_id, 419 'm_gedcom' => $gedrec, 420 ]); 421 422 foreach ($record->mediaFiles() as $media_file) { 423 DB::table('media_file')->insert([ 424 'm_id' => $xref, 425 'm_file' => $tree_id, 426 'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248), 427 'multimedia_format' => mb_substr($media_file->format(), 0, 4), 428 'source_media_type' => mb_substr($media_file->type(), 0, 15), 429 'descriptive_title' => mb_substr($media_file->title(), 0, 248), 430 ]); 431 } 432 break; 433 434 default: // Custom record types. 435 DB::table('other')->insert([ 436 'o_id' => $xref, 437 'o_file' => $tree_id, 438 'o_type' => mb_substr($type, 0, 15), 439 'o_gedcom' => $gedrec, 440 ]); 441 break; 442 } 443 444 // Update the cross-reference/index tables. 445 $this->updateLinks($xref, $tree_id, $gedrec); 446 } 447 448 /** 449 * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values 450 * 451 * @param string $gedcom 452 */ 453 private function importLegacyPlacDefn(string $gedcom): void 454 { 455 $gedcom_service = new GedcomService(); 456 457 if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match)) { 458 $place_name = $match[1]; 459 } else { 460 return; 461 } 462 463 if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match)) { 464 $latitude = $gedcom_service->readLatitude($match[1]); 465 } else { 466 return; 467 } 468 469 if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match)) { 470 $longitude = $gedcom_service->readLongitude($match[1]); 471 } else { 472 return; 473 } 474 475 $location = new PlaceLocation($place_name); 476 477 if ($location->latitude() === null && $location->longitude() === null) { 478 DB::table('place_location') 479 ->where('id', '=', $location->id()) 480 ->update([ 481 'latitude' => $latitude, 482 'longitude' => $longitude, 483 ]); 484 } 485 } 486 487 /** 488 * Legacy Family Tree software generates _PLAC records containing LAT/LONG values 489 * 490 * @param string $gedcom 491 */ 492 private function importTNGPlac(string $gedcom): void 493 { 494 if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match)) { 495 $place_name = $match[1]; 496 } else { 497 return; 498 } 499 500 if (preg_match('/\n2 LATI (.+)/', $gedcom, $match)) { 501 $latitude = (float) $match[1]; 502 } else { 503 return; 504 } 505 506 if (preg_match('/\n2 LONG (.+)/', $gedcom, $match)) { 507 $longitude = (float) $match[1]; 508 } else { 509 return; 510 } 511 512 $location = new PlaceLocation($place_name); 513 514 if ($location->latitude() === null && $location->longitude() === null) { 515 DB::table('place_location') 516 ->where('id', '=', $location->id()) 517 ->update([ 518 'latitude' => $latitude, 519 'longitude' => $longitude, 520 ]); 521 } 522 } 523 524 /** 525 * Extract all level 2 places from the given record and insert them into the places table 526 * 527 * @param string $xref 528 * @param Tree $tree 529 * @param string $gedrec 530 * 531 * @return void 532 */ 533 public function updatePlaces(string $xref, Tree $tree, string $gedrec): void 534 { 535 // Insert all new rows together 536 $rows = []; 537 538 preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches); 539 540 $places = array_unique($matches[1]); 541 542 foreach ($places as $place_name) { 543 $place = new Place($place_name, $tree); 544 545 // Calling Place::id() will create the entry in the database, if it doesn't already exist. 546 while ($place->id() !== 0) { 547 $rows[] = [ 548 'pl_p_id' => $place->id(), 549 'pl_gid' => $xref, 550 'pl_file' => $tree->id(), 551 ]; 552 553 $place = $place->parent(); 554 } 555 } 556 557 // array_unique doesn't work with arrays of arrays 558 $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows))); 559 560 // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders. 561 foreach (array_chunk($rows, 20000) as $chunk) { 562 DB::table('placelinks')->insert($chunk); 563 } 564 } 565 566 /** 567 * Extract all the dates from the given record and insert them into the database. 568 * 569 * @param string $xref 570 * @param int $ged_id 571 * @param string $gedrec 572 * 573 * @return void 574 */ 575 private function updateDates(string $xref, int $ged_id, string $gedrec): void 576 { 577 // Insert all new rows together 578 $rows = []; 579 580 preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER); 581 582 foreach ($matches as $match) { 583 $fact = $match[1]; 584 $date = new Date($match[2]); 585 $rows[] = [ 586 'd_day' => $date->minimumDate()->day, 587 'd_month' => $date->minimumDate()->format('%O'), 588 'd_mon' => $date->minimumDate()->month, 589 'd_year' => $date->minimumDate()->year, 590 'd_julianday1' => $date->minimumDate()->minimumJulianDay(), 591 'd_julianday2' => $date->minimumDate()->maximumJulianDay(), 592 'd_fact' => $fact, 593 'd_gid' => $xref, 594 'd_file' => $ged_id, 595 'd_type' => $date->minimumDate()->format('%@'), 596 ]; 597 598 $rows[] = [ 599 'd_day' => $date->maximumDate()->day, 600 'd_month' => $date->maximumDate()->format('%O'), 601 'd_mon' => $date->maximumDate()->month, 602 'd_year' => $date->maximumDate()->year, 603 'd_julianday1' => $date->maximumDate()->minimumJulianDay(), 604 'd_julianday2' => $date->maximumDate()->maximumJulianDay(), 605 'd_fact' => $fact, 606 'd_gid' => $xref, 607 'd_file' => $ged_id, 608 'd_type' => $date->minimumDate()->format('%@'), 609 ]; 610 } 611 612 // array_unique doesn't work with arrays of arrays 613 $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows))); 614 615 DB::table('dates')->insert($rows); 616 } 617 618 /** 619 * Extract all the links from the given record and insert them into the database 620 * 621 * @param string $xref 622 * @param int $ged_id 623 * @param string $gedrec 624 * 625 * @return void 626 */ 627 private function updateLinks(string $xref, int $ged_id, string $gedrec): void 628 { 629 // Insert all new rows together 630 $rows = []; 631 632 preg_match_all('/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $matches, PREG_SET_ORDER); 633 634 foreach ($matches as $match) { 635 // Some applications (e.g. GenoPro) create links longer than 15 characters. 636 $link = mb_substr($match[1], 0, 15); 637 638 // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@" 639 $rows[$link . strtoupper($match[2])] = [ 640 'l_from' => $xref, 641 'l_to' => $match[2], 642 'l_type' => $link, 643 'l_file' => $ged_id, 644 ]; 645 } 646 647 DB::table('link')->insert($rows); 648 } 649 650 /** 651 * Extract all the names from the given record and insert them into the database. 652 * 653 * @param string $xref 654 * @param int $ged_id 655 * @param Individual $record 656 * 657 * @return void 658 */ 659 private function updateNames(string $xref, int $ged_id, Individual $record): void 660 { 661 // Insert all new rows together 662 $rows = []; 663 664 foreach ($record->getAllNames() as $n => $name) { 665 if ($name['givn'] === Individual::PRAENOMEN_NESCIO) { 666 $soundex_givn_std = null; 667 $soundex_givn_dm = null; 668 } else { 669 $soundex_givn_std = Soundex::russell($name['givn']); 670 $soundex_givn_dm = Soundex::daitchMokotoff($name['givn']); 671 } 672 673 if ($name['surn'] === Individual::NOMEN_NESCIO) { 674 $soundex_surn_std = null; 675 $soundex_surn_dm = null; 676 } else { 677 $soundex_surn_std = Soundex::russell($name['surname']); 678 $soundex_surn_dm = Soundex::daitchMokotoff($name['surname']); 679 } 680 681 $rows[] = [ 682 'n_file' => $ged_id, 683 'n_id' => $xref, 684 'n_num' => $n, 685 'n_type' => $name['type'], 686 'n_sort' => mb_substr($name['sort'], 0, 255), 687 'n_full' => mb_substr($name['fullNN'], 0, 255), 688 'n_surname' => mb_substr($name['surname'], 0, 255), 689 'n_surn' => mb_substr($name['surn'], 0, 255), 690 'n_givn' => mb_substr($name['givn'], 0, 255), 691 'n_soundex_givn_std' => $soundex_givn_std, 692 'n_soundex_surn_std' => $soundex_surn_std, 693 'n_soundex_givn_dm' => $soundex_givn_dm, 694 'n_soundex_surn_dm' => $soundex_surn_dm, 695 ]; 696 } 697 698 DB::table('name')->insert($rows); 699 } 700 701 /** 702 * Extract inline media data, and convert to media objects. 703 * 704 * @param Tree $tree 705 * @param string $gedcom 706 * 707 * @return string 708 */ 709 private function convertInlineMedia(Tree $tree, string $gedcom): string 710 { 711 while (preg_match('/\n1 OBJE(?:\n[2-9].+)+/', $gedcom, $match)) { 712 $xref = $this->createMediaObject($match[0], $tree); 713 $gedcom = strtr($gedcom, [$match[0] => "\n1 OBJE @" . $xref . '@']); 714 } 715 while (preg_match('/\n2 OBJE(?:\n[3-9].+)+/', $gedcom, $match)) { 716 $xref = $this->createMediaObject($match[0], $tree); 717 $gedcom = strtr($gedcom, [$match[0] => "\n2 OBJE @" . $xref . '@']); 718 } 719 while (preg_match('/\n3 OBJE(?:\n[4-9].+)+/', $gedcom, $match)) { 720 $xref = $this->createMediaObject($match[0], $tree); 721 $gedcom = strtr($gedcom, [$match[0] => "\n3 OBJE @" . $xref . '@']); 722 } 723 724 return $gedcom; 725 } 726 727 /** 728 * Create a new media object, from inline media data. 729 * 730 * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL 731 * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL 732 * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +2 MEDI 733 * 734 * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE 735 * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL 736 * 737 * @param string $gedcom 738 * @param Tree $tree 739 * 740 * @return string 741 */ 742 private function createMediaObject(string $gedcom, Tree $tree): string 743 { 744 preg_match('/\n\d FILE (.+)/', $gedcom, $match); 745 $file = $match[1] ?? ''; 746 747 preg_match('/\n\d TITL (.+)/', $gedcom, $match); 748 $title = $match[1] ?? ''; 749 750 preg_match('/\n\d FORM (.+)/', $gedcom, $match); 751 $format = $match[1] ?? ''; 752 753 preg_match('/\n\d MEDI (.+)/', $gedcom, $match); 754 $media = $match[1] ?? ''; 755 756 preg_match('/\n\d _SCBK (.+)/', $gedcom, $match); 757 $scrapbook = $match[1] ?? ''; 758 759 preg_match('/\n\d _PRIM (.+)/', $gedcom, $match); 760 $primary = $match[1] ?? ''; 761 762 preg_match('/\n\d _TYPE (.+)/', $gedcom, $match); 763 if ($media === '') { 764 // Legacy uses _TYPE instead of MEDI 765 $media = $match[1] ?? ''; 766 $type = ''; 767 } else { 768 $type = $match[1] ?? ''; 769 } 770 771 preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches); 772 $notes = $matches[1] ?? []; 773 774 // Have we already created a media object with the same title/filename? 775 $xref = DB::table('media_file') 776 ->where('m_file', '=', $tree->id()) 777 ->where('descriptive_title', '=', mb_substr($title, 0, 248)) 778 ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248)) 779 ->value('m_id'); 780 781 if ($xref === null) { 782 $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE); 783 784 // convert to a media-object 785 $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file; 786 787 if ($format !== '') { 788 $gedcom .= "\n2 FORM " . $format; 789 790 if ($media !== '') { 791 $gedcom .= "\n3 TYPE " . $media; 792 } 793 } 794 795 if ($title !== '') { 796 $gedcom .= "\n2 TITL " . $title; 797 } 798 799 if ($scrapbook !== '') { 800 $gedcom .= "\n1 _SCBK " . $scrapbook; 801 } 802 803 if ($primary !== '') { 804 $gedcom .= "\n1 _PRIM " . $primary; 805 } 806 807 if ($type !== '') { 808 $gedcom .= "\n1 _TYPE " . $type; 809 } 810 811 foreach ($notes as $note) { 812 $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]); 813 } 814 815 DB::table('media')->insert([ 816 'm_id' => $xref, 817 'm_file' => $tree->id(), 818 'm_gedcom' => $gedcom, 819 ]); 820 821 DB::table('media_file')->insert([ 822 'm_id' => $xref, 823 'm_file' => $tree->id(), 824 'multimedia_file_refn' => mb_substr($file, 0, 248), 825 'multimedia_format' => mb_substr($format, 0, 4), 826 'source_media_type' => mb_substr($media, 0, 15), 827 'descriptive_title' => mb_substr($title, 0, 248), 828 ]); 829 } 830 831 return $xref; 832 } 833 834 /** 835 * update a record in the database 836 * 837 * @param string $gedrec 838 * @param Tree $tree 839 * @param bool $delete 840 * 841 * @return void 842 * @throws GedcomErrorException 843 */ 844 public function updateRecord(string $gedrec, Tree $tree, bool $delete): void 845 { 846 if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 847 [, $gid, $type] = $match; 848 } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) { 849 // The HEAD record has no XREF. Any others? 850 $gid = $match[1]; 851 $type = $match[1]; 852 } else { 853 throw new GedcomErrorException($gedrec); 854 } 855 856 // Place links 857 DB::table('placelinks') 858 ->where('pl_gid', '=', $gid) 859 ->where('pl_file', '=', $tree->id()) 860 ->delete(); 861 862 // Orphaned places. If we're deleting "Westminster, London, England", 863 // then we may also need to delete "London, England" and "England". 864 do { 865 $affected = DB::table('places') 866 ->leftJoin('placelinks', function (JoinClause $join): void { 867 $join 868 ->on('p_id', '=', 'pl_p_id') 869 ->on('p_file', '=', 'pl_file'); 870 }) 871 ->whereNull('pl_p_id') 872 ->delete(); 873 } while ($affected > 0); 874 875 DB::table('dates') 876 ->where('d_gid', '=', $gid) 877 ->where('d_file', '=', $tree->id()) 878 ->delete(); 879 880 DB::table('name') 881 ->where('n_id', '=', $gid) 882 ->where('n_file', '=', $tree->id()) 883 ->delete(); 884 885 DB::table('link') 886 ->where('l_from', '=', $gid) 887 ->where('l_file', '=', $tree->id()) 888 ->delete(); 889 890 switch ($type) { 891 case Individual::RECORD_TYPE: 892 DB::table('individuals') 893 ->where('i_id', '=', $gid) 894 ->where('i_file', '=', $tree->id()) 895 ->delete(); 896 break; 897 898 case Family::RECORD_TYPE: 899 DB::table('families') 900 ->where('f_id', '=', $gid) 901 ->where('f_file', '=', $tree->id()) 902 ->delete(); 903 break; 904 905 case Source::RECORD_TYPE: 906 DB::table('sources') 907 ->where('s_id', '=', $gid) 908 ->where('s_file', '=', $tree->id()) 909 ->delete(); 910 break; 911 912 case Media::RECORD_TYPE: 913 DB::table('media_file') 914 ->where('m_id', '=', $gid) 915 ->where('m_file', '=', $tree->id()) 916 ->delete(); 917 918 DB::table('media') 919 ->where('m_id', '=', $gid) 920 ->where('m_file', '=', $tree->id()) 921 ->delete(); 922 break; 923 924 default: 925 DB::table('other') 926 ->where('o_id', '=', $gid) 927 ->where('o_file', '=', $tree->id()) 928 ->delete(); 929 break; 930 } 931 932 if (!$delete) { 933 $this->importRecord($gedrec, $tree, true); 934 } 935 } 936} 937