1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2022 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Services; 21 22use Fisharebest\Webtrees\Date; 23use Fisharebest\Webtrees\Elements\UnknownElement; 24use Fisharebest\Webtrees\Exceptions\GedcomErrorException; 25use Fisharebest\Webtrees\Family; 26use Fisharebest\Webtrees\Gedcom; 27use Fisharebest\Webtrees\Header; 28use Fisharebest\Webtrees\Individual; 29use Fisharebest\Webtrees\Location; 30use Fisharebest\Webtrees\Media; 31use Fisharebest\Webtrees\Note; 32use Fisharebest\Webtrees\Place; 33use Fisharebest\Webtrees\PlaceLocation; 34use Fisharebest\Webtrees\Registry; 35use Fisharebest\Webtrees\Repository; 36use Fisharebest\Webtrees\Soundex; 37use Fisharebest\Webtrees\Source; 38use Fisharebest\Webtrees\Submission; 39use Fisharebest\Webtrees\Submitter; 40use Fisharebest\Webtrees\Tree; 41use Illuminate\Database\Capsule\Manager as DB; 42use Illuminate\Database\Query\JoinClause; 43 44use function app; 45use function array_chunk; 46use function array_intersect_key; 47use function array_map; 48use function array_unique; 49use function assert; 50use function date; 51use function explode; 52use function max; 53use function mb_substr; 54use function preg_match; 55use function preg_match_all; 56use function preg_replace; 57use function round; 58use function str_contains; 59use function str_replace; 60use function str_starts_with; 61use function strlen; 62use function strtolower; 63use function strtoupper; 64use function strtr; 65use function substr; 66use function trim; 67 68use const PREG_SET_ORDER; 69 70/** 71 * Class GedcomImportService - import GEDCOM data 72 */ 73class GedcomImportService 74{ 75 /** 76 * Tidy up a gedcom record on import, so that we can access it consistently/efficiently. 77 * 78 * @param string $rec 79 * @param Tree $tree 80 * 81 * @return string 82 */ 83 private function reformatRecord(string $rec, Tree $tree): string 84 { 85 $gedcom_service = app(GedcomService::class); 86 assert($gedcom_service instanceof GedcomService); 87 88 // Strip out mac/msdos line endings 89 $rec = preg_replace("/[\r\n]+/", "\n", $rec); 90 91 // Extract lines from the record; lines consist of: level + optional xref + tag + optional data 92 $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER); 93 94 // Process the record line-by-line 95 $newrec = ''; 96 foreach ($matches as $n => $match) { 97 [, $level, $xref, $tag, $data] = $match; 98 99 $tag = $gedcom_service->canonicalTag($tag); 100 101 switch ($tag) { 102 case 'DATE': 103 // Preserve text from INT dates 104 if (str_contains($data, '(')) { 105 [$date, $text] = explode('(', $data, 2); 106 $text = ' (' . $text; 107 } else { 108 $date = $data; 109 $text = ''; 110 } 111 // Capitals 112 $date = strtoupper($date); 113 // Temporarily add leading/trailing spaces, to allow efficient matching below 114 $date = ' ' . $date . ' '; 115 // Ensure space digits and letters 116 $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date); 117 $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date); 118 // Ensure space before/after calendar escapes 119 $date = preg_replace('/@#[^@]+@/', ' $0 ', $date); 120 // "BET." => "BET" 121 $date = preg_replace('/(\w\w)\./', '$1', $date); 122 // "CIR" => "ABT" 123 $date = str_replace(' CIR ', ' ABT ', $date); 124 $date = str_replace(' APX ', ' ABT ', $date); 125 // B.C. => BC (temporarily, to allow easier handling of ".") 126 $date = str_replace(' B.C. ', ' BC ', $date); 127 // TMG uses "EITHER X OR Y" 128 $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date); 129 // "BET X - Y " => "BET X AND Y" 130 $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date); 131 $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date); 132 // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y" 133 $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date); 134 $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date); 135 // "@#ESC@ AFT X" => "AFT @#ESC@ X" 136 $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date); 137 // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900" 138 // (don't change "/" - it is used in NS/OS dates) 139 $date = preg_replace('/[.,:;-]/', ' ', $date); 140 // BC => B.C. 141 $date = str_replace(' BC ', ' B.C. ', $date); 142 // Append the "INT" text 143 $data = $date . $text; 144 break; 145 case 'FORM': 146 // Consistent commas 147 $data = preg_replace('/ *, */', ', ', $data); 148 break; 149 case 'HEAD': 150 // HEAD records don't have an XREF or DATA 151 if ($level === '0') { 152 $xref = ''; 153 $data = ''; 154 } 155 break; 156 case 'NAME': 157 // Tidy up non-printing characters 158 $data = preg_replace('/ +/', ' ', trim($data)); 159 break; 160 case 'PLAC': 161 // Consistent commas 162 $data = preg_replace('/ *[,,،] */u', ', ', $data); 163 // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W 164 if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $match)) { 165 $data = 166 $match[1] . "\n" . 167 ($level + 1) . " MAP\n" . 168 ($level + 2) . ' LATI ' . ($match[5] . round($match[2] + ($match[3] / 60) + ($match[4] / 3600), 4)) . "\n" . 169 ($level + 2) . ' LONG ' . ($match[9] . round($match[6] + ($match[7] / 60) + ($match[8] / 3600), 4)); 170 } 171 break; 172 case 'RESN': 173 // RESN values are lower case (confidential, privacy, locked, none) 174 $data = strtolower($data); 175 if ($data === 'invisible') { 176 $data = 'confidential'; // From old versions of Legacy. 177 } 178 break; 179 case 'SEX': 180 $data = strtoupper($data); 181 break; 182 case 'TRLR': 183 // TRLR records don't have an XREF or DATA 184 if ($level === '0') { 185 $xref = ''; 186 $data = ''; 187 } 188 break; 189 } 190 // Suppress "Y", for facts/events with a DATE or PLAC 191 if ($data === 'y') { 192 $data = 'Y'; 193 } 194 if ($level === '1' && $data === 'Y') { 195 for ($i = $n + 1; $i < $num_matches - 1 && $matches[$i][1] !== '1'; ++$i) { 196 if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') { 197 $data = ''; 198 break; 199 } 200 } 201 } 202 // Reassemble components back into a single line 203 switch ($tag) { 204 default: 205 // Remove tabs and multiple/leading/trailing spaces 206 $data = strtr($data, ["\t" => ' ']); 207 $data = trim($data, ' '); 208 while (str_contains($data, ' ')) { 209 $data = strtr($data, [' ' => ' ']); 210 } 211 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 212 break; 213 case 'NOTE': 214 case 'TEXT': 215 case 'DATA': 216 case 'CONT': 217 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 218 break; 219 case 'FILE': 220 // Strip off the user-defined path prefix 221 $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH'); 222 if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) { 223 $data = substr($data, strlen($GEDCOM_MEDIA_PATH)); 224 } 225 // convert backslashes in filenames to forward slashes 226 $data = preg_replace("/\\\\/", '/', $data); 227 228 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 229 break; 230 case 'CONC': 231 // Merge CONC lines, to simplify access later on. 232 $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data; 233 break; 234 } 235 } 236 237 return $newrec; 238 } 239 240 /** 241 * import record into database 242 * this function will parse the given gedcom record and add it to the database 243 * 244 * @param string $gedrec the raw gedcom record to parse 245 * @param Tree $tree import the record into this tree 246 * @param bool $update whether this is an updated record that has been accepted 247 * 248 * @return void 249 * @throws GedcomErrorException 250 */ 251 public function importRecord(string $gedrec, Tree $tree, bool $update): void 252 { 253 $tree_id = $tree->id(); 254 255 // Escaped @ signs (only if importing from file) 256 if (!$update) { 257 $gedrec = str_replace('@@', '@', $gedrec); 258 } 259 260 // Standardise gedcom format 261 $gedrec = $this->reformatRecord($gedrec, $tree); 262 263 // import different types of records 264 if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 265 [, $xref, $type] = $match; 266 } elseif (str_starts_with($gedrec, '0 HEAD')) { 267 $type = 'HEAD'; 268 $xref = 'HEAD'; // For records without an XREF, use the type as a pseudo XREF. 269 } elseif (str_starts_with($gedrec, '0 TRLR')) { 270 $tree->setPreference('imported', '1'); 271 $type = 'TRLR'; 272 $xref = 'TRLR'; // For records without an XREF, use the type as a pseudo XREF. 273 } elseif (str_starts_with($gedrec, '0 _PLAC_DEFN')) { 274 $this->importLegacyPlacDefn($gedrec); 275 276 return; 277 } elseif (str_starts_with($gedrec, '0 _PLAC ')) { 278 $this->importTNGPlac($gedrec); 279 280 return; 281 } else { 282 foreach (Gedcom::CUSTOM_RECORDS_WITHOUT_XREFS as $record_type) { 283 if (preg_match('/^0 ' . $record_type . '\b/', $gedrec) === 1) { 284 return; 285 } 286 } 287 288 throw new GedcomErrorException($gedrec); 289 } 290 291 // Add a _UID 292 if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) { 293 $element = Registry::elementFactory()->make($type . ':_UID'); 294 if (!$element instanceof UnknownElement) { 295 $gedrec .= "\n1 _UID " . $element->default($tree); 296 } 297 } 298 299 // If the user has downloaded their GEDCOM data (containing media objects) and edited it 300 // using an application which does not support (and deletes) media objects, then add them 301 // back in. 302 if ($tree->getPreference('keep_media')) { 303 $old_linked_media = DB::table('link') 304 ->where('l_from', '=', $xref) 305 ->where('l_file', '=', $tree_id) 306 ->where('l_type', '=', 'OBJE') 307 ->pluck('l_to'); 308 309 // Delete these links - so that we do not insert them again in updateLinks() 310 DB::table('link') 311 ->where('l_from', '=', $xref) 312 ->where('l_file', '=', $tree_id) 313 ->where('l_type', '=', 'OBJE') 314 ->delete(); 315 316 foreach ($old_linked_media as $media_id) { 317 $gedrec .= "\n1 OBJE @" . $media_id . '@'; 318 } 319 } 320 321 // Convert inline media into media objects 322 $gedrec = $this->convertInlineMedia($tree, $gedrec); 323 324 switch ($type) { 325 case Individual::RECORD_TYPE: 326 $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree); 327 328 if (preg_match('/\n1 RIN (.+)/', $gedrec, $match)) { 329 $rin = $match[1]; 330 } else { 331 $rin = $xref; 332 } 333 334 // The database can only store MFU, and many of the stats queries assume this. 335 $sex = $record->sex(); 336 $sex = $sex === 'M' || $sex === 'F' ? $sex : 'U'; 337 338 DB::table('individuals')->insert([ 339 'i_id' => $xref, 340 'i_file' => $tree_id, 341 'i_rin' => $rin, 342 'i_sex' => $sex, 343 'i_gedcom' => $gedrec, 344 ]); 345 346 // Update the cross-reference/index tables. 347 $this->updatePlaces($xref, $tree, $gedrec); 348 $this->updateDates($xref, $tree_id, $gedrec); 349 $this->updateNames($xref, $tree_id, $record); 350 break; 351 352 case Family::RECORD_TYPE: 353 if (preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) { 354 $husb = $match[1]; 355 } else { 356 $husb = ''; 357 } 358 if (preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) { 359 $wife = $match[1]; 360 } else { 361 $wife = ''; 362 } 363 $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match); 364 if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) { 365 $nchi = max($nchi, $match[1]); 366 } 367 368 DB::table('families')->insert([ 369 'f_id' => $xref, 370 'f_file' => $tree_id, 371 'f_husb' => $husb, 372 'f_wife' => $wife, 373 'f_gedcom' => $gedrec, 374 'f_numchil' => $nchi, 375 ]); 376 377 // Update the cross-reference/index tables. 378 $this->updatePlaces($xref, $tree, $gedrec); 379 $this->updateDates($xref, $tree_id, $gedrec); 380 break; 381 382 case Source::RECORD_TYPE: 383 if (preg_match('/\n1 TITL (.+)/', $gedrec, $match)) { 384 $name = $match[1]; 385 } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) { 386 $name = $match[1]; 387 } else { 388 $name = $xref; 389 } 390 391 DB::table('sources')->insert([ 392 's_id' => $xref, 393 's_file' => $tree_id, 394 's_name' => mb_substr($name, 0, 255), 395 's_gedcom' => $gedrec, 396 ]); 397 break; 398 399 case Repository::RECORD_TYPE: 400 case Note::RECORD_TYPE: 401 case Submission::RECORD_TYPE: 402 case Submitter::RECORD_TYPE: 403 case Location::RECORD_TYPE: 404 DB::table('other')->insert([ 405 'o_id' => $xref, 406 'o_file' => $tree_id, 407 'o_type' => $type, 408 'o_gedcom' => $gedrec, 409 ]); 410 break; 411 412 case Header::RECORD_TYPE: 413 // Force HEAD records to have a creation date. 414 if (!str_contains($gedrec, "\n1 DATE ")) { 415 $today = strtoupper(date('d M Y')); 416 $gedrec .= "\n1 DATE " . $today; 417 } 418 419 DB::table('other')->insert([ 420 'o_id' => $xref, 421 'o_file' => $tree_id, 422 'o_type' => Header::RECORD_TYPE, 423 'o_gedcom' => $gedrec, 424 ]); 425 break; 426 427 428 case Media::RECORD_TYPE: 429 $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree); 430 431 DB::table('media')->insert([ 432 'm_id' => $xref, 433 'm_file' => $tree_id, 434 'm_gedcom' => $gedrec, 435 ]); 436 437 foreach ($record->mediaFiles() as $media_file) { 438 DB::table('media_file')->insert([ 439 'm_id' => $xref, 440 'm_file' => $tree_id, 441 'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248), 442 'multimedia_format' => mb_substr($media_file->format(), 0, 4), 443 'source_media_type' => mb_substr($media_file->type(), 0, 15), 444 'descriptive_title' => mb_substr($media_file->title(), 0, 248), 445 ]); 446 } 447 break; 448 449 default: // Custom record types. 450 DB::table('other')->insert([ 451 'o_id' => $xref, 452 'o_file' => $tree_id, 453 'o_type' => mb_substr($type, 0, 15), 454 'o_gedcom' => $gedrec, 455 ]); 456 break; 457 } 458 459 // Update the cross-reference/index tables. 460 $this->updateLinks($xref, $tree_id, $gedrec); 461 } 462 463 /** 464 * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values 465 * 466 * @param string $gedcom 467 */ 468 private function importLegacyPlacDefn(string $gedcom): void 469 { 470 $gedcom_service = new GedcomService(); 471 472 if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match)) { 473 $place_name = $match[1]; 474 } else { 475 return; 476 } 477 478 if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match)) { 479 $latitude = $gedcom_service->readLatitude($match[1]); 480 } else { 481 return; 482 } 483 484 if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match)) { 485 $longitude = $gedcom_service->readLongitude($match[1]); 486 } else { 487 return; 488 } 489 490 $location = new PlaceLocation($place_name); 491 492 if ($location->latitude() === null && $location->longitude() === null) { 493 DB::table('place_location') 494 ->where('id', '=', $location->id()) 495 ->update([ 496 'latitude' => $latitude, 497 'longitude' => $longitude, 498 ]); 499 } 500 } 501 502 /** 503 * Legacy Family Tree software generates _PLAC records containing LAT/LONG values 504 * 505 * @param string $gedcom 506 */ 507 private function importTNGPlac(string $gedcom): void 508 { 509 if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match)) { 510 $place_name = $match[1]; 511 } else { 512 return; 513 } 514 515 if (preg_match('/\n2 LATI (.+)/', $gedcom, $match)) { 516 $latitude = (float) $match[1]; 517 } else { 518 return; 519 } 520 521 if (preg_match('/\n2 LONG (.+)/', $gedcom, $match)) { 522 $longitude = (float) $match[1]; 523 } else { 524 return; 525 } 526 527 $location = new PlaceLocation($place_name); 528 529 if ($location->latitude() === null && $location->longitude() === null) { 530 DB::table('place_location') 531 ->where('id', '=', $location->id()) 532 ->update([ 533 'latitude' => $latitude, 534 'longitude' => $longitude, 535 ]); 536 } 537 } 538 539 /** 540 * Extract all level 2 places from the given record and insert them into the places table 541 * 542 * @param string $xref 543 * @param Tree $tree 544 * @param string $gedrec 545 * 546 * @return void 547 */ 548 public function updatePlaces(string $xref, Tree $tree, string $gedrec): void 549 { 550 // Insert all new rows together 551 $rows = []; 552 553 preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches); 554 555 $places = array_unique($matches[1]); 556 557 foreach ($places as $place_name) { 558 $place = new Place($place_name, $tree); 559 560 // Calling Place::id() will create the entry in the database, if it doesn't already exist. 561 while ($place->id() !== 0) { 562 $rows[] = [ 563 'pl_p_id' => $place->id(), 564 'pl_gid' => $xref, 565 'pl_file' => $tree->id(), 566 ]; 567 568 $place = $place->parent(); 569 } 570 } 571 572 // array_unique doesn't work with arrays of arrays 573 $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows))); 574 575 // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders. 576 foreach (array_chunk($rows, 20000) as $chunk) { 577 DB::table('placelinks')->insert($chunk); 578 } 579 } 580 581 /** 582 * Extract all the dates from the given record and insert them into the database. 583 * 584 * @param string $xref 585 * @param int $ged_id 586 * @param string $gedrec 587 * 588 * @return void 589 */ 590 private function updateDates(string $xref, int $ged_id, string $gedrec): void 591 { 592 // Insert all new rows together 593 $rows = []; 594 595 preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER); 596 597 foreach ($matches as $match) { 598 $fact = $match[1]; 599 $date = new Date($match[2]); 600 $rows[] = [ 601 'd_day' => $date->minimumDate()->day, 602 'd_month' => $date->minimumDate()->format('%O'), 603 'd_mon' => $date->minimumDate()->month, 604 'd_year' => $date->minimumDate()->year, 605 'd_julianday1' => $date->minimumDate()->minimumJulianDay(), 606 'd_julianday2' => $date->minimumDate()->maximumJulianDay(), 607 'd_fact' => $fact, 608 'd_gid' => $xref, 609 'd_file' => $ged_id, 610 'd_type' => $date->minimumDate()->format('%@'), 611 ]; 612 613 $rows[] = [ 614 'd_day' => $date->maximumDate()->day, 615 'd_month' => $date->maximumDate()->format('%O'), 616 'd_mon' => $date->maximumDate()->month, 617 'd_year' => $date->maximumDate()->year, 618 'd_julianday1' => $date->maximumDate()->minimumJulianDay(), 619 'd_julianday2' => $date->maximumDate()->maximumJulianDay(), 620 'd_fact' => $fact, 621 'd_gid' => $xref, 622 'd_file' => $ged_id, 623 'd_type' => $date->minimumDate()->format('%@'), 624 ]; 625 } 626 627 // array_unique doesn't work with arrays of arrays 628 $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows))); 629 630 DB::table('dates')->insert($rows); 631 } 632 633 /** 634 * Extract all the links from the given record and insert them into the database 635 * 636 * @param string $xref 637 * @param int $ged_id 638 * @param string $gedrec 639 * 640 * @return void 641 */ 642 private function updateLinks(string $xref, int $ged_id, string $gedrec): void 643 { 644 // Insert all new rows together 645 $rows = []; 646 647 preg_match_all('/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $matches, PREG_SET_ORDER); 648 649 foreach ($matches as $match) { 650 // Some applications (e.g. GenoPro) create links longer than 15 characters. 651 $link = mb_substr($match[1], 0, 15); 652 653 // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@" 654 $rows[$link . strtoupper($match[2])] = [ 655 'l_from' => $xref, 656 'l_to' => $match[2], 657 'l_type' => $link, 658 'l_file' => $ged_id, 659 ]; 660 } 661 662 DB::table('link')->insert($rows); 663 } 664 665 /** 666 * Extract all the names from the given record and insert them into the database. 667 * 668 * @param string $xref 669 * @param int $ged_id 670 * @param Individual $record 671 * 672 * @return void 673 */ 674 private function updateNames(string $xref, int $ged_id, Individual $record): void 675 { 676 // Insert all new rows together 677 $rows = []; 678 679 foreach ($record->getAllNames() as $n => $name) { 680 if ($name['givn'] === Individual::PRAENOMEN_NESCIO) { 681 $soundex_givn_std = null; 682 $soundex_givn_dm = null; 683 } else { 684 $soundex_givn_std = Soundex::russell($name['givn']); 685 $soundex_givn_dm = Soundex::daitchMokotoff($name['givn']); 686 } 687 688 if ($name['surn'] === Individual::NOMEN_NESCIO) { 689 $soundex_surn_std = null; 690 $soundex_surn_dm = null; 691 } else { 692 $soundex_surn_std = Soundex::russell($name['surname']); 693 $soundex_surn_dm = Soundex::daitchMokotoff($name['surname']); 694 } 695 696 $rows[] = [ 697 'n_file' => $ged_id, 698 'n_id' => $xref, 699 'n_num' => $n, 700 'n_type' => $name['type'], 701 'n_sort' => mb_substr($name['sort'], 0, 255), 702 'n_full' => mb_substr($name['fullNN'], 0, 255), 703 'n_surname' => mb_substr($name['surname'], 0, 255), 704 'n_surn' => mb_substr($name['surn'], 0, 255), 705 'n_givn' => mb_substr($name['givn'], 0, 255), 706 'n_soundex_givn_std' => $soundex_givn_std, 707 'n_soundex_surn_std' => $soundex_surn_std, 708 'n_soundex_givn_dm' => $soundex_givn_dm, 709 'n_soundex_surn_dm' => $soundex_surn_dm, 710 ]; 711 } 712 713 DB::table('name')->insert($rows); 714 } 715 716 /** 717 * Extract inline media data, and convert to media objects. 718 * 719 * @param Tree $tree 720 * @param string $gedcom 721 * 722 * @return string 723 */ 724 private function convertInlineMedia(Tree $tree, string $gedcom): string 725 { 726 while (preg_match('/\n1 OBJE(?:\n[2-9].+)+/', $gedcom, $match)) { 727 $xref = $this->createMediaObject($match[0], $tree); 728 $gedcom = strtr($gedcom, [$match[0] => "\n1 OBJE @" . $xref . '@']); 729 } 730 while (preg_match('/\n2 OBJE(?:\n[3-9].+)+/', $gedcom, $match)) { 731 $xref = $this->createMediaObject($match[0], $tree); 732 $gedcom = strtr($gedcom, [$match[0] => "\n2 OBJE @" . $xref . '@']); 733 } 734 while (preg_match('/\n3 OBJE(?:\n[4-9].+)+/', $gedcom, $match)) { 735 $xref = $this->createMediaObject($match[0], $tree); 736 $gedcom = strtr($gedcom, [$match[0] => "\n3 OBJE @" . $xref . '@']); 737 } 738 739 return $gedcom; 740 } 741 742 /** 743 * Create a new media object, from inline media data. 744 * 745 * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL 746 * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL 747 * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +2 MEDI 748 * 749 * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE 750 * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL 751 * 752 * @param string $gedcom 753 * @param Tree $tree 754 * 755 * @return string 756 */ 757 private function createMediaObject(string $gedcom, Tree $tree): string 758 { 759 preg_match('/\n\d FILE (.+)/', $gedcom, $match); 760 $file = $match[1] ?? ''; 761 762 preg_match('/\n\d TITL (.+)/', $gedcom, $match); 763 $title = $match[1] ?? ''; 764 765 preg_match('/\n\d FORM (.+)/', $gedcom, $match); 766 $format = $match[1] ?? ''; 767 768 preg_match('/\n\d MEDI (.+)/', $gedcom, $match); 769 $media = $match[1] ?? ''; 770 771 preg_match('/\n\d _SCBK (.+)/', $gedcom, $match); 772 $scrapbook = $match[1] ?? ''; 773 774 preg_match('/\n\d _PRIM (.+)/', $gedcom, $match); 775 $primary = $match[1] ?? ''; 776 777 preg_match('/\n\d _TYPE (.+)/', $gedcom, $match); 778 if ($media === '') { 779 // Legacy uses _TYPE instead of MEDI 780 $media = $match[1] ?? ''; 781 $type = ''; 782 } else { 783 $type = $match[1] ?? ''; 784 } 785 786 preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches); 787 $notes = $matches[1] ?? []; 788 789 // Have we already created a media object with the same title/filename? 790 $xref = DB::table('media_file') 791 ->where('m_file', '=', $tree->id()) 792 ->where('descriptive_title', '=', mb_substr($title, 0, 248)) 793 ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248)) 794 ->value('m_id'); 795 796 if ($xref === null) { 797 $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE); 798 799 // convert to a media-object 800 $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file; 801 802 if ($format !== '') { 803 $gedcom .= "\n2 FORM " . $format; 804 805 if ($media !== '') { 806 $gedcom .= "\n3 TYPE " . $media; 807 } 808 } 809 810 if ($title !== '') { 811 $gedcom .= "\n3 TITL " . $title; 812 } 813 814 if ($scrapbook !== '') { 815 $gedcom .= "\n1 _SCBK " . $scrapbook; 816 } 817 818 if ($primary !== '') { 819 $gedcom .= "\n1 _PRIM " . $primary; 820 } 821 822 if ($type !== '') { 823 $gedcom .= "\n1 _TYPE " . $type; 824 } 825 826 foreach ($notes as $note) { 827 $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]); 828 } 829 830 DB::table('media')->insert([ 831 'm_id' => $xref, 832 'm_file' => $tree->id(), 833 'm_gedcom' => $gedcom, 834 ]); 835 836 DB::table('media_file')->insert([ 837 'm_id' => $xref, 838 'm_file' => $tree->id(), 839 'multimedia_file_refn' => mb_substr($file, 0, 248), 840 'multimedia_format' => mb_substr($format, 0, 4), 841 'source_media_type' => mb_substr($media, 0, 15), 842 'descriptive_title' => mb_substr($title, 0, 248), 843 ]); 844 } 845 846 return $xref; 847 } 848 849 /** 850 * update a record in the database 851 * 852 * @param string $gedrec 853 * @param Tree $tree 854 * @param bool $delete 855 * 856 * @return void 857 * @throws GedcomErrorException 858 */ 859 public function updateRecord(string $gedrec, Tree $tree, bool $delete): void 860 { 861 if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 862 [, $gid, $type] = $match; 863 } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) { 864 // The HEAD record has no XREF. Any others? 865 $gid = $match[1]; 866 $type = $match[1]; 867 } else { 868 throw new GedcomErrorException($gedrec); 869 } 870 871 // Place links 872 DB::table('placelinks') 873 ->where('pl_gid', '=', $gid) 874 ->where('pl_file', '=', $tree->id()) 875 ->delete(); 876 877 // Orphaned places. If we're deleting "Westminster, London, England", 878 // then we may also need to delete "London, England" and "England". 879 do { 880 $affected = DB::table('places') 881 ->leftJoin('placelinks', function (JoinClause $join): void { 882 $join 883 ->on('p_id', '=', 'pl_p_id') 884 ->on('p_file', '=', 'pl_file'); 885 }) 886 ->whereNull('pl_p_id') 887 ->delete(); 888 } while ($affected > 0); 889 890 DB::table('dates') 891 ->where('d_gid', '=', $gid) 892 ->where('d_file', '=', $tree->id()) 893 ->delete(); 894 895 DB::table('name') 896 ->where('n_id', '=', $gid) 897 ->where('n_file', '=', $tree->id()) 898 ->delete(); 899 900 DB::table('link') 901 ->where('l_from', '=', $gid) 902 ->where('l_file', '=', $tree->id()) 903 ->delete(); 904 905 switch ($type) { 906 case Individual::RECORD_TYPE: 907 DB::table('individuals') 908 ->where('i_id', '=', $gid) 909 ->where('i_file', '=', $tree->id()) 910 ->delete(); 911 break; 912 913 case Family::RECORD_TYPE: 914 DB::table('families') 915 ->where('f_id', '=', $gid) 916 ->where('f_file', '=', $tree->id()) 917 ->delete(); 918 break; 919 920 case Source::RECORD_TYPE: 921 DB::table('sources') 922 ->where('s_id', '=', $gid) 923 ->where('s_file', '=', $tree->id()) 924 ->delete(); 925 break; 926 927 case Media::RECORD_TYPE: 928 DB::table('media_file') 929 ->where('m_id', '=', $gid) 930 ->where('m_file', '=', $tree->id()) 931 ->delete(); 932 933 DB::table('media') 934 ->where('m_id', '=', $gid) 935 ->where('m_file', '=', $tree->id()) 936 ->delete(); 937 break; 938 939 default: 940 DB::table('other') 941 ->where('o_id', '=', $gid) 942 ->where('o_file', '=', $tree->id()) 943 ->delete(); 944 break; 945 } 946 947 if (!$delete) { 948 $this->importRecord($gedrec, $tree, true); 949 } 950 } 951} 952