1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2022 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Services; 21 22use Fisharebest\Webtrees\Date; 23use Fisharebest\Webtrees\Elements\UnknownElement; 24use Fisharebest\Webtrees\Exceptions\GedcomErrorException; 25use Fisharebest\Webtrees\Family; 26use Fisharebest\Webtrees\Gedcom; 27use Fisharebest\Webtrees\Header; 28use Fisharebest\Webtrees\Individual; 29use Fisharebest\Webtrees\Location; 30use Fisharebest\Webtrees\Media; 31use Fisharebest\Webtrees\Note; 32use Fisharebest\Webtrees\Place; 33use Fisharebest\Webtrees\PlaceLocation; 34use Fisharebest\Webtrees\Registry; 35use Fisharebest\Webtrees\Repository; 36use Fisharebest\Webtrees\Soundex; 37use Fisharebest\Webtrees\Source; 38use Fisharebest\Webtrees\Submission; 39use Fisharebest\Webtrees\Submitter; 40use Fisharebest\Webtrees\Tree; 41use Illuminate\Database\Capsule\Manager as DB; 42use Illuminate\Database\Query\JoinClause; 43 44use function app; 45use function array_chunk; 46use function array_intersect_key; 47use function array_map; 48use function array_unique; 49use function assert; 50use function date; 51use function explode; 52use function max; 53use function mb_substr; 54use function preg_match; 55use function preg_match_all; 56use function preg_replace; 57use function round; 58use function str_contains; 59use function str_replace; 60use function str_starts_with; 61use function strlen; 62use function strtoupper; 63use function strtr; 64use function substr; 65use function trim; 66 67use const PREG_SET_ORDER; 68 69/** 70 * Class GedcomImportService - import GEDCOM data 71 */ 72class GedcomImportService 73{ 74 /** 75 * Tidy up a gedcom record on import, so that we can access it consistently/efficiently. 76 * 77 * @param string $rec 78 * @param Tree $tree 79 * 80 * @return string 81 */ 82 private function reformatRecord(string $rec, Tree $tree): string 83 { 84 $gedcom_service = app(GedcomService::class); 85 assert($gedcom_service instanceof GedcomService); 86 87 // Strip out mac/msdos line endings 88 $rec = preg_replace("/[\r\n]+/", "\n", $rec); 89 90 // Extract lines from the record; lines consist of: level + optional xref + tag + optional data 91 $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER); 92 93 // Process the record line-by-line 94 $newrec = ''; 95 foreach ($matches as $n => $match) { 96 [, $level, $xref, $tag, $data] = $match; 97 98 $tag = $gedcom_service->canonicalTag($tag); 99 100 switch ($tag) { 101 case 'DATE': 102 // Preserve text from INT dates 103 if (str_contains($data, '(')) { 104 [$date, $text] = explode('(', $data, 2); 105 $text = ' (' . $text; 106 } else { 107 $date = $data; 108 $text = ''; 109 } 110 // Capitals 111 $date = strtoupper($date); 112 // Temporarily add leading/trailing spaces, to allow efficient matching below 113 $date = ' ' . $date . ' '; 114 // Ensure space digits and letters 115 $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date); 116 $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date); 117 // Ensure space before/after calendar escapes 118 $date = preg_replace('/@#[^@]+@/', ' $0 ', $date); 119 // "BET." => "BET" 120 $date = preg_replace('/(\w\w)\./', '$1', $date); 121 // "CIR" => "ABT" 122 $date = str_replace(' CIR ', ' ABT ', $date); 123 $date = str_replace(' APX ', ' ABT ', $date); 124 // B.C. => BC (temporarily, to allow easier handling of ".") 125 $date = str_replace(' B.C. ', ' BC ', $date); 126 // TMG uses "EITHER X OR Y" 127 $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date); 128 // "BET X - Y " => "BET X AND Y" 129 $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date); 130 $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date); 131 // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y" 132 $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date); 133 $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date); 134 // "@#ESC@ AFT X" => "AFT @#ESC@ X" 135 $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date); 136 // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900" 137 // (don't change "/" - it is used in NS/OS dates) 138 $date = preg_replace('/[.,:;-]/', ' ', $date); 139 // BC => B.C. 140 $date = str_replace(' BC ', ' B.C. ', $date); 141 // Append the "INT" text 142 $data = $date . $text; 143 break; 144 case 'HEAD': 145 case 'TRLR': 146 // HEAD and TRLR records do not have an XREF or DATA 147 if ($level === '0') { 148 $xref = ''; 149 $data = ''; 150 } 151 break; 152 case 'NAME': 153 // Tidy up non-printing characters 154 $data = preg_replace('/ +/', ' ', trim($data)); 155 break; 156 case 'PLAC': 157 // Consistent commas 158 $data = preg_replace('/ *[,,،] */u', ', ', $data); 159 // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W 160 if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $match) === 1) { 161 $degns = (int) $match[2]; 162 $minns = (int) $match[3]; 163 $secns = (int) $match[4]; 164 $degew = (int) $match[6]; 165 $minew = (int) $match[7]; 166 $secew = (int) $match[8]; 167 $data = 168 $match[1] . "\n" . 169 (1 + (int) $level) . " MAP\n" . 170 (2 + (int) $level) . ' LATI ' . ($match[5] . round($degns + $minns / 60 + $secns / 3600, 4)) . "\n" . 171 (2 + (int) $level) . ' LONG ' . ($match[9] . round($degew + $minew / 60 + $secew / 3600, 4)); 172 } 173 break; 174 case 'SEX': 175 $data = strtoupper($data); 176 break; 177 } 178 // Suppress "Y", for facts/events with a DATE or PLAC 179 if ($data === 'y') { 180 $data = 'Y'; 181 } 182 if ($level === '1' && $data === 'Y') { 183 for ($i = $n + 1; $i < $num_matches - 1 && $matches[$i][1] !== '1'; ++$i) { 184 if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') { 185 $data = ''; 186 break; 187 } 188 } 189 } 190 // Reassemble components back into a single line 191 switch ($tag) { 192 default: 193 // Remove tabs and multiple/leading/trailing spaces 194 $data = strtr($data, ["\t" => ' ']); 195 $data = trim($data, ' '); 196 while (str_contains($data, ' ')) { 197 $data = strtr($data, [' ' => ' ']); 198 } 199 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 200 break; 201 case 'NOTE': 202 case 'TEXT': 203 case 'DATA': 204 case 'CONT': 205 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 206 break; 207 case 'FILE': 208 // Strip off the user-defined path prefix 209 $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH'); 210 if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) { 211 $data = substr($data, strlen($GEDCOM_MEDIA_PATH)); 212 } 213 // convert backslashes in filenames to forward slashes 214 $data = preg_replace("/\\\\/", '/', $data); 215 216 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 217 break; 218 case 'CONC': 219 // Merge CONC lines, to simplify access later on. 220 $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data; 221 break; 222 } 223 } 224 225 return $newrec; 226 } 227 228 /** 229 * import record into database 230 * this function will parse the given gedcom record and add it to the database 231 * 232 * @param string $gedrec the raw gedcom record to parse 233 * @param Tree $tree import the record into this tree 234 * @param bool $update whether this is an updated record that has been accepted 235 * 236 * @return void 237 * @throws GedcomErrorException 238 */ 239 public function importRecord(string $gedrec, Tree $tree, bool $update): void 240 { 241 $tree_id = $tree->id(); 242 243 // Escaped @ signs (only if importing from file) 244 if (!$update) { 245 $gedrec = str_replace('@@', '@', $gedrec); 246 } 247 248 // Standardise gedcom format 249 $gedrec = $this->reformatRecord($gedrec, $tree); 250 251 // import different types of records 252 if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 253 [, $xref, $type] = $match; 254 } elseif (str_starts_with($gedrec, '0 HEAD')) { 255 $type = 'HEAD'; 256 $xref = 'HEAD'; // For records without an XREF, use the type as a pseudo XREF. 257 } elseif (str_starts_with($gedrec, '0 TRLR')) { 258 $tree->setPreference('imported', '1'); 259 $type = 'TRLR'; 260 $xref = 'TRLR'; // For records without an XREF, use the type as a pseudo XREF. 261 } elseif (str_starts_with($gedrec, '0 _PLAC_DEFN')) { 262 $this->importLegacyPlacDefn($gedrec); 263 264 return; 265 } elseif (str_starts_with($gedrec, '0 _PLAC ')) { 266 $this->importTNGPlac($gedrec); 267 268 return; 269 } else { 270 foreach (Gedcom::CUSTOM_RECORDS_WITHOUT_XREFS as $record_type) { 271 if (preg_match('/^0 ' . $record_type . '\b/', $gedrec) === 1) { 272 return; 273 } 274 } 275 276 throw new GedcomErrorException($gedrec); 277 } 278 279 // Add a _UID 280 if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) { 281 $element = Registry::elementFactory()->make($type . ':_UID'); 282 if (!$element instanceof UnknownElement) { 283 $gedrec .= "\n1 _UID " . $element->default($tree); 284 } 285 } 286 287 // If the user has downloaded their GEDCOM data (containing media objects) and edited it 288 // using an application which does not support (and deletes) media objects, then add them 289 // back in. 290 if ($tree->getPreference('keep_media')) { 291 $old_linked_media = DB::table('link') 292 ->where('l_from', '=', $xref) 293 ->where('l_file', '=', $tree_id) 294 ->where('l_type', '=', 'OBJE') 295 ->pluck('l_to'); 296 297 // Delete these links - so that we do not insert them again in updateLinks() 298 DB::table('link') 299 ->where('l_from', '=', $xref) 300 ->where('l_file', '=', $tree_id) 301 ->where('l_type', '=', 'OBJE') 302 ->delete(); 303 304 foreach ($old_linked_media as $media_id) { 305 $gedrec .= "\n1 OBJE @" . $media_id . '@'; 306 } 307 } 308 309 // Convert inline media into media objects 310 $gedrec = $this->convertInlineMedia($tree, $gedrec); 311 312 switch ($type) { 313 case Individual::RECORD_TYPE: 314 $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree); 315 316 if (preg_match('/\n1 RIN (.+)/', $gedrec, $match)) { 317 $rin = $match[1]; 318 } else { 319 $rin = $xref; 320 } 321 322 // The database can only store MFU, and many of the stats queries assume this. 323 $sex = $record->sex(); 324 $sex = $sex === 'M' || $sex === 'F' ? $sex : 'U'; 325 326 DB::table('individuals')->insert([ 327 'i_id' => $xref, 328 'i_file' => $tree_id, 329 'i_rin' => $rin, 330 'i_sex' => $sex, 331 'i_gedcom' => $gedrec, 332 ]); 333 334 // Update the cross-reference/index tables. 335 $this->updatePlaces($xref, $tree, $gedrec); 336 $this->updateDates($xref, $tree_id, $gedrec); 337 $this->updateNames($xref, $tree_id, $record); 338 break; 339 340 case Family::RECORD_TYPE: 341 if (preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) { 342 $husb = $match[1]; 343 } else { 344 $husb = ''; 345 } 346 if (preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) { 347 $wife = $match[1]; 348 } else { 349 $wife = ''; 350 } 351 $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match); 352 if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) { 353 $nchi = max($nchi, $match[1]); 354 } 355 356 DB::table('families')->insert([ 357 'f_id' => $xref, 358 'f_file' => $tree_id, 359 'f_husb' => $husb, 360 'f_wife' => $wife, 361 'f_gedcom' => $gedrec, 362 'f_numchil' => $nchi, 363 ]); 364 365 // Update the cross-reference/index tables. 366 $this->updatePlaces($xref, $tree, $gedrec); 367 $this->updateDates($xref, $tree_id, $gedrec); 368 break; 369 370 case Source::RECORD_TYPE: 371 if (preg_match('/\n1 TITL (.+)/', $gedrec, $match)) { 372 $name = $match[1]; 373 } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) { 374 $name = $match[1]; 375 } else { 376 $name = $xref; 377 } 378 379 DB::table('sources')->insert([ 380 's_id' => $xref, 381 's_file' => $tree_id, 382 's_name' => mb_substr($name, 0, 255), 383 's_gedcom' => $gedrec, 384 ]); 385 break; 386 387 case Repository::RECORD_TYPE: 388 case Note::RECORD_TYPE: 389 case Submission::RECORD_TYPE: 390 case Submitter::RECORD_TYPE: 391 case Location::RECORD_TYPE: 392 DB::table('other')->insert([ 393 'o_id' => $xref, 394 'o_file' => $tree_id, 395 'o_type' => $type, 396 'o_gedcom' => $gedrec, 397 ]); 398 break; 399 400 case Header::RECORD_TYPE: 401 // Force HEAD records to have a creation date. 402 if (!str_contains($gedrec, "\n1 DATE ")) { 403 $today = strtoupper(date('d M Y')); 404 $gedrec .= "\n1 DATE " . $today; 405 } 406 407 DB::table('other')->insert([ 408 'o_id' => $xref, 409 'o_file' => $tree_id, 410 'o_type' => Header::RECORD_TYPE, 411 'o_gedcom' => $gedrec, 412 ]); 413 break; 414 415 416 case Media::RECORD_TYPE: 417 $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree); 418 419 DB::table('media')->insert([ 420 'm_id' => $xref, 421 'm_file' => $tree_id, 422 'm_gedcom' => $gedrec, 423 ]); 424 425 foreach ($record->mediaFiles() as $media_file) { 426 DB::table('media_file')->insert([ 427 'm_id' => $xref, 428 'm_file' => $tree_id, 429 'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248), 430 'multimedia_format' => mb_substr($media_file->format(), 0, 4), 431 'source_media_type' => mb_substr($media_file->type(), 0, 15), 432 'descriptive_title' => mb_substr($media_file->title(), 0, 248), 433 ]); 434 } 435 break; 436 437 default: // Custom record types. 438 DB::table('other')->insert([ 439 'o_id' => $xref, 440 'o_file' => $tree_id, 441 'o_type' => mb_substr($type, 0, 15), 442 'o_gedcom' => $gedrec, 443 ]); 444 break; 445 } 446 447 // Update the cross-reference/index tables. 448 $this->updateLinks($xref, $tree_id, $gedrec); 449 } 450 451 /** 452 * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values 453 * 454 * @param string $gedcom 455 */ 456 private function importLegacyPlacDefn(string $gedcom): void 457 { 458 $gedcom_service = new GedcomService(); 459 460 if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match)) { 461 $place_name = $match[1]; 462 } else { 463 return; 464 } 465 466 if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match)) { 467 $latitude = $gedcom_service->readLatitude($match[1]); 468 } else { 469 return; 470 } 471 472 if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match)) { 473 $longitude = $gedcom_service->readLongitude($match[1]); 474 } else { 475 return; 476 } 477 478 $location = new PlaceLocation($place_name); 479 480 if ($location->latitude() === null && $location->longitude() === null) { 481 DB::table('place_location') 482 ->where('id', '=', $location->id()) 483 ->update([ 484 'latitude' => $latitude, 485 'longitude' => $longitude, 486 ]); 487 } 488 } 489 490 /** 491 * Legacy Family Tree software generates _PLAC records containing LAT/LONG values 492 * 493 * @param string $gedcom 494 */ 495 private function importTNGPlac(string $gedcom): void 496 { 497 if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match)) { 498 $place_name = $match[1]; 499 } else { 500 return; 501 } 502 503 if (preg_match('/\n2 LATI (.+)/', $gedcom, $match)) { 504 $latitude = (float) $match[1]; 505 } else { 506 return; 507 } 508 509 if (preg_match('/\n2 LONG (.+)/', $gedcom, $match)) { 510 $longitude = (float) $match[1]; 511 } else { 512 return; 513 } 514 515 $location = new PlaceLocation($place_name); 516 517 if ($location->latitude() === null && $location->longitude() === null) { 518 DB::table('place_location') 519 ->where('id', '=', $location->id()) 520 ->update([ 521 'latitude' => $latitude, 522 'longitude' => $longitude, 523 ]); 524 } 525 } 526 527 /** 528 * Extract all level 2 places from the given record and insert them into the places table 529 * 530 * @param string $xref 531 * @param Tree $tree 532 * @param string $gedrec 533 * 534 * @return void 535 */ 536 public function updatePlaces(string $xref, Tree $tree, string $gedrec): void 537 { 538 // Insert all new rows together 539 $rows = []; 540 541 preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches); 542 543 $places = array_unique($matches[1]); 544 545 foreach ($places as $place_name) { 546 $place = new Place($place_name, $tree); 547 548 // Calling Place::id() will create the entry in the database, if it doesn't already exist. 549 while ($place->id() !== 0) { 550 $rows[] = [ 551 'pl_p_id' => $place->id(), 552 'pl_gid' => $xref, 553 'pl_file' => $tree->id(), 554 ]; 555 556 $place = $place->parent(); 557 } 558 } 559 560 // array_unique doesn't work with arrays of arrays 561 $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows))); 562 563 // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders. 564 foreach (array_chunk($rows, 20000) as $chunk) { 565 DB::table('placelinks')->insert($chunk); 566 } 567 } 568 569 /** 570 * Extract all the dates from the given record and insert them into the database. 571 * 572 * @param string $xref 573 * @param int $ged_id 574 * @param string $gedrec 575 * 576 * @return void 577 */ 578 private function updateDates(string $xref, int $ged_id, string $gedrec): void 579 { 580 // Insert all new rows together 581 $rows = []; 582 583 preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER); 584 585 foreach ($matches as $match) { 586 $fact = $match[1]; 587 $date = new Date($match[2]); 588 $rows[] = [ 589 'd_day' => $date->minimumDate()->day, 590 'd_month' => $date->minimumDate()->format('%O'), 591 'd_mon' => $date->minimumDate()->month, 592 'd_year' => $date->minimumDate()->year, 593 'd_julianday1' => $date->minimumDate()->minimumJulianDay(), 594 'd_julianday2' => $date->minimumDate()->maximumJulianDay(), 595 'd_fact' => $fact, 596 'd_gid' => $xref, 597 'd_file' => $ged_id, 598 'd_type' => $date->minimumDate()->format('%@'), 599 ]; 600 601 $rows[] = [ 602 'd_day' => $date->maximumDate()->day, 603 'd_month' => $date->maximumDate()->format('%O'), 604 'd_mon' => $date->maximumDate()->month, 605 'd_year' => $date->maximumDate()->year, 606 'd_julianday1' => $date->maximumDate()->minimumJulianDay(), 607 'd_julianday2' => $date->maximumDate()->maximumJulianDay(), 608 'd_fact' => $fact, 609 'd_gid' => $xref, 610 'd_file' => $ged_id, 611 'd_type' => $date->minimumDate()->format('%@'), 612 ]; 613 } 614 615 // array_unique doesn't work with arrays of arrays 616 $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows))); 617 618 DB::table('dates')->insert($rows); 619 } 620 621 /** 622 * Extract all the links from the given record and insert them into the database 623 * 624 * @param string $xref 625 * @param int $ged_id 626 * @param string $gedrec 627 * 628 * @return void 629 */ 630 private function updateLinks(string $xref, int $ged_id, string $gedrec): void 631 { 632 // Insert all new rows together 633 $rows = []; 634 635 preg_match_all('/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $matches, PREG_SET_ORDER); 636 637 foreach ($matches as $match) { 638 // Some applications (e.g. GenoPro) create links longer than 15 characters. 639 $link = mb_substr($match[1], 0, 15); 640 641 // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@" 642 $rows[$link . strtoupper($match[2])] = [ 643 'l_from' => $xref, 644 'l_to' => $match[2], 645 'l_type' => $link, 646 'l_file' => $ged_id, 647 ]; 648 } 649 650 DB::table('link')->insert($rows); 651 } 652 653 /** 654 * Extract all the names from the given record and insert them into the database. 655 * 656 * @param string $xref 657 * @param int $ged_id 658 * @param Individual $record 659 * 660 * @return void 661 */ 662 private function updateNames(string $xref, int $ged_id, Individual $record): void 663 { 664 // Insert all new rows together 665 $rows = []; 666 667 foreach ($record->getAllNames() as $n => $name) { 668 if ($name['givn'] === Individual::PRAENOMEN_NESCIO) { 669 $soundex_givn_std = null; 670 $soundex_givn_dm = null; 671 } else { 672 $soundex_givn_std = Soundex::russell($name['givn']); 673 $soundex_givn_dm = Soundex::daitchMokotoff($name['givn']); 674 } 675 676 if ($name['surn'] === Individual::NOMEN_NESCIO) { 677 $soundex_surn_std = null; 678 $soundex_surn_dm = null; 679 } else { 680 $soundex_surn_std = Soundex::russell($name['surname']); 681 $soundex_surn_dm = Soundex::daitchMokotoff($name['surname']); 682 } 683 684 $rows[] = [ 685 'n_file' => $ged_id, 686 'n_id' => $xref, 687 'n_num' => $n, 688 'n_type' => $name['type'], 689 'n_sort' => mb_substr($name['sort'], 0, 255), 690 'n_full' => mb_substr($name['fullNN'], 0, 255), 691 'n_surname' => mb_substr($name['surname'], 0, 255), 692 'n_surn' => mb_substr($name['surn'], 0, 255), 693 'n_givn' => mb_substr($name['givn'], 0, 255), 694 'n_soundex_givn_std' => $soundex_givn_std, 695 'n_soundex_surn_std' => $soundex_surn_std, 696 'n_soundex_givn_dm' => $soundex_givn_dm, 697 'n_soundex_surn_dm' => $soundex_surn_dm, 698 ]; 699 } 700 701 DB::table('name')->insert($rows); 702 } 703 704 /** 705 * Extract inline media data, and convert to media objects. 706 * 707 * @param Tree $tree 708 * @param string $gedcom 709 * 710 * @return string 711 */ 712 private function convertInlineMedia(Tree $tree, string $gedcom): string 713 { 714 while (preg_match('/\n1 OBJE(?:\n[2-9].+)+/', $gedcom, $match)) { 715 $xref = $this->createMediaObject($match[0], $tree); 716 $gedcom = strtr($gedcom, [$match[0] => "\n1 OBJE @" . $xref . '@']); 717 } 718 while (preg_match('/\n2 OBJE(?:\n[3-9].+)+/', $gedcom, $match)) { 719 $xref = $this->createMediaObject($match[0], $tree); 720 $gedcom = strtr($gedcom, [$match[0] => "\n2 OBJE @" . $xref . '@']); 721 } 722 while (preg_match('/\n3 OBJE(?:\n[4-9].+)+/', $gedcom, $match)) { 723 $xref = $this->createMediaObject($match[0], $tree); 724 $gedcom = strtr($gedcom, [$match[0] => "\n3 OBJE @" . $xref . '@']); 725 } 726 727 return $gedcom; 728 } 729 730 /** 731 * Create a new media object, from inline media data. 732 * 733 * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL 734 * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL 735 * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +2 MEDI 736 * 737 * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE 738 * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL 739 * 740 * @param string $gedcom 741 * @param Tree $tree 742 * 743 * @return string 744 */ 745 private function createMediaObject(string $gedcom, Tree $tree): string 746 { 747 preg_match('/\n\d FILE (.+)/', $gedcom, $match); 748 $file = $match[1] ?? ''; 749 750 preg_match('/\n\d TITL (.+)/', $gedcom, $match); 751 $title = $match[1] ?? ''; 752 753 preg_match('/\n\d FORM (.+)/', $gedcom, $match); 754 $format = $match[1] ?? ''; 755 756 preg_match('/\n\d MEDI (.+)/', $gedcom, $match); 757 $media = $match[1] ?? ''; 758 759 preg_match('/\n\d _SCBK (.+)/', $gedcom, $match); 760 $scrapbook = $match[1] ?? ''; 761 762 preg_match('/\n\d _PRIM (.+)/', $gedcom, $match); 763 $primary = $match[1] ?? ''; 764 765 preg_match('/\n\d _TYPE (.+)/', $gedcom, $match); 766 if ($media === '') { 767 // Legacy uses _TYPE instead of MEDI 768 $media = $match[1] ?? ''; 769 $type = ''; 770 } else { 771 $type = $match[1] ?? ''; 772 } 773 774 preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches); 775 $notes = $matches[1] ?? []; 776 777 // Have we already created a media object with the same title/filename? 778 $xref = DB::table('media_file') 779 ->where('m_file', '=', $tree->id()) 780 ->where('descriptive_title', '=', mb_substr($title, 0, 248)) 781 ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248)) 782 ->value('m_id'); 783 784 if ($xref === null) { 785 $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE); 786 787 // convert to a media-object 788 $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file; 789 790 if ($format !== '') { 791 $gedcom .= "\n2 FORM " . $format; 792 793 if ($media !== '') { 794 $gedcom .= "\n3 TYPE " . $media; 795 } 796 } 797 798 if ($title !== '') { 799 $gedcom .= "\n2 TITL " . $title; 800 } 801 802 if ($scrapbook !== '') { 803 $gedcom .= "\n1 _SCBK " . $scrapbook; 804 } 805 806 if ($primary !== '') { 807 $gedcom .= "\n1 _PRIM " . $primary; 808 } 809 810 if ($type !== '') { 811 $gedcom .= "\n1 _TYPE " . $type; 812 } 813 814 foreach ($notes as $note) { 815 $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]); 816 } 817 818 DB::table('media')->insert([ 819 'm_id' => $xref, 820 'm_file' => $tree->id(), 821 'm_gedcom' => $gedcom, 822 ]); 823 824 DB::table('media_file')->insert([ 825 'm_id' => $xref, 826 'm_file' => $tree->id(), 827 'multimedia_file_refn' => mb_substr($file, 0, 248), 828 'multimedia_format' => mb_substr($format, 0, 4), 829 'source_media_type' => mb_substr($media, 0, 15), 830 'descriptive_title' => mb_substr($title, 0, 248), 831 ]); 832 } 833 834 return $xref; 835 } 836 837 /** 838 * update a record in the database 839 * 840 * @param string $gedrec 841 * @param Tree $tree 842 * @param bool $delete 843 * 844 * @return void 845 * @throws GedcomErrorException 846 */ 847 public function updateRecord(string $gedrec, Tree $tree, bool $delete): void 848 { 849 if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 850 [, $gid, $type] = $match; 851 } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) { 852 // The HEAD record has no XREF. Any others? 853 $gid = $match[1]; 854 $type = $match[1]; 855 } else { 856 throw new GedcomErrorException($gedrec); 857 } 858 859 // Place links 860 DB::table('placelinks') 861 ->where('pl_gid', '=', $gid) 862 ->where('pl_file', '=', $tree->id()) 863 ->delete(); 864 865 // Orphaned places. If we're deleting "Westminster, London, England", 866 // then we may also need to delete "London, England" and "England". 867 do { 868 $affected = DB::table('places') 869 ->leftJoin('placelinks', function (JoinClause $join): void { 870 $join 871 ->on('p_id', '=', 'pl_p_id') 872 ->on('p_file', '=', 'pl_file'); 873 }) 874 ->whereNull('pl_p_id') 875 ->delete(); 876 } while ($affected > 0); 877 878 DB::table('dates') 879 ->where('d_gid', '=', $gid) 880 ->where('d_file', '=', $tree->id()) 881 ->delete(); 882 883 DB::table('name') 884 ->where('n_id', '=', $gid) 885 ->where('n_file', '=', $tree->id()) 886 ->delete(); 887 888 DB::table('link') 889 ->where('l_from', '=', $gid) 890 ->where('l_file', '=', $tree->id()) 891 ->delete(); 892 893 switch ($type) { 894 case Individual::RECORD_TYPE: 895 DB::table('individuals') 896 ->where('i_id', '=', $gid) 897 ->where('i_file', '=', $tree->id()) 898 ->delete(); 899 break; 900 901 case Family::RECORD_TYPE: 902 DB::table('families') 903 ->where('f_id', '=', $gid) 904 ->where('f_file', '=', $tree->id()) 905 ->delete(); 906 break; 907 908 case Source::RECORD_TYPE: 909 DB::table('sources') 910 ->where('s_id', '=', $gid) 911 ->where('s_file', '=', $tree->id()) 912 ->delete(); 913 break; 914 915 case Media::RECORD_TYPE: 916 DB::table('media_file') 917 ->where('m_id', '=', $gid) 918 ->where('m_file', '=', $tree->id()) 919 ->delete(); 920 921 DB::table('media') 922 ->where('m_id', '=', $gid) 923 ->where('m_file', '=', $tree->id()) 924 ->delete(); 925 break; 926 927 default: 928 DB::table('other') 929 ->where('o_id', '=', $gid) 930 ->where('o_file', '=', $tree->id()) 931 ->delete(); 932 break; 933 } 934 935 if (!$delete) { 936 $this->importRecord($gedrec, $tree, true); 937 } 938 } 939} 940