1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2022 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Services; 21 22use Fisharebest\Webtrees\Date; 23use Fisharebest\Webtrees\Elements\UnknownElement; 24use Fisharebest\Webtrees\Exceptions\GedcomErrorException; 25use Fisharebest\Webtrees\Family; 26use Fisharebest\Webtrees\Gedcom; 27use Fisharebest\Webtrees\Header; 28use Fisharebest\Webtrees\Individual; 29use Fisharebest\Webtrees\Location; 30use Fisharebest\Webtrees\Media; 31use Fisharebest\Webtrees\Note; 32use Fisharebest\Webtrees\Place; 33use Fisharebest\Webtrees\PlaceLocation; 34use Fisharebest\Webtrees\Registry; 35use Fisharebest\Webtrees\Repository; 36use Fisharebest\Webtrees\Soundex; 37use Fisharebest\Webtrees\Source; 38use Fisharebest\Webtrees\Submission; 39use Fisharebest\Webtrees\Submitter; 40use Fisharebest\Webtrees\Tree; 41use Illuminate\Database\Capsule\Manager as DB; 42use Illuminate\Database\Query\JoinClause; 43 44use function app; 45use function array_chunk; 46use function array_intersect_key; 47use function array_map; 48use function array_unique; 49use function assert; 50use function date; 51use function explode; 52use function max; 53use function mb_substr; 54use function preg_match; 55use function preg_match_all; 56use function preg_replace; 57use function round; 58use function str_contains; 59use function str_replace; 60use function str_starts_with; 61use function strlen; 62use function strtolower; 63use function strtoupper; 64use function strtr; 65use function substr; 66use function trim; 67 68use const PREG_SET_ORDER; 69 70/** 71 * Class GedcomImportService - import GEDCOM data 72 */ 73class GedcomImportService 74{ 75 /** 76 * Tidy up a gedcom record on import, so that we can access it consistently/efficiently. 77 * 78 * @param string $rec 79 * @param Tree $tree 80 * 81 * @return string 82 */ 83 private function reformatRecord(string $rec, Tree $tree): string 84 { 85 $gedcom_service = app(GedcomService::class); 86 assert($gedcom_service instanceof GedcomService); 87 88 // Strip out mac/msdos line endings 89 $rec = preg_replace("/[\r\n]+/", "\n", $rec); 90 91 // Extract lines from the record; lines consist of: level + optional xref + tag + optional data 92 $num_matches = preg_match_all('/^[ \t]*(\d+)[ \t]*(@[^@]*@)?[ \t]*(\w+)[ \t]?(.*)$/m', $rec, $matches, PREG_SET_ORDER); 93 94 // Process the record line-by-line 95 $newrec = ''; 96 foreach ($matches as $n => $match) { 97 [, $level, $xref, $tag, $data] = $match; 98 99 $tag = $gedcom_service->canonicalTag($tag); 100 101 switch ($tag) { 102 case 'AFN': 103 // AFN values are upper case 104 $data = strtoupper($data); 105 break; 106 case 'DATE': 107 // Preserve text from INT dates 108 if (str_contains($data, '(')) { 109 [$date, $text] = explode('(', $data, 2); 110 $text = ' (' . $text; 111 } else { 112 $date = $data; 113 $text = ''; 114 } 115 // Capitals 116 $date = strtoupper($date); 117 // Temporarily add leading/trailing spaces, to allow efficient matching below 118 $date = ' ' . $date . ' '; 119 // Ensure space digits and letters 120 $date = preg_replace('/([A-Z])(\d)/', '$1 $2', $date); 121 $date = preg_replace('/(\d)([A-Z])/', '$1 $2', $date); 122 // Ensure space before/after calendar escapes 123 $date = preg_replace('/@#[^@]+@/', ' $0 ', $date); 124 // "BET." => "BET" 125 $date = preg_replace('/(\w\w)\./', '$1', $date); 126 // "CIR" => "ABT" 127 $date = str_replace(' CIR ', ' ABT ', $date); 128 $date = str_replace(' APX ', ' ABT ', $date); 129 // B.C. => BC (temporarily, to allow easier handling of ".") 130 $date = str_replace(' B.C. ', ' BC ', $date); 131 // TMG uses "EITHER X OR Y" 132 $date = preg_replace('/^ EITHER (.+) OR (.+)/', ' BET $1 AND $2', $date); 133 // "BET X - Y " => "BET X AND Y" 134 $date = preg_replace('/^(.* BET .+) - (.+)/', '$1 AND $2', $date); 135 $date = preg_replace('/^(.* FROM .+) - (.+)/', '$1 TO $2', $date); 136 // "@#ESC@ FROM X TO Y" => "FROM @#ESC@ X TO @#ESC@ Y" 137 $date = preg_replace('/^ +(@#[^@]+@) +FROM +(.+) +TO +(.+)/', ' FROM $1 $2 TO $1 $3', $date); 138 $date = preg_replace('/^ +(@#[^@]+@) +BET +(.+) +AND +(.+)/', ' BET $1 $2 AND $1 $3', $date); 139 // "@#ESC@ AFT X" => "AFT @#ESC@ X" 140 $date = preg_replace('/^ +(@#[^@]+@) +(FROM|BET|TO|AND|BEF|AFT|CAL|EST|INT|ABT) +(.+)/', ' $2 $1 $3', $date); 141 // Ignore any remaining punctuation, e.g. "14-MAY, 1900" => "14 MAY 1900" 142 // (don't change "/" - it is used in NS/OS dates) 143 $date = preg_replace('/[.,:;-]/', ' ', $date); 144 // BC => B.C. 145 $date = str_replace(' BC ', ' B.C. ', $date); 146 // Append the "INT" text 147 $data = $date . $text; 148 break; 149 case '_FILE': 150 $tag = 'FILE'; 151 break; 152 case 'FORM': 153 // Consistent commas 154 $data = preg_replace('/ *, */', ', ', $data); 155 break; 156 case 'HEAD': 157 // HEAD records don't have an XREF or DATA 158 if ($level === '0') { 159 $xref = ''; 160 $data = ''; 161 } 162 break; 163 case 'NAME': 164 // Tidy up non-printing characters 165 $data = preg_replace('/ +/', ' ', trim($data)); 166 break; 167 case 'PEDI': 168 // PEDI values are lower case 169 $data = strtolower($data); 170 break; 171 case 'PLAC': 172 // Consistent commas 173 $data = preg_replace('/ *[,,،] */u', ', ', $data); 174 // The Master Genealogist stores LAT/LONG data in the PLAC field, e.g. Pennsylvania, USA, 395945N0751013W 175 if (preg_match('/(.*), (\d\d)(\d\d)(\d\d)([NS])(\d\d\d)(\d\d)(\d\d)([EW])$/', $data, $match)) { 176 $data = 177 $match[1] . "\n" . 178 ($level + 1) . " MAP\n" . 179 ($level + 2) . ' LATI ' . ($match[5] . round($match[2] + ($match[3] / 60) + ($match[4] / 3600), 4)) . "\n" . 180 ($level + 2) . ' LONG ' . ($match[9] . round($match[6] + ($match[7] / 60) + ($match[8] / 3600), 4)); 181 } 182 break; 183 case 'RESN': 184 // RESN values are lower case (confidential, privacy, locked, none) 185 $data = strtolower($data); 186 if ($data === 'invisible') { 187 $data = 'confidential'; // From old versions of Legacy. 188 } 189 break; 190 case 'SEX': 191 $data = strtoupper($data); 192 break; 193 case 'STAT': 194 if ($data === 'CANCELLED') { 195 // PhpGedView mis-spells this tag - correct it. 196 $data = 'CANCELED'; 197 } 198 break; 199 case 'TEMP': 200 // Temple codes are upper case 201 $data = strtoupper($data); 202 break; 203 case 'TRLR': 204 // TRLR records don't have an XREF or DATA 205 if ($level === '0') { 206 $xref = ''; 207 $data = ''; 208 } 209 break; 210 } 211 // Suppress "Y", for facts/events with a DATE or PLAC 212 if ($data === 'y') { 213 $data = 'Y'; 214 } 215 if ($level === '1' && $data === 'Y') { 216 for ($i = $n + 1; $i < $num_matches - 1 && $matches[$i][1] !== '1'; ++$i) { 217 if ($matches[$i][3] === 'DATE' || $matches[$i][3] === 'PLAC') { 218 $data = ''; 219 break; 220 } 221 } 222 } 223 // Reassemble components back into a single line 224 switch ($tag) { 225 default: 226 // Remove tabs and multiple/leading/trailing spaces 227 $data = strtr($data, ["\t" => ' ']); 228 $data = trim($data, ' '); 229 while (str_contains($data, ' ')) { 230 $data = strtr($data, [' ' => ' ']); 231 } 232 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 233 break; 234 case 'NOTE': 235 case 'TEXT': 236 case 'DATA': 237 case 'CONT': 238 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 239 break; 240 case 'FILE': 241 // Strip off the user-defined path prefix 242 $GEDCOM_MEDIA_PATH = $tree->getPreference('GEDCOM_MEDIA_PATH'); 243 if ($GEDCOM_MEDIA_PATH !== '' && str_starts_with($data, $GEDCOM_MEDIA_PATH)) { 244 $data = substr($data, strlen($GEDCOM_MEDIA_PATH)); 245 } 246 // convert backslashes in filenames to forward slashes 247 $data = preg_replace("/\\\\/", '/', $data); 248 249 $newrec .= ($newrec ? "\n" : '') . $level . ' ' . ($level === '0' && $xref ? $xref . ' ' : '') . $tag . ($data === '' && $tag !== 'NOTE' ? '' : ' ' . $data); 250 break; 251 case 'CONC': 252 // Merge CONC lines, to simplify access later on. 253 $newrec .= ($tree->getPreference('WORD_WRAPPED_NOTES') ? ' ' : '') . $data; 254 break; 255 } 256 } 257 258 return $newrec; 259 } 260 261 /** 262 * import record into database 263 * this function will parse the given gedcom record and add it to the database 264 * 265 * @param string $gedrec the raw gedcom record to parse 266 * @param Tree $tree import the record into this tree 267 * @param bool $update whether this is an updated record that has been accepted 268 * 269 * @return void 270 * @throws GedcomErrorException 271 */ 272 public function importRecord(string $gedrec, Tree $tree, bool $update): void 273 { 274 $tree_id = $tree->id(); 275 276 // Escaped @ signs (only if importing from file) 277 if (!$update) { 278 $gedrec = str_replace('@@', '@', $gedrec); 279 } 280 281 // Standardise gedcom format 282 $gedrec = $this->reformatRecord($gedrec, $tree); 283 284 // import different types of records 285 if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 286 [, $xref, $type] = $match; 287 } elseif (str_starts_with($gedrec, '0 HEAD')) { 288 $type = 'HEAD'; 289 $xref = 'HEAD'; // For records without an XREF, use the type as a pseudo XREF. 290 } elseif (str_starts_with($gedrec, '0 TRLR')) { 291 $tree->setPreference('imported', '1'); 292 $type = 'TRLR'; 293 $xref = 'TRLR'; // For records without an XREF, use the type as a pseudo XREF. 294 } elseif (str_starts_with($gedrec, '0 _PLAC_DEFN')) { 295 $this->importLegacyPlacDefn($gedrec); 296 297 return; 298 } elseif (str_starts_with($gedrec, '0 _PLAC ')) { 299 $this->importTNGPlac($gedrec); 300 301 return; 302 } elseif (str_starts_with($gedrec, '0 _EVDEF')) { 303 // Created by RootsMagic. We cannot process these records without an XREF. 304 return; 305 } elseif (str_starts_with($gedrec, '0 _HASHTAG_DEFN')) { 306 // Created by Legacy. We cannot process these records without an XREF. 307 return; 308 } elseif (str_starts_with($gedrec, '0 _EVENT_DEFN')) { 309 // Created by PAF and Legacy. We cannot process these records without an XREF. 310 return; 311 } elseif (str_starts_with($gedrec, '0 _TODO')) { 312 // Created by Legacy. We cannot process these records without an XREF. 313 return; 314 } elseif (str_starts_with($gedrec, '0 PEDIGREELINK')) { 315 // Created by GenoPro. We cannot process these records without an XREF. 316 return; 317 } elseif (str_starts_with($gedrec, '0 GLOBAL')) { 318 // Created by GenoPro. We cannot process these records without an XREF. 319 return; 320 } elseif (str_starts_with($gedrec, '0 GENOMAP')) { 321 // Created by GenoPro. We cannot process these records without an XREF. 322 return; 323 } elseif (str_starts_with($gedrec, '0 EMOTIONALRELATIONSHIP')) { 324 // Created by GenoPro. We cannot process these records without an XREF. 325 return; 326 } elseif (str_starts_with($gedrec, '0 SOCIALRELATIONSHIP')) { 327 // Created by GenoPro. We cannot process these records without an XREF. 328 return; 329 } elseif (str_starts_with($gedrec, '0 LABEL')) { 330 // Created by GenoPro. We cannot process these records without an XREF. 331 return; 332 } else { 333 throw new GedcomErrorException($gedrec); 334 } 335 336 // Add a _UID 337 if ($tree->getPreference('GENERATE_UIDS') === '1' && !str_contains($gedrec, "\n1 _UID ")) { 338 $element = Registry::elementFactory()->make($type . ':_UID'); 339 if (!$element instanceof UnknownElement) { 340 $gedrec .= "\n1 _UID " . $element->default($tree); 341 } 342 } 343 344 // If the user has downloaded their GEDCOM data (containing media objects) and edited it 345 // using an application which does not support (and deletes) media objects, then add them 346 // back in. 347 if ($tree->getPreference('keep_media')) { 348 $old_linked_media = DB::table('link') 349 ->where('l_from', '=', $xref) 350 ->where('l_file', '=', $tree_id) 351 ->where('l_type', '=', 'OBJE') 352 ->pluck('l_to'); 353 354 // Delete these links - so that we do not insert them again in updateLinks() 355 DB::table('link') 356 ->where('l_from', '=', $xref) 357 ->where('l_file', '=', $tree_id) 358 ->where('l_type', '=', 'OBJE') 359 ->delete(); 360 361 foreach ($old_linked_media as $media_id) { 362 $gedrec .= "\n1 OBJE @" . $media_id . '@'; 363 } 364 } 365 366 // Convert inline media into media objects 367 $gedrec = $this->convertInlineMedia($tree, $gedrec); 368 369 switch ($type) { 370 case Individual::RECORD_TYPE: 371 $record = Registry::individualFactory()->new($xref, $gedrec, null, $tree); 372 373 if (preg_match('/\n1 RIN (.+)/', $gedrec, $match)) { 374 $rin = $match[1]; 375 } else { 376 $rin = $xref; 377 } 378 379 // The database can only store MFU, and many of the stats queries assume this. 380 $sex = $record->sex(); 381 $sex = $sex === 'M' || $sex === 'F' ? $sex : 'U'; 382 383 DB::table('individuals')->insert([ 384 'i_id' => $xref, 385 'i_file' => $tree_id, 386 'i_rin' => $rin, 387 'i_sex' => $sex, 388 'i_gedcom' => $gedrec, 389 ]); 390 391 // Update the cross-reference/index tables. 392 $this->updatePlaces($xref, $tree, $gedrec); 393 $this->updateDates($xref, $tree_id, $gedrec); 394 $this->updateNames($xref, $tree_id, $record); 395 break; 396 397 case Family::RECORD_TYPE: 398 if (preg_match('/\n1 HUSB @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) { 399 $husb = $match[1]; 400 } else { 401 $husb = ''; 402 } 403 if (preg_match('/\n1 WIFE @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match)) { 404 $wife = $match[1]; 405 } else { 406 $wife = ''; 407 } 408 $nchi = preg_match_all('/\n1 CHIL @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $match); 409 if (preg_match('/\n1 NCHI (\d+)/', $gedrec, $match)) { 410 $nchi = max($nchi, $match[1]); 411 } 412 413 DB::table('families')->insert([ 414 'f_id' => $xref, 415 'f_file' => $tree_id, 416 'f_husb' => $husb, 417 'f_wife' => $wife, 418 'f_gedcom' => $gedrec, 419 'f_numchil' => $nchi, 420 ]); 421 422 // Update the cross-reference/index tables. 423 $this->updatePlaces($xref, $tree, $gedrec); 424 $this->updateDates($xref, $tree_id, $gedrec); 425 break; 426 427 case Source::RECORD_TYPE: 428 if (preg_match('/\n1 TITL (.+)/', $gedrec, $match)) { 429 $name = $match[1]; 430 } elseif (preg_match('/\n1 ABBR (.+)/', $gedrec, $match)) { 431 $name = $match[1]; 432 } else { 433 $name = $xref; 434 } 435 436 DB::table('sources')->insert([ 437 's_id' => $xref, 438 's_file' => $tree_id, 439 's_name' => mb_substr($name, 0, 255), 440 's_gedcom' => $gedrec, 441 ]); 442 break; 443 444 case Repository::RECORD_TYPE: 445 case Note::RECORD_TYPE: 446 case Submission::RECORD_TYPE: 447 case Submitter::RECORD_TYPE: 448 case Location::RECORD_TYPE: 449 DB::table('other')->insert([ 450 'o_id' => $xref, 451 'o_file' => $tree_id, 452 'o_type' => $type, 453 'o_gedcom' => $gedrec, 454 ]); 455 break; 456 457 case Header::RECORD_TYPE: 458 // Force HEAD records to have a creation date. 459 if (!str_contains($gedrec, "\n1 DATE ")) { 460 $today = strtoupper(date('d M Y')); 461 $gedrec .= "\n1 DATE " . $today; 462 } 463 464 DB::table('other')->insert([ 465 'o_id' => $xref, 466 'o_file' => $tree_id, 467 'o_type' => Header::RECORD_TYPE, 468 'o_gedcom' => $gedrec, 469 ]); 470 break; 471 472 473 case Media::RECORD_TYPE: 474 $record = Registry::mediaFactory()->new($xref, $gedrec, null, $tree); 475 476 DB::table('media')->insert([ 477 'm_id' => $xref, 478 'm_file' => $tree_id, 479 'm_gedcom' => $gedrec, 480 ]); 481 482 foreach ($record->mediaFiles() as $media_file) { 483 DB::table('media_file')->insert([ 484 'm_id' => $xref, 485 'm_file' => $tree_id, 486 'multimedia_file_refn' => mb_substr($media_file->filename(), 0, 248), 487 'multimedia_format' => mb_substr($media_file->format(), 0, 4), 488 'source_media_type' => mb_substr($media_file->type(), 0, 15), 489 'descriptive_title' => mb_substr($media_file->title(), 0, 248), 490 ]); 491 } 492 break; 493 494 default: // Custom record types. 495 DB::table('other')->insert([ 496 'o_id' => $xref, 497 'o_file' => $tree_id, 498 'o_type' => mb_substr($type, 0, 15), 499 'o_gedcom' => $gedrec, 500 ]); 501 break; 502 } 503 504 // Update the cross-reference/index tables. 505 $this->updateLinks($xref, $tree_id, $gedrec); 506 } 507 508 /** 509 * Legacy Family Tree software generates _PLAC_DEFN records containing LAT/LONG values 510 * 511 * @param string $gedcom 512 */ 513 private function importLegacyPlacDefn(string $gedcom): void 514 { 515 $gedcom_service = new GedcomService(); 516 517 if (preg_match('/\n1 PLAC (.+)/', $gedcom, $match)) { 518 $place_name = $match[1]; 519 } else { 520 return; 521 } 522 523 if (preg_match('/\n3 LATI ([NS].+)/', $gedcom, $match)) { 524 $latitude = $gedcom_service->readLatitude($match[1]); 525 } else { 526 return; 527 } 528 529 if (preg_match('/\n3 LONG ([EW].+)/', $gedcom, $match)) { 530 $longitude = $gedcom_service->readLongitude($match[1]); 531 } else { 532 return; 533 } 534 535 $location = new PlaceLocation($place_name); 536 537 if ($location->latitude() === null && $location->longitude() === null) { 538 DB::table('place_location') 539 ->where('id', '=', $location->id()) 540 ->update([ 541 'latitude' => $latitude, 542 'longitude' => $longitude, 543 ]); 544 } 545 } 546 547 /** 548 * Legacy Family Tree software generates _PLAC records containing LAT/LONG values 549 * 550 * @param string $gedcom 551 */ 552 private function importTNGPlac(string $gedcom): void 553 { 554 if (preg_match('/^0 _PLAC (.+)/', $gedcom, $match)) { 555 $place_name = $match[1]; 556 } else { 557 return; 558 } 559 560 if (preg_match('/\n2 LATI (.+)/', $gedcom, $match)) { 561 $latitude = (float) $match[1]; 562 } else { 563 return; 564 } 565 566 if (preg_match('/\n2 LONG (.+)/', $gedcom, $match)) { 567 $longitude = (float) $match[1]; 568 } else { 569 return; 570 } 571 572 $location = new PlaceLocation($place_name); 573 574 if ($location->latitude() === null && $location->longitude() === null) { 575 DB::table('place_location') 576 ->where('id', '=', $location->id()) 577 ->update([ 578 'latitude' => $latitude, 579 'longitude' => $longitude, 580 ]); 581 } 582 } 583 584 /** 585 * Extract all level 2 places from the given record and insert them into the places table 586 * 587 * @param string $xref 588 * @param Tree $tree 589 * @param string $gedrec 590 * 591 * @return void 592 */ 593 public function updatePlaces(string $xref, Tree $tree, string $gedrec): void 594 { 595 // Insert all new rows together 596 $rows = []; 597 598 preg_match_all('/\n2 PLAC (.+)/', $gedrec, $matches); 599 600 $places = array_unique($matches[1]); 601 602 foreach ($places as $place_name) { 603 $place = new Place($place_name, $tree); 604 605 // Calling Place::id() will create the entry in the database, if it doesn't already exist. 606 while ($place->id() !== 0) { 607 $rows[] = [ 608 'pl_p_id' => $place->id(), 609 'pl_gid' => $xref, 610 'pl_file' => $tree->id(), 611 ]; 612 613 $place = $place->parent(); 614 } 615 } 616 617 // array_unique doesn't work with arrays of arrays 618 $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows))); 619 620 // PDO has a limit of 65535 placeholders, and each row requires 3 placeholders. 621 foreach (array_chunk($rows, 20000) as $chunk) { 622 DB::table('placelinks')->insert($chunk); 623 } 624 } 625 626 /** 627 * Extract all the dates from the given record and insert them into the database. 628 * 629 * @param string $xref 630 * @param int $ged_id 631 * @param string $gedrec 632 * 633 * @return void 634 */ 635 private function updateDates(string $xref, int $ged_id, string $gedrec): void 636 { 637 // Insert all new rows together 638 $rows = []; 639 640 preg_match_all("/\n1 (\w+).*(?:\n[2-9].*)*\n2 DATE (.+)(?:\n[2-9].*)*/", $gedrec, $matches, PREG_SET_ORDER); 641 642 foreach ($matches as $match) { 643 $fact = $match[1]; 644 $date = new Date($match[2]); 645 $rows[] = [ 646 'd_day' => $date->minimumDate()->day, 647 'd_month' => $date->minimumDate()->format('%O'), 648 'd_mon' => $date->minimumDate()->month, 649 'd_year' => $date->minimumDate()->year, 650 'd_julianday1' => $date->minimumDate()->minimumJulianDay(), 651 'd_julianday2' => $date->minimumDate()->maximumJulianDay(), 652 'd_fact' => $fact, 653 'd_gid' => $xref, 654 'd_file' => $ged_id, 655 'd_type' => $date->minimumDate()->format('%@'), 656 ]; 657 658 $rows[] = [ 659 'd_day' => $date->maximumDate()->day, 660 'd_month' => $date->maximumDate()->format('%O'), 661 'd_mon' => $date->maximumDate()->month, 662 'd_year' => $date->maximumDate()->year, 663 'd_julianday1' => $date->maximumDate()->minimumJulianDay(), 664 'd_julianday2' => $date->maximumDate()->maximumJulianDay(), 665 'd_fact' => $fact, 666 'd_gid' => $xref, 667 'd_file' => $ged_id, 668 'd_type' => $date->minimumDate()->format('%@'), 669 ]; 670 } 671 672 // array_unique doesn't work with arrays of arrays 673 $rows = array_intersect_key($rows, array_unique(array_map('serialize', $rows))); 674 675 DB::table('dates')->insert($rows); 676 } 677 678 /** 679 * Extract all the links from the given record and insert them into the database 680 * 681 * @param string $xref 682 * @param int $ged_id 683 * @param string $gedrec 684 * 685 * @return void 686 */ 687 private function updateLinks(string $xref, int $ged_id, string $gedrec): void 688 { 689 // Insert all new rows together 690 $rows = []; 691 692 preg_match_all('/\n\d+ (' . Gedcom::REGEX_TAG . ') @(' . Gedcom::REGEX_XREF . ')@/', $gedrec, $matches, PREG_SET_ORDER); 693 694 foreach ($matches as $match) { 695 // Some applications (e.g. GenoPro) create links longer than 15 characters. 696 $link = mb_substr($match[1], 0, 15); 697 698 // Take care of "duplicates" that differ on case/collation, e.g. "SOUR @S1@" and "SOUR @s1@" 699 $rows[$link . strtoupper($match[2])] = [ 700 'l_from' => $xref, 701 'l_to' => $match[2], 702 'l_type' => $link, 703 'l_file' => $ged_id, 704 ]; 705 } 706 707 DB::table('link')->insert($rows); 708 } 709 710 /** 711 * Extract all the names from the given record and insert them into the database. 712 * 713 * @param string $xref 714 * @param int $ged_id 715 * @param Individual $record 716 * 717 * @return void 718 */ 719 private function updateNames(string $xref, int $ged_id, Individual $record): void 720 { 721 // Insert all new rows together 722 $rows = []; 723 724 foreach ($record->getAllNames() as $n => $name) { 725 if ($name['givn'] === Individual::PRAENOMEN_NESCIO) { 726 $soundex_givn_std = null; 727 $soundex_givn_dm = null; 728 } else { 729 $soundex_givn_std = Soundex::russell($name['givn']); 730 $soundex_givn_dm = Soundex::daitchMokotoff($name['givn']); 731 } 732 733 if ($name['surn'] === Individual::NOMEN_NESCIO) { 734 $soundex_surn_std = null; 735 $soundex_surn_dm = null; 736 } else { 737 $soundex_surn_std = Soundex::russell($name['surname']); 738 $soundex_surn_dm = Soundex::daitchMokotoff($name['surname']); 739 } 740 741 $rows[] = [ 742 'n_file' => $ged_id, 743 'n_id' => $xref, 744 'n_num' => $n, 745 'n_type' => $name['type'], 746 'n_sort' => mb_substr($name['sort'], 0, 255), 747 'n_full' => mb_substr($name['fullNN'], 0, 255), 748 'n_surname' => mb_substr($name['surname'], 0, 255), 749 'n_surn' => mb_substr($name['surn'], 0, 255), 750 'n_givn' => mb_substr($name['givn'], 0, 255), 751 'n_soundex_givn_std' => $soundex_givn_std, 752 'n_soundex_surn_std' => $soundex_surn_std, 753 'n_soundex_givn_dm' => $soundex_givn_dm, 754 'n_soundex_surn_dm' => $soundex_surn_dm, 755 ]; 756 } 757 758 DB::table('name')->insert($rows); 759 } 760 761 /** 762 * Extract inline media data, and convert to media objects. 763 * 764 * @param Tree $tree 765 * @param string $gedcom 766 * 767 * @return string 768 */ 769 private function convertInlineMedia(Tree $tree, string $gedcom): string 770 { 771 while (preg_match('/\n1 OBJE(?:\n[2-9].+)+/', $gedcom, $match)) { 772 $xref = $this->createMediaObject($match[0], $tree); 773 $gedcom = strtr($gedcom, [$match[0] => "\n1 OBJE @" . $xref . '@']); 774 } 775 while (preg_match('/\n2 OBJE(?:\n[3-9].+)+/', $gedcom, $match)) { 776 $xref = $this->createMediaObject($match[0], $tree); 777 $gedcom = strtr($gedcom, [$match[0] => "\n2 OBJE @" . $xref . '@']); 778 } 779 while (preg_match('/\n3 OBJE(?:\n[4-9].+)+/', $gedcom, $match)) { 780 $xref = $this->createMediaObject($match[0], $tree); 781 $gedcom = strtr($gedcom, [$match[0] => "\n3 OBJE @" . $xref . '@']); 782 } 783 784 return $gedcom; 785 } 786 787 /** 788 * Create a new media object, from inline media data. 789 * 790 * GEDCOM 5.5.1 specifies: +1 FILE / +2 FORM / +3 MEDI / +1 TITL 791 * GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +1 TITL 792 * GEDCOM 5.5.1 says that GEDCOM 5.5 specifies: +1 FILE / +1 FORM / +2 MEDI 793 * 794 * Legacy generates: +1 FORM / +1 FILE / +1 TITL / +1 _SCBK / +1 _PRIM / +1 _TYPE / +1 NOTE 795 * RootsMagic generates: +1 FILE / +1 FORM / +1 TITL 796 * 797 * @param string $gedcom 798 * @param Tree $tree 799 * 800 * @return string 801 */ 802 private function createMediaObject(string $gedcom, Tree $tree): string 803 { 804 preg_match('/\n\d FILE (.+)/', $gedcom, $match); 805 $file = $match[1] ?? ''; 806 807 preg_match('/\n\d TITL (.+)/', $gedcom, $match); 808 $title = $match[1] ?? ''; 809 810 preg_match('/\n\d FORM (.+)/', $gedcom, $match); 811 $format = $match[1] ?? ''; 812 813 preg_match('/\n\d MEDI (.+)/', $gedcom, $match); 814 $media = $match[1] ?? ''; 815 816 preg_match('/\n\d _SCBK (.+)/', $gedcom, $match); 817 $scrapbook = $match[1] ?? ''; 818 819 preg_match('/\n\d _PRIM (.+)/', $gedcom, $match); 820 $primary = $match[1] ?? ''; 821 822 preg_match('/\n\d _TYPE (.+)/', $gedcom, $match); 823 if ($media === '') { 824 // Legacy uses _TYPE instead of MEDI 825 $media = $match[1] ?? ''; 826 $type = ''; 827 } else { 828 $type = $match[1] ?? ''; 829 } 830 831 preg_match_all('/\n\d NOTE (.+(?:\n\d CONT.*)*)/', $gedcom, $matches); 832 $notes = $matches[1] ?? []; 833 834 // Have we already created a media object with the same title/filename? 835 $xref = DB::table('media_file') 836 ->where('m_file', '=', $tree->id()) 837 ->where('descriptive_title', '=', mb_substr($title, 0, 248)) 838 ->where('multimedia_file_refn', '=', mb_substr($file, 0, 248)) 839 ->value('m_id'); 840 841 if ($xref === null) { 842 $xref = Registry::xrefFactory()->make(Media::RECORD_TYPE); 843 844 // convert to a media-object 845 $gedcom = '0 @' . $xref . "@ OBJE\n1 FILE " . $file; 846 847 if ($format !== '') { 848 $gedcom .= "\n2 FORM " . $format; 849 850 if ($media !== '') { 851 $gedcom .= "\n3 TYPE " . $media; 852 } 853 } 854 855 if ($title !== '') { 856 $gedcom .= "\n3 TITL " . $title; 857 } 858 859 if ($scrapbook !== '') { 860 $gedcom .= "\n1 _SCBK " . $scrapbook; 861 } 862 863 if ($primary !== '') { 864 $gedcom .= "\n1 _PRIM " . $primary; 865 } 866 867 if ($type !== '') { 868 $gedcom .= "\n1 _TYPE " . $type; 869 } 870 871 foreach ($notes as $note) { 872 $gedcom .= "\n1 NOTE " . strtr($note, ["\n3" => "\n2", "\n4" => "\n2", "\n5" => "\n2"]); 873 } 874 875 DB::table('media')->insert([ 876 'm_id' => $xref, 877 'm_file' => $tree->id(), 878 'm_gedcom' => $gedcom, 879 ]); 880 881 DB::table('media_file')->insert([ 882 'm_id' => $xref, 883 'm_file' => $tree->id(), 884 'multimedia_file_refn' => mb_substr($file, 0, 248), 885 'multimedia_format' => mb_substr($format, 0, 4), 886 'source_media_type' => mb_substr($media, 0, 15), 887 'descriptive_title' => mb_substr($title, 0, 248), 888 ]); 889 } 890 891 return $xref; 892 } 893 894 /** 895 * update a record in the database 896 * 897 * @param string $gedrec 898 * @param Tree $tree 899 * @param bool $delete 900 * 901 * @return void 902 * @throws GedcomErrorException 903 */ 904 public function updateRecord(string $gedrec, Tree $tree, bool $delete): void 905 { 906 if (preg_match('/^0 @(' . Gedcom::REGEX_XREF . ')@ (' . Gedcom::REGEX_TAG . ')/', $gedrec, $match)) { 907 [, $gid, $type] = $match; 908 } elseif (preg_match('/^0 (HEAD)(?:\n|$)/', $gedrec, $match)) { 909 // The HEAD record has no XREF. Any others? 910 $gid = $match[1]; 911 $type = $match[1]; 912 } else { 913 throw new GedcomErrorException($gedrec); 914 } 915 916 // Place links 917 DB::table('placelinks') 918 ->where('pl_gid', '=', $gid) 919 ->where('pl_file', '=', $tree->id()) 920 ->delete(); 921 922 // Orphaned places. If we're deleting "Westminster, London, England", 923 // then we may also need to delete "London, England" and "England". 924 do { 925 $affected = DB::table('places') 926 ->leftJoin('placelinks', function (JoinClause $join): void { 927 $join 928 ->on('p_id', '=', 'pl_p_id') 929 ->on('p_file', '=', 'pl_file'); 930 }) 931 ->whereNull('pl_p_id') 932 ->delete(); 933 } while ($affected > 0); 934 935 DB::table('dates') 936 ->where('d_gid', '=', $gid) 937 ->where('d_file', '=', $tree->id()) 938 ->delete(); 939 940 DB::table('name') 941 ->where('n_id', '=', $gid) 942 ->where('n_file', '=', $tree->id()) 943 ->delete(); 944 945 DB::table('link') 946 ->where('l_from', '=', $gid) 947 ->where('l_file', '=', $tree->id()) 948 ->delete(); 949 950 switch ($type) { 951 case Individual::RECORD_TYPE: 952 DB::table('individuals') 953 ->where('i_id', '=', $gid) 954 ->where('i_file', '=', $tree->id()) 955 ->delete(); 956 break; 957 958 case Family::RECORD_TYPE: 959 DB::table('families') 960 ->where('f_id', '=', $gid) 961 ->where('f_file', '=', $tree->id()) 962 ->delete(); 963 break; 964 965 case Source::RECORD_TYPE: 966 DB::table('sources') 967 ->where('s_id', '=', $gid) 968 ->where('s_file', '=', $tree->id()) 969 ->delete(); 970 break; 971 972 case Media::RECORD_TYPE: 973 DB::table('media_file') 974 ->where('m_id', '=', $gid) 975 ->where('m_file', '=', $tree->id()) 976 ->delete(); 977 978 DB::table('media') 979 ->where('m_id', '=', $gid) 980 ->where('m_file', '=', $tree->id()) 981 ->delete(); 982 break; 983 984 default: 985 DB::table('other') 986 ->where('o_id', '=', $gid) 987 ->where('o_file', '=', $tree->id()) 988 ->delete(); 989 break; 990 } 991 992 if (!$delete) { 993 $this->importRecord($gedrec, $tree, true); 994 } 995 } 996} 997