<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Http\RequestHandlers;

use Exception;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
use Fisharebest\Webtrees\Http\ViewResponseTrait;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Services\GedcomImportService;
use Fisharebest\Webtrees\Services\TimeoutService;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\DetectsConcurrencyErrors;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function preg_split;
use function str_replace;
use function str_starts_with;
use function strlen;
use function substr;

/**
 * Load a chunk of GEDCOM data.
 */
class GedcomLoad implements RequestHandlerInterface
{
    use ViewResponseTrait;
    use DetectsConcurrencyErrors;

    private GedcomImportService $gedcom_import_service;

    private TimeoutService $timeout_service;

    /**
     * @param GedcomImportService $gedcom_import_service
     * @param TimeoutService      $timeout_service
     */
    public function __construct(
        GedcomImportService $gedcom_import_service,
        TimeoutService $timeout_service
    ) {
        $this->gedcom_import_service = $gedcom_import_service;
        $this->timeout_service       = $timeout_service;
    }

    /**
     * Import pending rows of the gedcom_chunk table for the requested tree until
     * the time limit is reached, then report the outcome as an AJAX view.
     *
     * The response is one of:
     * - admin/import-complete: every chunk is imported and the tree is flagged as imported.
     * - admin/import-fail: missing header/trailer record, or an unexpected exception.
     * - admin/import-progress: more work remains (or a concurrency error occurred),
     *   so the request should be repeated.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/ajax';

        $tree = Validator::attributes($request)->tree();

        // Initialised before the try block, so that the catch block can always report
        // a value - even when an exception is thrown before progress is calculated
        // (e.g. while counting chunks or deleting old data).
        $progress = 0.0;

        try {
            // What is the current import status?
            $import_offset = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->where('imported', '=', '1')
                ->count();

            $import_total = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->count();

            // Finished?
            if ($import_offset === $import_total) {
                if ($tree->getPreference('imported') !== '1') {
                    return $this->viewResponse('admin/import-fail', [
                        'error' => I18N::translate('Invalid GEDCOM file - no trailer record found.'),
                        'tree'  => $tree,
                    ]);
                }

                return $this->viewResponse('admin/import-complete', ['tree' => $tree]);
            }

            // If we are loading the first (header) record, then delete old data.
            if ($import_offset === 0) {
                $queries = [
                    'individuals' => DB::table('individuals')->where('i_file', '=', $tree->id()),
                    'families'    => DB::table('families')->where('f_file', '=', $tree->id()),
                    'sources'     => DB::table('sources')->where('s_file', '=', $tree->id()),
                    'other'       => DB::table('other')->where('o_file', '=', $tree->id()),
                    'places'      => DB::table('places')->where('p_file', '=', $tree->id()),
                    'placelinks'  => DB::table('placelinks')->where('pl_file', '=', $tree->id()),
                    'name'        => DB::table('name')->where('n_file', '=', $tree->id()),
                    'dates'       => DB::table('dates')->where('d_file', '=', $tree->id()),
                    'change'      => DB::table('change')->where('gedcom_id', '=', $tree->id()),
                ];

                if ($tree->getPreference('keep_media') === '1') {
                    // Keeping media: leave the media tables alone and keep the links to media objects.
                    $queries['link'] = DB::table('link')->where('l_file', '=', $tree->id())
                        ->where('l_type', '<>', 'OBJE');
                } else {
                    $queries['link']       = DB::table('link')->where('l_file', '=', $tree->id());
                    $queries['media_file'] = DB::table('media_file')->where('m_file', '=', $tree->id());
                    $queries['media']      = DB::table('media')->where('m_file', '=', $tree->id());
                }

                foreach ($queries as $table => $query) {
                    // take() and delete() together don't return the number of delete rows.
                    // So delete in batches of 1000, and count the remaining rows each time.
                    while ((clone $query)->count() > 0) {
                        (clone $query)->take(1000)->delete();

                        // Out of time? Report which table we are working on, and continue in the next request.
                        if ($this->timeout_service->isTimeLimitUp()) {
                            return $this->viewResponse('admin/import-progress', [
                                'errors'   => '',
                                'progress' => 0.0,
                                'status'   => I18N::translate('Deleting…') . ' ' . $table,
                                'tree'     => $tree,
                            ]);
                        }
                    }
                }
            }

            // Calculate progress so far.
            // $import_total cannot be zero here: zero chunks would have matched the "Finished?" test.
            $progress = $import_offset / $import_total;

            $first_time = $import_offset === 0;

            // Collect up any errors, and show them later.
            $errors = '';

            // Run for a short period of time. This keeps the resource requirements low.
            do {
                $data = DB::table('gedcom_chunk')
                    ->where('gedcom_id', '=', $tree->id())
                    ->where('imported', '=', '0')
                    ->orderBy('gedcom_chunk_id')
                    ->select(['gedcom_chunk_id', 'chunk_data'])
                    ->first();

                // No more chunks to import.
                if ($data === null) {
                    break;
                }

                // Mark the chunk as imported. This will create a row-lock, to prevent other
                // processes from reading it until we have finished.
                $n = DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                    ->where('imported', '=', '0')
                    ->update(['imported' => 1]);

                // Another process has already imported this data?
                if ($n === 0) {
                    break;
                }

                if ($first_time) {
                    // Remove any byte-order-mark
                    if (str_starts_with($data->chunk_data, UTF8::BYTE_ORDER_MARK)) {
                        $data->chunk_data = substr($data->chunk_data, strlen(UTF8::BYTE_ORDER_MARK));
                        DB::table('gedcom_chunk')
                            ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                            ->update(['chunk_data' => $data->chunk_data]);
                    }

                    // The very first chunk must begin with the header record.
                    if (!str_starts_with($data->chunk_data, '0 HEAD')) {
                        return $this->viewResponse('admin/import-fail', [
                            'error' => I18N::translate('Invalid GEDCOM file - no header record found.'),
                            'tree'  => $tree,
                        ]);
                    }

                    $first_time = false;
                }

                // Normalise line endings. Any blank lines this creates are absorbed by the "\n+" below.
                $data->chunk_data = str_replace("\r", "\n", $data->chunk_data);

                // Import all the records in this chunk of data.
                // Records are split at line-breaks that are followed by a level 0 line.
                foreach (preg_split('/\n+(?=0)/', $data->chunk_data) as $rec) {
                    try {
                        $this->gedcom_import_service->importRecord($rec, $tree, false);
                    } catch (GedcomErrorException $exception) {
                        $errors .= $exception->getMessage();
                    }
                }

                // Do not need the data any more.
                DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                    ->update(['chunk_data' => '']);
            } while (!$this->timeout_service->isTimeLimitUp());

            return $this->viewResponse('admin/import-progress', [
                'errors'   => $errors,
                'progress' => $progress,
                'status'   => '',
                'tree'     => $tree,
            ]);
        } catch (Exception $ex) {
            DB::connection()->rollBack();

            // Deadlock? Try again.
            if ($this->causedByConcurrencyError($ex)) {
                return $this->viewResponse('admin/import-progress', [
                    'errors'   => '',
                    'progress' => $progress,
                    'status'   => e($ex->getMessage()),
                    'tree'     => $tree,
                ]);
            }

            return $this->viewResponse('admin/import-fail', [
                'error' => e($ex->getMessage()),
                'tree'  => $tree,
            ]);
        }
    }
}