<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Http\RequestHandlers;

use Exception;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
use Fisharebest\Webtrees\Http\ViewResponseTrait;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Services\GedcomImportService;
use Fisharebest\Webtrees\Services\TimeoutService;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\DetectsConcurrencyErrors;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;
use RuntimeException;

use function preg_last_error;
use function preg_split;
use function str_replace;
use function str_starts_with;
use function strlen;
use function substr;

/**
 * Load a chunk of GEDCOM data.
 *
 * The GEDCOM file is expected to be already stored, split into pieces, in the
 * "gedcom_chunk" table. Each request imports as many pending chunks as fit in
 * the time allowed by the timeout service and then reports the outcome.
 */
class GedcomLoad implements RequestHandlerInterface
{
    use ViewResponseTrait;
    use DetectsConcurrencyErrors;

    private GedcomImportService $gedcom_import_service;

    private TimeoutService $timeout_service;

    /**
     * GedcomLoad constructor.
     *
     * @param GedcomImportService $gedcom_import_service Imports individual GEDCOM records into a tree.
     * @param TimeoutService      $timeout_service       Tells us when to stop working on this request.
     */
    public function __construct(
        GedcomImportService $gedcom_import_service,
        TimeoutService $timeout_service
    ) {
        $this->gedcom_import_service = $gedcom_import_service;
        $this->timeout_service       = $timeout_service;
    }

    /**
     * Import the next pending chunk(s) of GEDCOM data for the tree in the request.
     *
     * The response is one of three views:
     *  - "admin/import-complete" when every chunk is already marked as imported;
     *  - "admin/import-fail" when the header or trailer record is missing, or
     *    when an unexpected (non-concurrency) error occurs;
     *  - "admin/import-progress" otherwise.
     *
     * Note that the reported progress is measured before this request imports
     * anything, so it does not include the chunks processed by this request.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        // NOTE(review): the layout property presumably comes from ViewResponseTrait - confirm.
        $this->layout = 'layouts/ajax';

        $tree = Validator::attributes($request)->tree();

        try {
            // What is the current import status?
            $import_offset = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->where('imported', '=', '1')
                ->count();

            $import_total = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->count();

            // Finished?
            if ($import_offset === $import_total) {
                // All chunks were processed, but the tree was never flagged as imported.
                if ($tree->getPreference('imported') !== '1') {
                    return $this->viewResponse('admin/import-fail', [
                        'error' => I18N::translate('Invalid GEDCOM file - no trailer record found.'),
                        'tree'  => $tree,
                    ]);
                }

                return $this->viewResponse('admin/import-complete', ['tree' => $tree]);
            }

            // If we are loading the first (header) record, then delete old data.
            if ($import_offset === 0) {
                $queries = [
                    'individuals' => DB::table('individuals')->where('i_file', '=', $tree->id()),
                    'families'    => DB::table('families')->where('f_file', '=', $tree->id()),
                    'sources'     => DB::table('sources')->where('s_file', '=', $tree->id()),
                    'other'       => DB::table('other')->where('o_file', '=', $tree->id()),
                    'places'      => DB::table('places')->where('p_file', '=', $tree->id()),
                    'placelinks'  => DB::table('placelinks')->where('pl_file', '=', $tree->id()),
                    'name'        => DB::table('name')->where('n_file', '=', $tree->id()),
                    'dates'       => DB::table('dates')->where('d_file', '=', $tree->id()),
                    'change'      => DB::table('change')->where('gedcom_id', '=', $tree->id()),
                ];

                if ($tree->getPreference('keep_media') === '1') {
                    // Keep the media tables, and the links that point at media objects.
                    $queries['link'] = DB::table('link')->where('l_file', '=', $tree->id())
                        ->where('l_type', '<>', 'OBJE');
                } else {
                    $queries['link']       = DB::table('link')->where('l_file', '=', $tree->id());
                    $queries['media_file'] = DB::table('media_file')->where('m_file', '=', $tree->id());
                    $queries['media']      = DB::table('media')->where('m_file', '=', $tree->id());
                }

                foreach ($queries as $table => $query) {
                    // take() and delete() together don't return the number of delete rows.
                    // Each query is cloned so that the stored builder can be reused on every pass.
                    while ((clone $query)->count() > 0) {
                        (clone $query)->take(1000)->delete();

                        // Out of time: report which table we are working on. No chunk has been
                        // marked as imported yet, so a later request resumes the deletion here.
                        if ($this->timeout_service->isTimeLimitUp()) {
                            return $this->viewResponse('admin/import-progress', [
                                'errors'   => '',
                                'progress' => 0.0,
                                'status'   => I18N::translate('Deleting…') . ' ' . $table,
                                'tree'     => $tree,
                            ]);
                        }
                    }
                }
            }

            // Calculate progress so far.
            // $import_total cannot be zero here: that case returned in the "Finished?" branch above.
            $progress = $import_offset / $import_total;

            $first_time = $import_offset === 0;

            // Collect up any errors, and show them later.
            $errors = '';

            // Run for a short period of time. This keeps the resource requirements low.
            do {
                $data = DB::table('gedcom_chunk')
                    ->where('gedcom_id', '=', $tree->id())
                    ->where('imported', '=', '0')
                    ->orderBy('gedcom_chunk_id')
                    ->select(['gedcom_chunk_id', 'chunk_data'])
                    ->first();

                // Nothing left to import.
                if ($data === null) {
                    break;
                }

                // Mark the chunk as imported. This will create a row-lock, to prevent other
                // processes from reading it until we have finished.
                $n = DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                    ->where('imported', '=', '0')
                    ->update(['imported' => 1]);

                // Another process has already imported this data?
                if ($n === 0) {
                    break;
                }

                if ($first_time) {
                    // Remove any byte-order-mark
                    if (str_starts_with($data->chunk_data, UTF8::BYTE_ORDER_MARK)) {
                        $data->chunk_data = substr($data->chunk_data, strlen(UTF8::BYTE_ORDER_MARK));
                        DB::table('gedcom_chunk')
                            ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                            ->update(['chunk_data' => $data->chunk_data]);
                    }

                    if (!str_starts_with($data->chunk_data, '0 HEAD')) {
                        return $this->viewResponse('admin/import-fail', [
                            'error' => I18N::translate('Invalid GEDCOM file - no header record found.'),
                            'tree'  => $tree,
                        ]);
                    }

                    $first_time = false;
                }

                // Normalise line endings, so that records can be split on "\n".
                $data->chunk_data = str_replace("\r", "\n", $data->chunk_data);

                // Split the chunk into records: each one starts with a level-0 line.
                $records = preg_split('/\n+(?=0)/', $data->chunk_data);

                // preg_split() returns false when the regex engine fails (e.g. a PCRE limit is
                // exceeded). Fail loudly, before the chunk data is erased below - otherwise the
                // records in this chunk would be lost without any error being reported.
                if ($records === false) {
                    throw new RuntimeException(
                        'Unable to split GEDCOM chunk ' . $data->gedcom_chunk_id .
                        ' into records. PCRE error code: ' . preg_last_error()
                    );
                }

                // Import all the records in this chunk of data
                foreach ($records as $rec) {
                    try {
                        $this->gedcom_import_service->importRecord($rec, $tree, false);
                    } catch (GedcomErrorException $exception) {
                        // A bad record does not stop the import; its message is shown afterwards.
                        $errors .= $exception->getMessage();
                    }
                }

                // Do not need the data any more.
                DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                    ->update(['chunk_data' => '']);
            } while (!$this->timeout_service->isTimeLimitUp());

            return $this->viewResponse('admin/import-progress', [
                'errors'   => $errors,
                'progress' => $progress,
                'status'   => '',
                'tree'     => $tree,
            ]);
        } catch (Exception $ex) {
            // NOTE(review): no transaction is started in this class; presumably one is opened
            // elsewhere (e.g. by middleware) - confirm.
            DB::connection()->rollBack();

            // Deadlock? Try again.
            if ($this->causedByConcurrencyError($ex)) {
                return $this->viewResponse('admin/import-progress', [
                    'errors'   => '',
                    // $progress is unset if the exception occurred before it was calculated.
                    'progress' => $progress ?? 0.0,
                    'status'   => e($ex->getMessage()),
                    'tree'     => $tree,
                ]);
            }

            return $this->viewResponse('admin/import-fail', [
                'error' => e($ex->getMessage()),
                'tree'  => $tree,
            ]);
        }
    }
}