<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Http\RequestHandlers;

use Exception;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
use Fisharebest\Webtrees\Http\ViewResponseTrait;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Services\GedcomImportService;
use Fisharebest\Webtrees\Services\TimeoutService;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\DetectsConcurrencyErrors;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function preg_split;
use function str_replace;
use function str_starts_with;
use function strlen;
use function substr;

/**
 * Load a chunk of GEDCOM data.
 *
 * Each request imports as many pending rows of the "gedcom_chunk" table as
 * fit inside the time limit reported by the TimeoutService, then renders a
 * progress, completion or failure view.
 */
class GedcomLoad implements RequestHandlerInterface
{
    use ViewResponseTrait;
    use DetectsConcurrencyErrors;

    private GedcomImportService $gedcom_import_service;

    private TimeoutService $timeout_service;

    /**
     * @param GedcomImportService $gedcom_import_service Imports individual GEDCOM records into a tree.
     * @param TimeoutService      $timeout_service       Tells us when to stop working on this request.
     */
    public function __construct(
        GedcomImportService $gedcom_import_service,
        TimeoutService $timeout_service
    ) {
        $this->gedcom_import_service = $gedcom_import_service;
        $this->timeout_service       = $timeout_service;
    }

    /**
     * Import pending GEDCOM chunks for the requested tree and report the result.
     *
     * The response is one of three views:
     *  - "admin/import-complete" when every chunk is imported and the tree
     *    preference "imported" is "1";
     *  - "admin/import-fail" when the data has no header/trailer, or when an
     *    exception that is not a concurrency error is caught;
     *  - "admin/import-progress" otherwise (more work remains, old data is
     *    still being deleted, or a concurrency error should be retried).
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/ajax';

        $tree = Validator::attributes($request)->tree();

        // The catch block below reports the progress. Give it a defined value, in case
        // an exception is thrown before the real progress has been calculated
        // (e.g. a deadlock while counting chunks or while deleting the old data).
        $progress = 0.0;

        try {
            // What is the current import status?
            $import_offset = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->where('imported', '=', '1')
                ->count();

            $import_total = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree->id())
                ->count();

            // Finished?
            if ($import_offset === $import_total) {
                if ($tree->getPreference('imported') !== '1') {
                    return $this->viewResponse('admin/import-fail', [
                        'error' => I18N::translate('Invalid GEDCOM file - no trailer record found.'),
                        'tree'  => $tree,
                    ]);
                }

                return $this->viewResponse('admin/import-complete', ['tree' => $tree]);
            }

            // If we are loading the first (header) record, then delete old data.
            if ($import_offset === 0) {
                $queries = [
                    'individuals' => DB::table('individuals')->where('i_file', '=', $tree->id()),
                    'families'    => DB::table('families')->where('f_file', '=', $tree->id()),
                    'sources'     => DB::table('sources')->where('s_file', '=', $tree->id()),
                    'other'       => DB::table('other')->where('o_file', '=', $tree->id()),
                    'places'      => DB::table('places')->where('p_file', '=', $tree->id()),
                    'placelinks'  => DB::table('placelinks')->where('pl_file', '=', $tree->id()),
                    'name'        => DB::table('name')->where('n_file', '=', $tree->id()),
                    'dates'       => DB::table('dates')->where('d_file', '=', $tree->id()),
                    'change'      => DB::table('change')->where('gedcom_id', '=', $tree->id()),
                ];

                if ($tree->getPreference('keep_media') === '1') {
                    // Keep the media tables, and the links that point to media objects.
                    $queries['link'] = DB::table('link')->where('l_file', '=', $tree->id())
                        ->where('l_type', '<>', 'OBJE');
                } else {
                    $queries['link']       = DB::table('link')->where('l_file', '=', $tree->id());
                    $queries['media_file'] = DB::table('media_file')->where('m_file', '=', $tree->id());
                    $queries['media']      = DB::table('media')->where('m_file', '=', $tree->id());
                }

                foreach ($queries as $table => $query) {
                    // take() and delete() together don't return the number of deleted rows,
                    // so count the remaining rows instead. The builder is cloned because
                    // take() would otherwise modify the shared query.
                    while ((clone $query)->count() > 0) {
                        (clone $query)->take(1000)->delete();

                        // Out of time? Nothing has been imported yet, so the next
                        // request will resume the deletion from here.
                        if ($this->timeout_service->isTimeLimitUp()) {
                            return $this->viewResponse('admin/import-progress', [
                                'errors'   => '',
                                'progress' => 0.0,
                                'status'   => I18N::translate('Deleting…') . ' ' . $table,
                                'tree'     => $tree,
                            ]);
                        }
                    }
                }
            }

            // Calculate progress so far. This is the fraction of chunks that were
            // imported before this request started. $import_total cannot be zero
            // here, as that case is handled by the "Finished?" check above.
            $progress = $import_offset / $import_total;

            $first_time = $import_offset === 0;

            // Collect up any errors, and show them later.
            $errors = '';

            // Run for a short period of time. This keeps the resource requirements low.
            do {
                $data = DB::table('gedcom_chunk')
                    ->where('gedcom_id', '=', $tree->id())
                    ->where('imported', '=', '0')
                    ->orderBy('gedcom_chunk_id')
                    ->select(['gedcom_chunk_id', 'chunk_data'])
                    ->first();

                if ($data === null) {
                    break;
                }

                // Mark the chunk as imported. This will create a row-lock, to prevent other
                // processes from reading it until we have finished.
                $n = DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                    ->where('imported', '=', '0')
                    ->update(['imported' => 1]);

                // Another process has already imported this data?
                if ($n === 0) {
                    break;
                }

                if ($first_time) {
                    // Remove any byte-order-mark, and store the cleaned data.
                    if (str_starts_with($data->chunk_data, UTF8::BYTE_ORDER_MARK)) {
                        $data->chunk_data = substr($data->chunk_data, strlen(UTF8::BYTE_ORDER_MARK));
                        DB::table('gedcom_chunk')
                            ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                            ->update(['chunk_data' => $data->chunk_data]);
                    }

                    if (!str_starts_with($data->chunk_data, '0 HEAD')) {
                        return $this->viewResponse('admin/import-fail', [
                            'error' => I18N::translate('Invalid GEDCOM file - no header record found.'),
                            'tree'  => $tree,
                        ]);
                    }

                    $first_time = false;
                }

                // Normalise line endings: runs of "\n" are collapsed by the split below.
                $data->chunk_data = str_replace("\r", "\n", $data->chunk_data);

                // Import all the records in this chunk of data. A new record starts at
                // each line that begins with "0".
                foreach (preg_split('/\n+(?=0)/', $data->chunk_data) as $rec) {
                    try {
                        $this->gedcom_import_service->importRecord($rec, $tree, false);
                    } catch (GedcomErrorException $exception) {
                        // A bad record should not abort the import. Report it instead.
                        $errors .= $exception->getMessage();
                    }
                }

                // Do not need the data any more.
                DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $data->gedcom_chunk_id)
                    ->update(['chunk_data' => '']);
            } while (!$this->timeout_service->isTimeLimitUp());

            return $this->viewResponse('admin/import-progress', [
                'errors'   => $errors,
                'progress' => $progress,
                'status'   => '',
                'tree'     => $tree,
            ]);
        } catch (Exception $ex) {
            // NOTE(review): no transaction is started in this class - presumably one
            // is opened by the caller (e.g. middleware). Confirm.
            DB::rollBack();

            // Deadlock? Try again.
            if ($this->causedByConcurrencyError($ex)) {
                return $this->viewResponse('admin/import-progress', [
                    'errors'   => '',
                    'progress' => $progress,
                    'status'   => e($ex->getMessage()),
                    'tree'     => $tree,
                ]);
            }

            return $this->viewResponse('admin/import-fail', [
                'error' => e($ex->getMessage()),
                'tree'  => $tree,
            ]);
        }
    }
}