<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2022 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Http\RequestHandlers;

use Exception;
use Fisharebest\Webtrees\Encodings\UTF8;
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
use Fisharebest\Webtrees\Http\ViewResponseTrait;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Services\GedcomImportService;
use Fisharebest\Webtrees\Services\TimeoutService;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\DetectsConcurrencyErrors;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function preg_split;
use function str_replace;
use function str_starts_with;
use function strlen;
use function substr;

/**
 * Load a chunk of GEDCOM data.
 *
 * Each request imports as many pending rows of the gedcom_chunk table as fit
 * within the time limit reported by the timeout service, then renders a
 * progress, completion or failure view.
 */
class GedcomLoad implements RequestHandlerInterface
{
    use ViewResponseTrait;
    use DetectsConcurrencyErrors;

    private GedcomImportService $gedcom_import_service;

    private TimeoutService $timeout_service;

    /**
     * GedcomLoad constructor.
     *
     * @param GedcomImportService $gedcom_import_service Imports the individual GEDCOM records
     * @param TimeoutService      $timeout_service       Tells us when to stop working on this request
     */
    public function __construct(
        GedcomImportService $gedcom_import_service,
        TimeoutService $timeout_service
    ) {
        $this->timeout_service       = $timeout_service;
        $this->gedcom_import_service = $gedcom_import_service;
    }

    /**
     * Import the next batch of chunks for the tree attached to the request.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface One of the admin/import-progress, admin/import-complete
     *                           or admin/import-fail views.
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/ajax';

        $tree = Validator::attributes($request)->tree();

        // Reported by the error handler below if we fail before the real progress is known.
        $progress = 0.0;

        try {
            $tree_id = $tree->id();

            // How many chunks have been imported, and how many are there altogether?
            $chunks_done = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree_id)
                ->where('imported', '=', '1')
                ->count();

            $chunks_total = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree_id)
                ->count();

            // Nothing left to import?
            if ($chunks_done === $chunks_total) {
                if ($tree->getPreference('imported') === '1') {
                    return $this->viewResponse('admin/import-complete', ['tree' => $tree]);
                }

                // Every chunk was processed, yet the tree was never flagged as imported.
                return $this->viewResponse('admin/import-fail', [
                    'error' => I18N::translate('Invalid GEDCOM file - no trailer record found.'),
                    'tree'  => $tree,
                ]);
            }

            $is_first_chunk = $chunks_done === 0;

            // Before the first (header) chunk is loaded, remove the tree's existing data.
            if ($is_first_chunk) {
                $keep_media = $tree->getPreference('keep_media') === '1';

                // Table name => column holding the tree ID. Tables are emptied in this order.
                $tree_columns = [
                    'individuals' => 'i_file',
                    'families'    => 'f_file',
                    'sources'     => 's_file',
                    'other'       => 'o_file',
                    'places'      => 'p_file',
                    'placelinks'  => 'pl_file',
                    'name'        => 'n_file',
                    'dates'       => 'd_file',
                    'change'      => 'gedcom_id',
                    'link'        => 'l_file',
                ];

                if (!$keep_media) {
                    $tree_columns['media_file'] = 'm_file';
                    $tree_columns['media']      = 'm_file';
                }

                foreach ($tree_columns as $table => $column) {
                    $query = DB::table($table)->where($column, '=', $tree_id);

                    // When media objects are kept, the links pointing at them are kept too.
                    if ($keep_media && $table === 'link') {
                        $query = $query->where('l_type', '<>', 'OBJE');
                    }

                    // take() and delete() together don't return the number of deleted rows,
                    // so re-count after every batch.
                    while ((clone $query)->count() > 0) {
                        (clone $query)->take(1000)->delete();

                        if ($this->timeout_service->isTimeLimitUp()) {
                            // Out of time - report back. The next request carries on deleting.
                            return $this->viewResponse('admin/import-progress', [
                                'errors'   => '',
                                'progress' => 0.0,
                                'status'   => I18N::translate('Deleting…') . ' ' . $table,
                                'tree'     => $tree,
                            ]);
                        }
                    }
                }
            }

            // Progress as it stood when this request started.
            $progress = $chunks_done / $chunks_total;

            // Only the very first chunk must begin with a header record.
            $expect_header = $is_first_chunk;

            // Messages from records that could not be imported. Shown at the end.
            $errors = '';

            // Work in short bursts. This keeps the resource requirements low.
            while (true) {
                $chunk = DB::table('gedcom_chunk')
                    ->where('gedcom_id', '=', $tree_id)
                    ->where('imported', '=', '0')
                    ->orderBy('gedcom_chunk_id')
                    ->select(['gedcom_chunk_id', 'chunk_data'])
                    ->first();

                if ($chunk === null) {
                    break;
                }

                $chunk_id = $chunk->gedcom_chunk_id;

                // Claim the chunk by flagging it as imported. The update only matches
                // a chunk which is still pending.
                $claimed = DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $chunk_id)
                    ->where('imported', '=', '0')
                    ->update(['imported' => 1]);

                // No rows changed - has another process already taken this chunk?
                if ($claimed === 0) {
                    break;
                }

                $gedcom = $chunk->chunk_data;

                if ($expect_header) {
                    // Strip any byte-order-mark, and store the stripped text.
                    if (str_starts_with($gedcom, UTF8::BYTE_ORDER_MARK)) {
                        $gedcom = substr($gedcom, strlen(UTF8::BYTE_ORDER_MARK));

                        DB::table('gedcom_chunk')
                            ->where('gedcom_chunk_id', '=', $chunk_id)
                            ->update(['chunk_data' => $gedcom]);
                    }

                    if (!str_starts_with($gedcom, '0 HEAD')) {
                        return $this->viewResponse('admin/import-fail', [
                            'error' => I18N::translate('Invalid GEDCOM file - no header record found.'),
                            'tree'  => $tree,
                        ]);
                    }

                    $expect_header = false;
                }

                // Treat carriage returns as line feeds, then split wherever a run of
                // line feeds is followed by a "0".
                $records = preg_split('/\n+(?=0)/', str_replace("\r", "\n", $gedcom));

                foreach ($records as $record) {
                    try {
                        $this->gedcom_import_service->importRecord($record, $tree, false);
                    } catch (GedcomErrorException $gedcom_error) {
                        $errors .= $gedcom_error->getMessage();
                    }
                }

                // The text of the chunk is no longer required.
                DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $chunk_id)
                    ->update(['chunk_data' => '']);

                if ($this->timeout_service->isTimeLimitUp()) {
                    break;
                }
            }

            return $this->viewResponse('admin/import-progress', [
                'errors'   => $errors,
                'progress' => $progress,
                'status'   => '',
                'tree'     => $tree,
            ]);
        } catch (Exception $exception) {
            DB::connection()->rollBack();

            if (!$this->causedByConcurrencyError($exception)) {
                return $this->viewResponse('admin/import-fail', [
                    'error' => $exception->getMessage(),
                    'tree'  => $tree,
                ]);
            }

            // Deadlock? Show the progress view again, so that the import is retried.
            return $this->viewResponse('admin/import-progress', [
                'errors'   => '',
                'progress' => $progress,
                'status'   => $exception->getMessage(),
                'tree'     => $tree,
            ]);
        }
    }
}