<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Http\RequestHandlers;

use Exception;
use Fisharebest\Webtrees\Exceptions\GedcomErrorException;
use Fisharebest\Webtrees\Functions\FunctionsImport;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\Http\ViewResponseTrait;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Services\TimeoutService;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\DetectsConcurrencyErrors;
use Illuminate\Database\Query\Expression;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function assert;
use function preg_match;
use function preg_split;
use function response;
use function str_replace;
use function str_starts_with;
use function strlen;
use function strtoupper;
use function substr;
use function trim;
use function view;

/**
 * Import pending GEDCOM chunks for a tree, a few per request.
 *
 * Each request processes rows of the "gedcom_chunk" table until either no
 * unimported chunk remains or the timeout service reports that time is up.
 */
class GedcomLoad implements RequestHandlerInterface
{
    use ViewResponseTrait;
    use DetectsConcurrencyErrors;

    /** @var TimeoutService */
    private $timeout_service;

    /** @var TreeService */
    private $tree_service;

    /**
     * GedcomLoad constructor.
     *
     * @param TimeoutService $timeout_service Tells the import loop when to stop.
     * @param TreeService    $tree_service    Used to delete existing data before the first chunk is imported.
     */
    public function __construct(TimeoutService $timeout_service, TreeService $tree_service)
    {
        $this->tree_service    = $tree_service;
        $this->timeout_service = $timeout_service;
    }

    /**
     * Import as many chunks as the time limit allows and report the outcome.
     *
     * Renders one of three views: "admin/import-complete" when every chunk is
     * imported, "admin/import-progress" after a partial run (or after a
     * concurrency error), and "admin/import-fail" for an invalid header, an
     * unsupported character set or any other exception.
     *
     * @param ServerRequestInterface $request Must carry a Tree in its "tree" attribute.
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/ajax';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        try {
            $tree_id = $tree->id();

            // How far has the import got?
            $chunks_done = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree_id)
                ->where('imported', '=', '1')
                ->count();

            $chunks_total = DB::table('gedcom_chunk')
                ->where('gedcom_id', '=', $tree_id)
                ->count();

            // Nothing left to do (this also covers a tree with no chunks at all).
            if ($chunks_done === $chunks_total) {
                $tree->setPreference('imported', '1');

                return response(view('admin/import-complete', ['tree' => $tree]));
            }

            // Progress as it stood at the start of this request.
            $progress = $chunks_done / $chunks_total;

            // The header chunk needs special treatment; it is the first one imported.
            $is_header_chunk = $chunks_done === 0;

            // Import errors are accumulated and displayed with the progress view.
            $errors = '';

            // Work in short bursts, to keep the resource requirements low.
            do {
                // The oldest chunk still waiting to be imported.
                $chunk = DB::table('gedcom_chunk')
                    ->where('gedcom_id', '=', $tree_id)
                    ->where('imported', '=', '0')
                    ->orderBy('gedcom_chunk_id')
                    ->select(['gedcom_chunk_id', 'chunk_data'])
                    ->first();

                if ($chunk === null) {
                    break;
                }

                $chunk_id = $chunk->gedcom_chunk_id;

                // Flag the chunk as imported before processing it. If no row
                // was updated, another process has already taken this chunk.
                $claimed = DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $chunk_id)
                    ->where('imported', '=', '0')
                    ->update(['imported' => 1]);

                if ($claimed === 0) {
                    break;
                }

                // The first (header) chunk: delete old data and convert everything to UTF-8.
                if ($is_header_chunk) {
                    $keep_media = (bool) $tree->getPreference('keep_media');
                    $this->tree_service->deleteGenealogyData($tree, $keep_media);

                    // Strip a byte-order-mark, and store the stripped text so that
                    // the database-side character conversion works on clean data.
                    if (str_starts_with($chunk->chunk_data, Gedcom::UTF8_BOM)) {
                        $chunk->chunk_data = substr($chunk->chunk_data, strlen(Gedcom::UTF8_BOM));

                        DB::table('gedcom_chunk')
                            ->where('gedcom_chunk_id', '=', $chunk_id)
                            ->update(['chunk_data' => $chunk->chunk_data]);
                    }

                    if (!str_starts_with($chunk->chunk_data, '0 HEAD')) {
                        return $this->viewResponse('admin/import-fail', [
                            'error' => I18N::translate('Invalid GEDCOM file - no header record found.'),
                            'tree'  => $tree,
                        ]);
                    }

                    // Which character set does the header declare? Assume ASCII when it declares none.
                    $charset = preg_match('/[\r\n][ \t]*1 CHAR(?:ACTER)? ([^\r\n]+)/', $chunk->chunk_data, $match)
                        ? strtoupper(trim($match[1]))
                        : 'ASCII';

                    // MySQL supports a wide range of collation conversions. These are ones that
                    // have been encountered "in the wild". Choose the SQL expression that
                    // converts the declared character set to UTF-8 (null = nothing to convert).
                    switch ($charset) {
                        case 'ASCII':
                            $conversion = 'CONVERT(CONVERT(chunk_data USING ascii) USING utf8)';
                            break;

                        // IBMPC, IBM WINDOWS and MS-DOS could be anything. Mostly it means CP850.
                        // CP850 has extra letters with diacritics to replace box-drawing chars in CP437.
                        case 'IBMPC':
                        case 'IBM WINDOWS':
                        case 'MS-DOS':
                        case 'CP437':
                        case 'CP850':
                            $conversion = 'CONVERT(CONVERT(chunk_data USING cp850) USING utf8)';
                            break;

                        // ANSI could be anything. Most applications seem to treat it as latin1
                        // (ISO-8859-1, western european).
                        case 'ANSI':
                        case 'WINDOWS':
                        case 'CP1252':
                        case 'ISO8859-1':
                        case 'ISO-8859-1':
                        case 'LATIN1':
                        case 'LATIN-1':
                            $conversion = 'CONVERT(CONVERT(chunk_data USING latin1) USING utf8)';
                            break;

                        // ISO-8859-2 (eastern european).
                        case 'CP1250':
                        case 'ISO8859-2':
                        case 'ISO-8859-2':
                        case 'LATIN2':
                        case 'LATIN-2':
                            $conversion = 'CONVERT(CONVERT(chunk_data USING latin2) USING utf8)';
                            break;

                        // MAC Roman.
                        case 'MACINTOSH':
                            $conversion = 'CONVERT(CONVERT(chunk_data USING macroman) USING utf8)';
                            break;

                        // Already UTF-8, so nothing to do!
                        case 'UTF8':
                        case 'UTF-8':
                            $conversion = null;
                            break;

                        // ANSEL, and anything else we do not recognise, cannot be converted.
                        case 'ANSEL':
                        default:
                            return $this->viewResponse('admin/import-fail', [
                                'error' => I18N::translate('Error: converting GEDCOM files from %s encoding to UTF-8 encoding not currently supported.', $charset),
                                'tree'  => $tree,
                            ]);
                    }

                    // The conversion is applied to every chunk of this tree in one statement.
                    if ($conversion !== null) {
                        DB::table('gedcom_chunk')
                            ->where('gedcom_id', '=', $tree_id)
                            ->update(['chunk_data' => new Expression($conversion)]);
                    }

                    $is_header_chunk = false;

                    // Read the chunk again, now that the character set conversion has been done.
                    $chunk = DB::table('gedcom_chunk')
                        ->where('gedcom_chunk_id', '=', $chunk_id)
                        ->select(['gedcom_chunk_id', 'chunk_data'])
                        ->first();
                }

                // Normalise line endings, then import every record in the chunk.
                $gedcom = str_replace("\r", "\n", $chunk->chunk_data);
                $errors .= $this->importRecords($gedcom, $tree);

                // The raw text is no longer needed.
                DB::table('gedcom_chunk')
                    ->where('gedcom_chunk_id', '=', $chunk->gedcom_chunk_id)
                    ->update(['chunk_data' => '']);
            } while (!$this->timeout_service->isTimeLimitUp());

            return $this->viewResponse('admin/import-progress', [
                'errors'   => $errors,
                'progress' => $progress,
                'tree'     => $tree,
            ]);
        } catch (Exception $failure) {
            DB::connection()->rollBack();

            // Anything other than a concurrency error is reported to the user.
            if (!$this->causedByConcurrencyError($failure)) {
                return $this->viewResponse('admin/import-fail', [
                    'error' => $failure->getMessage(),
                    'tree'  => $tree,
                ]);
            }

            // Deadlock? Show the progress view again so that the import is retried.
            return $this->viewResponse('admin/import-progress', [
                'errors'   => '',
                'progress' => $progress ?? 0.0,
                'tree'     => $tree,
            ]);
        }
    }

    /**
     * Import each level-0 record found in a block of GEDCOM text.
     *
     * @param string $gedcom GEDCOM text using "\n" line endings.
     * @param Tree   $tree   The tree receiving the records.
     *
     * @return string The concatenated messages of any GedcomErrorException raised while importing.
     */
    private function importRecords(string $gedcom, Tree $tree): string
    {
        $messages = '';

        // Records are separated by newlines that are followed by a "0".
        foreach (preg_split('/\n+(?=0)/', $gedcom) as $record) {
            try {
                FunctionsImport::importRecord($record, $tree, false);
            } catch (GedcomErrorException $gedcom_error) {
                $messages .= $gedcom_error->getMessage();
            }
        }

        return $messages;
    }
}