<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2022 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\DataFixService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Support\Collection;
use Throwable;

use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function strtr;
use function view;

/**
 * Class FixSearchAndReplace
 *
 * A "data fix" module that searches the raw GEDCOM of records of one chosen
 * type and replaces the matching text. The search text can be interpreted as
 * exact text, whole words, a wildcard pattern or a regular expression.
 */
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    // A regular expression that never matches.
    private const INVALID_REGEX = '/(?!)/';

    private DataFixService $data_fix_service;

    /**
     * FixSearchAndReplace constructor.
     *
     * @param DataFixService $data_fix_service
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }

    /**
     * Options form.
     *
     * Renders the options view with the available search methods and record
     * types. The record types are sorted by their translated label.
     *
     * @param Tree $tree
     *
     * @return string
     */
    public function fixOptions(Tree $tree): string
    {
        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Location::RECORD_TYPE   => I18N::translate('Locations'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        asort($types);

        return view('modules/fix-search-and-replace/options', [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ]);
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when families are not the selected record type, or when
     * the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function familiesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Family::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('families')->where('f_file', '=', $tree->id());
        $this->recordQuery($query, 'f_gedcom', $params);

        return $query->pluck('f_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when individuals are not the selected record type, or when
     * the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Individual::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id());

        $this->recordQuery($query, 'i_gedcom', $params);

        return $query->pluck('i_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when locations are not the selected record type, or when
     * the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function locationsToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Location::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Location::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when media objects are not the selected record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function mediaToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Media::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('media')
            ->where('m_file', '=', $tree->id());

        $this->recordQuery($query, 'm_gedcom', $params);

        return $query->pluck('m_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when notes are not the selected record type, or when the
     * search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function notesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when repositories are not the selected record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function repositoriesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Repository::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when sources are not the selected record type, or when the
     * search text is empty. The base query comes from sourcesToFixQuery(),
     * which is not defined in this class (presumably ModuleDataFixTrait).
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function sourcesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Source::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = $this->sourcesToFixQuery($tree, $params);

        $this->recordQuery($query, 's_gedcom', $params);

        return $query->pluck('s_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when submitters are not the selected record type, or when
     * the search text is empty. The base query comes from
     * submittersToFixQuery(), which is not defined in this class (presumably
     * ModuleDataFixTrait).
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function submittersToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = $this->submittersToFixQuery($tree, $params);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * True only when the search pattern matches the record's GEDCOM. A failed
     * match (preg_match() returning false) counts as "no update needed".
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record, $params);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $record->updateRecord($this->updateGedcom($record, $params), false);
    }

    /**
     * Apply the search-and-replace to a record's GEDCOM, without saving it.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string The modified GEDCOM, or the original GEDCOM if the
     *                replacement could not be performed.
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace'], ['\n' => "\n"]);

        $regex  = $this->createRegex($params);
        $gedcom = $record->gedcom();

        // preg_replace() returns null on failure (e.g. the backtrack limit is
        // exceeded). Returning null would violate the declared string return
        // type, so leave the record unchanged instead.
        return preg_replace($regex, $replace, $gedcom) ?? $gedcom;
    }

    /**
     * Create a regular expression from the search pattern.
     *
     * The value of $params['case'] is appended to the pattern as modifier(s).
     * NOTE(review): it is not validated here - confirm that callers only
     * supply '' or 'i'.
     *
     * @param array<string,string> $params
     *
     * @return string
     *
     * @throws HttpNotFoundException If the search method is not recognised.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search'];
        $method = $params['method'];
        $case   = $params['case'];

        switch ($method) {
            case 'exact':
                return '/' . preg_quote($search, '/') . '/u' . $case;

            case 'words':
                return '/\b' . preg_quote($search, '/') . '\b/u' . $case;

            case 'wildcards':
                // preg_quote() has escaped the wildcards, so translate the escaped forms.
                return '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case;

            case 'regex':
                $regex = '/' . addcslashes($search, '/') . '/u' . $case;

                try {
                    // A valid regex on an empty string returns zero.
                    // An invalid regex on an empty string returns false and raises a
                    // warning, which an error handler may convert to an exception.
                    // Check for both, so that we do not depend on such a handler.
                    $valid = preg_match($regex, '') !== false;
                } catch (Throwable $ex) {
                    $valid = false;
                }

                return $valid ? $regex : self::INVALID_REGEX;
        }

        throw new HttpNotFoundException();
    }

    /**
     * Add a constraint to a query, to pre-select records whose GEDCOM might
     * match the search pattern.
     *
     * For the "regex" method, a constraint is only added for the sqlite,
     * mysql and pgsql drivers. An unrecognised method adds no constraint.
     *
     * @param Builder              $query
     * @param string               $column
     * @param array<string,string> $params
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search'];
        $method = $params['method'];
        // Escape the LIKE meta-characters, so that they match literally.
        $like   = '%' . addcslashes($search, '\\%_') . '%';

        switch ($method) {
            case 'exact':
            case 'words':
                $query->where($column, 'LIKE', $like);
                break;

            case 'wildcards':
                // Convert our wildcards to their LIKE equivalents.
                $like = strtr($like, ['?' => '_', '*' => '%']);
                $query->where($column, 'LIKE', $like);
                break;

            case 'regex':
                // Substituting newlines seems to be necessary on *some* versions
                // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
                $search = strtr($search, ['\n' => "\n"]);

                switch (DB::connection()->getDriverName()) {
                    case 'sqlite':
                    case 'mysql':
                        $query->where($column, 'REGEXP', $search);
                        break;

                    case 'pgsql':
                        $query->where($column, '~', $search);
                        break;

                    case 'sqlsrv':
                        // Not available. No constraint is added for this driver.
                        break;
                }
                break;
        }
    }
}