<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\DataFixService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Support\Collection;
use Throwable;

use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function strtr;
use function view;

/**
 * Class FixSearchAndReplace
 *
 * A "data fix" module that searches the raw GEDCOM of records of one chosen
 * type and replaces the matching text. Four search methods are supported:
 * exact text, whole words, wildcards ("?" and "*") and regular expressions.
 */
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    // A regular expression that never matches.
    private const INVALID_REGEX = '/(?!)/';

    /** @var DataFixService */
    private $data_fix_service;

    /**
     * FixSearchAndReplace constructor.
     *
     * @param DataFixService $data_fix_service Used to render the before/after diff in previews.
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }

    /**
     * Options form.
     *
     * Renders the options view with the available search methods and record
     * types. The record types are sorted by their translated label.
     *
     * @param Tree $tree
     *
     * @return string
     */
    public function fixOptions(Tree $tree): string
    {
        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Location::RECORD_TYPE   => I18N::translate('Locations'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        // Sort by translated label, keeping the record-type keys.
        asort($types);

        return view('modules/fix-search-and-replace/options', [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ]);
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when families were not the selected record type, or when
     * the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function familiesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Family::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('families')->where('f_file', '=', $tree->id());
        $this->recordQuery($query, 'f_gedcom', $params);

        return $query->pluck('f_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when individuals were not the selected record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Individual::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id());

        $this->recordQuery($query, 'i_gedcom', $params);

        return $query->pluck('i_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when locations were not the selected record type, or when
     * the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function locationsToFix(Tree $tree, array $params): ?Collection
    {
        // The guard must test the same record type that the query selects below.
        if ($params['type'] !== Location::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Location::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when media objects were not the selected record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function mediaToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Media::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('media')
            ->where('m_file', '=', $tree->id());

        $this->recordQuery($query, 'm_gedcom', $params);

        return $query->pluck('m_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when notes were not the selected record type, or when the
     * search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function notesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when repositories were not the selected record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function repositoriesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Repository::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when sources were not the selected record type, or when
     * the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function sourcesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Source::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        // sourcesToFixQuery() is not defined in this file - presumably it is
        // provided by ModuleDataFixTrait.
        $query = $this->sourcesToFixQuery($tree, $params);

        $this->recordQuery($query, 's_gedcom', $params);

        return $query->pluck('s_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when submitters were not the selected record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function submittersToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        // submittersToFixQuery() is not defined in this file - presumably it
        // is provided by ModuleDataFixTrait.
        $query = $this->submittersToFixQuery($tree, $params);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * True only when the search pattern matches the record's raw GEDCOM.
     * A PCRE failure (preg_match() returning false) counts as "no match".
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record, $params);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $record->updateRecord($this->updateGedcom($record, $params), false);
    }

    /**
     * Apply the search and replace to the raw GEDCOM of a record.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string The updated GEDCOM, or the original GEDCOM if the replacement failed.
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace'], ['\n' => "\n"]);

        $regex = $this->createRegex($params);

        $old = $record->gedcom();

        // preg_replace() returns null on a PCRE error (e.g. backtrack limit).
        // Leave the record unchanged in that case, rather than violating the
        // string return type. This matches doesRecordNeedUpdate(), which
        // treats a PCRE failure as "no match".
        return preg_replace($regex, $replace, $old) ?? $old;
    }

    /**
     * Create a regular expression from the search pattern.
     *
     * The value of $params['case'] is appended verbatim as a pattern modifier.
     * NOTE(review): presumably this is either '' or 'i' - confirm against the options form.
     *
     * @param array<string,string> $params
     *
     * @return string
     *
     * @throws HttpNotFoundException If the search method is not one of the four supported ones.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search'];
        $method = $params['method'];
        $case   = $params['case'];

        switch ($method) {
            case 'exact':
                return '/' . preg_quote($search, '/') . '/u' . $case;

            case 'words':
                return '/\b' . preg_quote($search, '/') . '\b/u' . $case;

            case 'wildcards':
                // preg_quote() has escaped the wildcards, so translate the escaped forms.
                return '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case;

            case 'regex':
                $regex = '/' . addcslashes($search, '/') . '/u' . $case;

                try {
                    // A valid regex on an empty string returns zero.
                    // An invalid regex on an empty string returns false and throws a warning.
                    // NOTE(review): the catch only fires if warnings are converted to
                    // exceptions by an error handler - confirm this is the case.
                    preg_match($regex, '');
                } catch (Throwable $ex) {
                    $regex = self::INVALID_REGEX;
                }

                return $regex;
        }

        throw new HttpNotFoundException();
    }

    /**
     * Add a constraint to a query, to pre-filter records whose raw GEDCOM
     * might match the search pattern.
     *
     * The SQL filter can be broader than the final regular expression (e.g.
     * "words" uses the same LIKE as "exact"). For an unrecognised method or
     * database driver, no constraint is added at all.
     *
     * @param Builder              $query
     * @param string               $column The column holding the raw GEDCOM.
     * @param array<string,string> $params
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search'];
        $method = $params['method'];
        // Escape the LIKE meta-characters, so that the search text is matched literally.
        $like   = '%' . addcslashes($search, '\\%_') . '%';

        switch ($method) {
            case 'exact':
            case 'words':
                $query->where($column, 'LIKE', $like);
                break;

            case 'wildcards':
                // Translate our wildcards to the SQL equivalents.
                $like = strtr($like, ['?' => '_', '*' => '%']);
                $query->where($column, 'LIKE', $like);
                break;

            case 'regex':
                // Substituting newlines seems to be necessary on *some* versions
                // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
                $search = strtr($search, ['\n' => "\n"]);

                switch (DB::connection()->getDriverName()) {
                    case 'sqlite':
                    case 'mysql':
                        $query->where($column, 'REGEXP', $search);
                        break;

                    case 'pgsql':
                        $query->where($column, '~', $search);
                        break;

                    case 'sqlsrv':
                        // Not available
                        break;
                }
                break;
        }
    }
}