<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\DataFixService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Query\Builder;
use Illuminate\Support\Collection;
use Throwable;

use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function strtr;
use function view;

/**
 * Class FixSearchAndReplace
 *
 * A "data fix" module that searches the raw GEDCOM of records for some text
 * (exact text, whole words, wildcards or a regular expression) and replaces it.
 */
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    // A regular expression that never matches.
    private const string INVALID_REGEX = '/(?!)/';

    /**
     * @param DataFixService $data_fix_service Used to render the before/after diff in previews.
     */
    public function __construct(private DataFixService $data_fix_service)
    {
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }

    /**
     * Options form.
     *
     * @param Tree $tree
     *
     * @return string
     */
    public function fixOptions(Tree $tree): string
    {
        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Location::RECORD_TYPE   => I18N::translate('Locations'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        // Present the record types in alphabetical order of their translated labels.
        asort($types);

        return view('modules/fix-search-and-replace/options', [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ]);
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function familiesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Family::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('families')
            ->where('f_file', '=', $tree->id());

        $this->recordQuery($query, 'f_gedcom', $params);

        return $query->pluck('f_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Individual::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id());

        $this->recordQuery($query, 'i_gedcom', $params);

        return $query->pluck('i_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function locationsToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Location::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Location::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function mediaToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Media::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('media')
            ->where('m_file', '=', $tree->id());

        $this->recordQuery($query, 'm_gedcom', $params);

        return $query->pluck('m_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function notesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Note::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function repositoriesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Repository::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function sourcesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Source::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = $this->sourcesToFixQuery($tree, $params);

        $this->recordQuery($query, 's_gedcom', $params);

        return $query->pluck('s_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function submittersToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = $this->submittersToFixQuery($tree, $params);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        // preg_match() gives false on error and 0 on no match; both mean "no update".
        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record, $params);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $record->updateRecord($this->updateGedcom($record, $params), false);
    }

    /**
     * Apply the search-and-replace to the raw GEDCOM of a record.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string The updated GEDCOM, or the original GEDCOM if the replacement failed.
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace-with'], ['\n' => "\n"]);

        $regex  = $this->createRegex($params);
        $gedcom = $record->gedcom();

        // preg_replace() returns null on a PCRE error (e.g. backtrack limit, invalid UTF-8).
        // Returning null would violate the return type, so leave the record unchanged instead.
        return preg_replace($regex, $replace, $gedcom) ?? $gedcom;
    }

    /**
     * Create a regular expression from the search pattern.
     *
     * @param array<string,string> $params
     *
     * @return string
     *
     * @throws HttpNotFoundException If the search method is not one of the supported ones.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search-for'];
        $method = $params['method'];
        $case   = $params['case'];

        // The literal-text methods all start from the same escaped search text.
        $quoted = preg_quote($search, '/');

        return match ($method) {
            'exact'     => '/' . $quoted . '/u' . $case,
            'words'     => '/\b' . $quoted . '\b/u' . $case,
            // preg_quote() has escaped the wildcards, so translate the escaped forms.
            'wildcards' => '/\b' . strtr($quoted, ['\*' => '.*', '\?' => '.']) . '\b/u' . $case,
            'regex'     => $this->validatedRegex('/' . addcslashes($search, '/') . '/u' . $case),
            default     => throw new HttpNotFoundException(),
        };
    }

    /**
     * Return the given regular expression if it compiles, or a never-matching one if it does not.
     *
     * @param string $regex A complete pattern, including delimiters and modifiers.
     *
     * @return string
     */
    private function validatedRegex(string $regex): string
    {
        try {
            // A valid regex on an empty string returns 0 or 1.
            // An invalid regex returns false and raises a warning.
            if (preg_match($regex, '') === false) {
                return self::INVALID_REGEX;
            }
        } catch (Throwable) {
            // Covers setups where the warning is converted into an exception.
            return self::INVALID_REGEX;
        }

        return $regex;
    }

    /**
     * Add a condition to a query, so that it only selects records whose
     * GEDCOM might match the search pattern. An unrecognised search method
     * adds no condition.
     *
     * @param Builder              $query
     * @param string               $column The column containing the raw GEDCOM.
     * @param array<string,string> $params
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search-for'];
        $method = $params['method'];

        // Escape the characters that are special in a LIKE pattern.
        $like = '%' . addcslashes($search, '\\%_') . '%';

        switch ($method) {
            case 'exact':
            case 'words':
                // Word boundaries cannot be expressed with LIKE, so "words" over-selects
                // here; the regular expression is applied to each candidate later.
                $query->where($column, 'LIKE', $like);
                break;

            case 'wildcards':
                // Translate our wildcards into their LIKE equivalents.
                $query->where($column, 'LIKE', strtr($like, ['?' => '_', '*' => '%']));
                break;

            case 'regex':
                // Substituting newlines seems to be necessary on *some* versions
                // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
                $query->where($column, DB::regexOperator(), strtr($search, ['\n' => "\n"]));
                break;
        }
    }
}