<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\DataFixService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Query\Builder;
use Illuminate\Support\Collection;
use Throwable;

use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function strtr;
use function view;

/**
 * Class FixSearchAndReplace
 *
 * A "data fix" module that searches the raw GEDCOM of records of one chosen
 * type and replaces matching text. Four search methods are supported:
 * exact text, whole words, simple wildcards and regular expressions.
 */
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    // A regular expression that never matches.
    private const INVALID_REGEX = '/(?!)/';

    private DataFixService $data_fix_service;

    /**
     * @param DataFixService $data_fix_service Used to render the before/after diff in previews.
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }

    /**
     * Options form.
     *
     * Renders the options view with the available search methods and the
     * available record types. Record types are sorted by their translated label.
     *
     * @param Tree $tree Not used by this implementation.
     *
     * @return string
     */
    public function fixOptions(Tree $tree): string
    {
        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Location::RECORD_TYPE   => I18N::translate('Locations'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        // Sort by label, keeping the record-type keys.
        asort($types);

        return view('modules/fix-search-and-replace/options', [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ]);
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when families were not the requested record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function familiesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Family::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('families')
            ->where('f_file', '=', $tree->id());

        $this->recordQuery($query, 'f_gedcom', $params);

        return $query->pluck('f_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when individuals were not the requested record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Individual::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id());

        $this->recordQuery($query, 'i_gedcom', $params);

        return $query->pluck('i_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when locations were not the requested record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function locationsToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Location::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Location::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when media objects were not the requested record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function mediaToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Media::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('media')
            ->where('m_file', '=', $tree->id());

        $this->recordQuery($query, 'm_gedcom', $params);

        return $query->pluck('m_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when notes were not the requested record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function notesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Note::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when repositories were not the requested record type, or
     * when the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function repositoriesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Repository::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when sources were not the requested record type, or
     * when the search text is empty. The base query comes from
     * sourcesToFixQuery(), which is not defined in this class.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function sourcesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Source::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = $this->sourcesToFixQuery($tree, $params);

        $this->recordQuery($query, 's_gedcom', $params);

        return $query->pluck('s_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when submitters were not the requested record type, or
     * when the search text is empty. The base query comes from
     * submittersToFixQuery(), which is not defined in this class.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null
     */
    protected function submittersToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = $this->submittersToFixQuery($tree, $params);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * True only when the search pattern matches the record's GEDCOM.
     * A PCRE error (preg_match() returning false) counts as "no update needed".
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string The diff between the current and the updated GEDCOM, as produced by the data-fix service.
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record, $params);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $record->updateRecord($this->updateGedcom($record, $params), false);
    }

    /**
     * Apply the search-and-replace to a record's GEDCOM, without saving it.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string The updated GEDCOM, or the unchanged GEDCOM if the replacement failed.
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace-with'], ['\n' => "\n"]);

        $regex = $this->createRegex($params);
        $old   = $record->gedcom();

        // preg_replace() returns null on a PCRE runtime error (e.g. backtrack
        // limit reached, malformed UTF-8 in the subject). Returning null would
        // violate the declared return type, so leave the record unchanged.
        return preg_replace($regex, $replace, $old) ?? $old;
    }

    /**
     * Create a regular expression from the search pattern.
     *
     * The value of $params['case'] is appended to the pattern as modifier(s),
     * after the "u" (UTF-8) modifier.
     *
     * @param array<string,string> $params
     *
     * @return string
     *
     * @throws HttpNotFoundException If the search method is not one of the four supported ones.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search-for'];
        $method = $params['method'];
        $case   = $params['case'];

        return match ($method) {
            'exact'     => '/' . preg_quote($search, '/') . '/u' . $case,
            'words'     => '/\b' . preg_quote($search, '/') . '\b/u' . $case,
            // preg_quote() has escaped the wildcards, so translate the escaped forms.
            'wildcards' => '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case,
            'regex'     => $this->validatedRegex('/' . addcslashes($search, '/') . '/u' . $case),
            default     => throw new HttpNotFoundException(),
        };
    }

    /**
     * Check that a user-supplied regular expression compiles.
     *
     * @param string $regex A complete pattern, including delimiters and modifiers.
     *
     * @return string The same pattern if it is valid, otherwise a pattern that never matches.
     */
    private function validatedRegex(string $regex): string
    {
        try {
            // A valid regex on an empty string returns 0 or 1.
            // An invalid regex returns false and raises a warning, which reaches
            // us as an exception if an error handler converts warnings.
            $valid = preg_match($regex, '') !== false;
        } catch (Throwable) {
            $valid = false;
        }

        return $valid ? $regex : self::INVALID_REGEX;
    }

    /**
     * Add a condition to a query, so that it only selects records whose
     * GEDCOM might match the search pattern.
     *
     * This is a coarse pre-filter: for an unknown search method, or for a
     * regex search on a driver without a known regex operator, no condition
     * is added.
     *
     * @param Builder              $query  Modified in place.
     * @param string               $column The column containing the raw GEDCOM.
     * @param array<string,string> $params
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search-for'];
        $method = $params['method'];

        // Escape the LIKE meta-characters, so the search text is matched literally.
        $like = '%' . addcslashes($search, '\\%_') . '%';

        switch ($method) {
            case 'exact':
            case 'words':
                $query->where($column, 'LIKE', $like);
                break;

            case 'wildcards':
                // Translate our wildcards into their LIKE equivalents.
                $like = strtr($like, ['?' => '_', '*' => '%']);
                $query->where($column, 'LIKE', $like);
                break;

            case 'regex':
                // Substituting newlines seems to be necessary on *some* versions
                // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
                $search = strtr($search, ['\n' => "\n"]);

                $operator = match (DB::connection()->getDriverName()) {
                    'sqlite', 'mysql' => 'REGEXP',
                    'pgsql'           => '~',
                    // Not available on sqlsrv (or any other driver).
                    default           => null,
                };

                if ($operator !== null) {
                    $query->where($column, $operator, $search);
                }
                break;
        }
    }
}