<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\DataFixService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Support\Collection;
use Throwable;

use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function strtr;
use function view;

/**
 * Class FixSearchAndReplace
 *
 * A "data fix" module that searches the raw GEDCOM of records of one type
 * and replaces matching text. Four search methods are supported: exact text,
 * whole words, "?"/"*" wildcards, and regular expressions.
 *
 * Each xxxToFix() method builds a coarse SQL filter (see recordQuery()); the
 * precise decision for a single record is made with the PCRE pattern built
 * by createRegex().
 */
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    // A regular expression that never matches.
    private const INVALID_REGEX = '/(?!)/';

    /** @var DataFixService */
    private $data_fix_service;

    /**
     * FixSearchAndReplace constructor.
     *
     * @param DataFixService $data_fix_service
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }

    /**
     * Options form.
     *
     * Renders the options view with the available search methods and record
     * types. The record types are sorted by their translated labels.
     *
     * @param Tree $tree
     *
     * @return string
     */
    public function fixOptions(Tree $tree): string
    {
        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: http://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        // Sort by translated label, keeping the record-type keys.
        asort($types);

        return view('modules/fix-search-and-replace/options', [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ]);
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function familiesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Family::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('families')
            ->where('f_file', '=', $tree->id());

        $this->recordQuery($query, 'f_gedcom', $params);

        return $query->pluck('f_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Individual::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id());

        $this->recordQuery($query, 'i_gedcom', $params);

        return $query->pluck('i_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function locationsToFix(Tree $tree, array $params): ?Collection
    {
        // Only act when locations were requested. (Testing against the note
        // record type here would cause locations to be selected whenever
        // notes were chosen.)
        if ($params['type'] !== Location::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Location::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function mediaToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Media::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('media')
            ->where('m_file', '=', $tree->id());

        $this->recordQuery($query, 'm_gedcom', $params);

        return $query->pluck('m_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function notesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function repositoriesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Repository::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function sourcesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Source::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        // The base query is supplied by a helper that is not defined in this class.
        $query = $this->sourcesToFixQuery($tree, $params);

        $this->recordQuery($query, 's_gedcom', $params);

        return $query->pluck('s_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function submittersToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        // The base query is supplied by a helper that is not defined in this class.
        $query = $this->submittersToFixQuery($tree, $params);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * A record needs updating when the search pattern matches its raw GEDCOM.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record, $params);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $record->updateRecord($this->updateGedcom($record, $params), false);
    }

    /**
     * Apply the search-and-replace to a record's GEDCOM and return the result.
     * The record itself is not modified.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace'], ['\n' => "\n"]);

        $regex  = $this->createRegex($params);
        $gedcom = $record->gedcom();

        // preg_replace() returns null on a PCRE error (e.g. backtrack limit).
        // Returning null from a "string" method would be a TypeError, so
        // leave the record unchanged instead.
        return preg_replace($regex, $replace, $gedcom) ?? $gedcom;
    }

    /**
     * Create a regular expression from the search pattern.
     *
     * @param array<string,string> $params
     *
     * @return string A delimited PCRE pattern, including modifiers.
     *
     * @throws HttpNotFoundException If the search method is not recognised.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search'];
        $method = $params['method'];

        // The case parameter is appended to the pattern as a modifier, so only
        // accept the case-insensitive flag; anything else would be interpreted
        // as arbitrary (possibly invalid) PCRE modifiers.
        // NOTE(review): assumes the options form submits only '' or 'i' - confirm against the view.
        $case = ($params['case'] ?? '') === 'i' ? 'i' : '';

        switch ($method) {
            case 'exact':
                return '/' . preg_quote($search, '/') . '/u' . $case;

            case 'words':
                return '/\b' . preg_quote($search, '/') . '\b/u' . $case;

            case 'wildcards':
                // After quoting, the wildcards appear as "\*" and "\?".
                return '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case;

            case 'regex':
                $regex = '/' . addcslashes($search, '/') . '/u' . $case;

                try {
                    // A valid regex on an empty string returns zero or one.
                    // An invalid regex returns false and raises a warning, which
                    // an error handler may convert into an exception.
                    if (preg_match($regex, '') === false) {
                        $regex = self::INVALID_REGEX;
                    }
                } catch (Throwable $ex) {
                    $regex = self::INVALID_REGEX;
                }

                return $regex;
        }

        throw new HttpNotFoundException();
    }

    /**
     * Add a constraint to an SQL query, to select records whose GEDCOM
     * might match the search pattern.
     *
     * @param Builder              $query
     * @param string               $column
     * @param array<string,string> $params
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search'];
        $method = $params['method'];

        // Escape the LIKE meta-characters, so the search text is matched literally.
        $like = '%' . addcslashes($search, '\\%_') . '%';

        switch ($method) {
            case 'exact':
            case 'words':
                // SQL cannot test word boundaries here, so "words" uses the
                // same substring filter as "exact".
                $query->where($column, 'LIKE', $like);
                break;

            case 'wildcards':
                // Convert our wildcards to their LIKE equivalents.
                $like = strtr($like, ['?' => '_', '*' => '%']);
                $query->where($column, 'LIKE', $like);
                break;

            case 'regex':
                // Substituting newlines seems to be necessary on *some* versions
                // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
                $search = strtr($search, ['\n' => "\n"]);

                switch (DB::connection()->getDriverName()) {
                    case 'sqlite':
                    case 'mysql':
                        $query->where($column, 'REGEXP', $search);
                        break;

                    case 'pgsql':
                        $query->where($column, '~', $search);
                        break;

                    case 'sqlsrv':
                        // Not available - no constraint is added to the query.
                        break;
                }
                break;
        }
    }
}