<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\DataFixService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Support\Collection;
use Throwable;

use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function strtr;
use function view;

/**
 * Class FixSearchAndReplace
 *
 * A "data fix" module that searches the raw GEDCOM text of records and
 * replaces matching text, using exact, whole-word, wildcard or regular
 * expression matching.
 */
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    // A regular expression that never matches.
    private const INVALID_REGEX = '/(?!)/';

    /** @var DataFixService */
    private $data_fix_service;

    /**
     * FixSearchAndReplace constructor.
     *
     * @param DataFixService $data_fix_service
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }

    /**
     * Options form.
     *
     * Renders the options view with the available search methods and record
     * types. The record types are sorted by their translated label.
     *
     * @param Tree $tree
     *
     * @return string
     */
    public function fixOptions(Tree $tree): string
    {
        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: http://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        asort($types);

        return view('modules/fix-search-and-replace/options', [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ]);
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when families were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function familiesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Family::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('families')
            ->where('f_file', '=', $tree->id());

        $this->recordQuery($query, 'f_gedcom', $params);

        return $query->pluck('f_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when individuals were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Individual::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id());

        $this->recordQuery($query, 'i_gedcom', $params);

        return $query->pluck('i_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when media objects were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function mediaToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Media::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('media')
            ->where('m_file', '=', $tree->id());

        $this->recordQuery($query, 'm_gedcom', $params);

        return $query->pluck('m_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when notes were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function notesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when repositories were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function repositoriesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Repository::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when sources were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function sourcesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Source::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        // sourcesToFixQuery() is not defined in this file - presumably it is
        // provided by ModuleDataFixTrait and returns the base query for sources.
        $query = $this->sourcesToFixQuery($tree, $params);

        $this->recordQuery($query, 's_gedcom', $params);

        return $query->pluck('s_id');
    }

    /**
     * A list of all records that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when submitters were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function submittersToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        // submittersToFixQuery() is not defined in this file - presumably it is
        // provided by ModuleDataFixTrait and returns the base query for submitters.
        $query = $this->submittersToFixQuery($tree, $params);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * A record needs updating when the search pattern matches its raw GEDCOM.
     * A PCRE failure (preg_match() returning false) counts as "no update".
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record, $params);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $record->updateRecord($this->updateGedcom($record, $params), false);
    }

    /**
     * Apply the search-and-replace to the raw GEDCOM of a record.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string The modified GEDCOM, or the unmodified GEDCOM if the
     *                replacement could not be performed.
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        $gedcom = $record->gedcom();

        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace'], ['\n' => "\n"]);

        $regex = $this->createRegex($params);

        // preg_replace() returns null on failure (e.g. backtrack limit reached).
        // Returning null from a method declared ": string" would be a TypeError,
        // so fall back to leaving the record unchanged.
        return preg_replace($regex, $replace, $gedcom) ?? $gedcom;
    }

    /**
     * Create a regular expression from the search pattern.
     *
     * @param array<string,string> $params
     *
     * @return string A delimited PCRE pattern.  For the "regex" method, a
     *                pattern that never matches is returned when the
     *                user-supplied expression is invalid.
     *
     * @throws HttpNotFoundException When the search method is not recognised.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search'];
        $method = $params['method'];
        // Appended after the closing delimiter as extra pattern modifier(s);
        // presumably "i" or "" from the options form - verify against the view.
        $case = $params['case'] ?? '';

        switch ($method) {
            case 'exact':
                return '/' . preg_quote($search, '/') . '/u' . $case;

            case 'words':
                return '/\b' . preg_quote($search, '/') . '\b/u' . $case;

            case 'wildcards':
                // preg_quote() escapes "*" and "?", so translate the escaped forms.
                return '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case;

            case 'regex':
                $regex = '/' . addcslashes($search, '/') . '/u' . $case;

                try {
                    // A valid regex on an empty string returns zero or one.
                    // An invalid regex returns false and raises a warning, which
                    // an error handler may convert into an exception.
                    // Handle both outcomes, so that we do not depend on the handler.
                    $valid = preg_match($regex, '') !== false;
                } catch (Throwable $ex) {
                    $valid = false;
                }

                return $valid ? $regex : self::INVALID_REGEX;
        }

        throw new HttpNotFoundException();
    }

    /**
     * Add a condition to a query, to pre-select records that might match
     * the search pattern.
     *
     * The condition may select more records than the regular expression
     * created by createRegex() would match (e.g. "words" uses a plain
     * substring match, and some database drivers get no condition at all).
     *
     * @param Builder              $query
     * @param string               $column
     * @param array<string,string> $params
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search'];
        $method = $params['method'];
        // Escape the LIKE meta-characters, then match anywhere in the column.
        $like = '%' . addcslashes($search, '\\%_') . '%';

        switch ($method) {
            case 'exact':
            case 'words':
                $query->where($column, 'LIKE', $like);
                break;

            case 'wildcards':
                // Convert our wildcards to the SQL equivalents.
                $like = strtr($like, ['?' => '_', '*' => '%']);
                $query->where($column, 'LIKE', $like);
                break;

            case 'regex':
                // Substituting newlines seems to be necessary on *some* versions
                // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
                $search = strtr($search, ['\n' => "\n"]);

                switch (DB::connection()->getDriverName()) {
                    case 'sqlite':
                    case 'mysql':
                        $query->where($column, 'REGEXP', $search);
                        break;

                    case 'pgsql':
                        $query->where($column, '~', $search);
                        break;

                    case 'sqlsrv':
                        // Not available - no condition is added, so every
                        // record of the selected type is a candidate.
                        break;
                }
                break;
        }
    }
}