<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2019 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\DataFixService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Support\Collection;
use Throwable;

use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function strtr;
use function view;

/**
 * Class FixSearchAndReplace
 *
 * A "data fix" module that searches the raw GEDCOM of one type of record
 * and replaces the matching text. Four search methods are supported:
 * "exact", "words", "wildcards" and "regex".
 */
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    // A regular expression that never matches.
    private const INVALID_REGEX = '/(?!)/';

    /** @var DataFixService */
    private $data_fix_service;

    /**
     * FixSearchAndReplace constructor.
     *
     * @param DataFixService $data_fix_service
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }

    /**
     * Options form.
     *
     * Renders the "modules/fix-search-and-replace/options" view with the
     * available search methods and record types.
     *
     * @param Tree $tree Not used by this implementation.
     *
     * @return string
     */
    public function fixOptions(Tree $tree): string
    {
        // The keys are the values accepted by createRegex() and recordQuery().
        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: http://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        // Sort by the translated label, keeping the record-type keys.
        asort($types);

        return view('modules/fix-search-and-replace/options', [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ]);
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when families were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function familiesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Family::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('families')->where('f_file', '=', $tree->id());
        $this->recordQuery($query, 'f_gedcom', $params);

        return $query->pluck('f_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when individuals were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function individualsToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Individual::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id());

        $this->recordQuery($query, 'i_gedcom', $params);

        return $query->pluck('i_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when media objects were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function mediaToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Media::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('media')
            ->where('m_file', '=', $tree->id());

        $this->recordQuery($query, 'm_gedcom', $params);

        return $query->pluck('m_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when notes were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function notesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        // Notes share the "other" table with several record types.
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when repositories were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function repositoriesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Repository::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        // Repositories share the "other" table with several record types.
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when sources were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function sourcesToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Source::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        $query = DB::table('sources')
            ->where('s_file', '=', $tree->id());

        $this->recordQuery($query, 's_gedcom', $params);

        return $query->pluck('s_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * Returns null when submitters were not selected or the search text is empty.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<string>|null
     */
    protected function submittersToFix(Tree $tree, array $params): ?Collection
    {
        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search'] === '') {
            return null;
        }

        // Submitters share the "other" table with several record types.
        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * A record needs updating when the search pattern matches its GEDCOM.
     * An invalid pattern (or a matching error) never counts as a match.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record, $params);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        // NOTE(review): the meaning of the second argument (false) is defined
        // by GedcomRecord::updateRecord(), which is not visible here.
        $record->updateRecord($this->updateGedcom($record, $params), false);
    }

    /**
     * Apply the search-and-replace to a record's GEDCOM and return the result.
     * The record itself is not modified.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace'], ['\n' => "\n"]);

        $regex = $this->createRegex($params);

        $old = $record->gedcom();

        // preg_replace() returns null on failure (e.g. backtrack limit reached
        // or malformed UTF-8). Leave the record unchanged in that case, rather
        // than violating the declared return type.
        return preg_replace($regex, $replace, $old) ?? $old;
    }

    /**
     * Create a regular expression from the search pattern.
     *
     * All patterns use the "u" modifier, so that the pattern and the GEDCOM
     * data are treated as UTF-8: a "?" wildcard then matches one character
     * (not one byte) and "\b" recognises non-ASCII letters.
     *
     * @param array<string,string> $params
     *
     * @return string
     *
     * @throws HttpNotFoundException If the search method is not recognised.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search'];
        $method = $params['method'];

        // The "case" parameter holds extra pattern modifiers (e.g. "i").
        // Default to a case-sensitive search if it was not supplied.
        $modifiers = 'u' . ($params['case'] ?? '');

        switch ($method) {
            case 'exact':
                return '/' . preg_quote($search, '/') . '/' . $modifiers;

            case 'words':
                return '/\b' . preg_quote($search, '/') . '\b/' . $modifiers;

            case 'wildcards':
                // preg_quote() escapes "*" and "?", so convert the escaped forms.
                return '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/' . $modifiers;

            case 'regex':
                $regex = '/' . addcslashes($search, '/') . '/' . $modifiers;

                try {
                    // A valid regex on an empty string returns zero.
                    // An invalid regex on an empty string returns false and throws a warning.
                    // Check the return value as well, in case no error handler
                    // converts the warning into an exception.
                    if (preg_match($regex, '') === false) {
                        $regex = self::INVALID_REGEX;
                    }
                } catch (Throwable $ex) {
                    $regex = self::INVALID_REGEX;
                }

                return $regex;
        }

        throw new HttpNotFoundException();
    }

    /**
     * Add a condition to a query, to pre-select the records whose GEDCOM
     * might match the search pattern. The result can include records that
     * do not actually match; these are rejected later by doesRecordNeedUpdate().
     *
     * An unrecognised search method adds no condition.
     *
     * @param Builder              $query
     * @param string               $column
     * @param array<string,string> $params
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search'];
        $method = $params['method'];

        // Escape the characters that are special in a LIKE pattern.
        $like = addcslashes($search, '_%\\');

        switch ($method) {
            case 'exact':
            case 'words':
                // Word boundaries cannot be expressed with LIKE, so "words"
                // uses the same (wider) filter as "exact".
                $query->where($column, 'LIKE', '%' . $like . '%');
                break;

            case 'wildcards':
                $like = strtr($like, ['?' => '_', '*' => '%']);
                $query->where($column, 'LIKE', '%' . $like . '%');
                break;

            case 'regex':
                switch (DB::connection()->getDriverName()) {
                    case 'sqlite':
                    case 'mysql':
                        $query->where($column, 'REGEXP', $search);
                        break;

                    case 'pgsql':
                        $query->where($column, '~', $search);
                        break;

                    case 'sqlsrv':
                        // Not available
                        break;
                }
                break;
        }
    }
}