. */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\DataFixService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Query\Builder;
use Illuminate\Support\Collection;
use Throwable;

use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function strtr;
use function view;

/**
 * A "data fix" module that searches the raw GEDCOM of records and replaces
 * the matching text.
 *
 * The search text can be interpreted in four ways ("exact", "words",
 * "wildcards" or "regex"). Candidate records are first narrowed down with an
 * SQL filter, and then checked/updated in PHP using a regular expression
 * built from the same search parameters.
 */
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    // A regular expression that never matches.
    private const INVALID_REGEX = '/(?!)/';

    private DataFixService $data_fix_service;

    /**
     * @param DataFixService $data_fix_service Used to render the before/after diff in previews.
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }

    /**
     * Options form.
     *
     * Renders the options view with the available search methods and record
     * types. The record types are sorted by their translated label.
     *
     * @param Tree $tree Not used by this implementation.
     *
     * @return string
     */
    public function fixOptions(Tree $tree): string
    {
        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Location::RECORD_TYPE   => I18N::translate('Locations'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        asort($types);

        return view('modules/fix-search-and-replace/options', [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ]);
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree  $tree
     * @param array $params Reads 'type', 'search-for' and 'method'.
     *
     * @return Collection|null Family XREFs, or null if families were not selected or the search text is empty.
     */
    protected function familiesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Family::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('families')
            ->where('f_file', '=', $tree->id());

        $this->recordQuery($query, 'f_gedcom', $params);

        return $query->pluck('f_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree  $tree
     * @param array $params Reads 'type', 'search-for' and 'method'.
     *
     * @return Collection|null Individual XREFs, or null if individuals were not selected or the search text is empty.
     */
    protected function individualsToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Individual::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id());

        $this->recordQuery($query, 'i_gedcom', $params);

        return $query->pluck('i_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree  $tree
     * @param array $params Reads 'type', 'search-for' and 'method'.
     *
     * @return Collection|null Location XREFs, or null if locations were not selected or the search text is empty.
     */
    protected function locationsToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Location::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Location::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree  $tree
     * @param array $params Reads 'type', 'search-for' and 'method'.
     *
     * @return Collection|null Media XREFs, or null if media objects were not selected or the search text is empty.
     */
    protected function mediaToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Media::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('media')
            ->where('m_file', '=', $tree->id());

        $this->recordQuery($query, 'm_gedcom', $params);

        return $query->pluck('m_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree  $tree
     * @param array $params Reads 'type', 'search-for' and 'method'.
     *
     * @return Collection|null Note XREFs, or null if notes were not selected or the search text is empty.
     */
    protected function notesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Note::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree  $tree
     * @param array $params Reads 'type', 'search-for' and 'method'.
     *
     * @return Collection|null Repository XREFs, or null if repositories were not selected or the search text is empty.
     */
    protected function repositoriesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Repository::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * The base query comes from sourcesToFixQuery(), which is not defined in
     * this class (presumably provided by ModuleDataFixTrait - confirm).
     *
     * @param Tree  $tree
     * @param array $params Reads 'type', 'search-for' and 'method'.
     *
     * @return Collection|null Source XREFs, or null if sources were not selected or the search text is empty.
     */
    protected function sourcesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Source::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = $this->sourcesToFixQuery($tree, $params);

        $this->recordQuery($query, 's_gedcom', $params);

        return $query->pluck('s_id');
    }

    /**
     * A list of all records that need examining. This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * The base query comes from submittersToFixQuery(), which is not defined
     * in this class (presumably provided by ModuleDataFixTrait - confirm).
     *
     * @param Tree  $tree
     * @param array $params Reads 'type', 'search-for' and 'method'.
     *
     * @return Collection|null Submitter XREFs, or null if submitters were not selected or the search text is empty.
     */
    protected function submittersToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = $this->submittersToFixQuery($tree, $params);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * A record needs updating when the regular expression built from the
     * search parameters matches its GEDCOM. A PCRE failure (preg_match()
     * returning false) is treated as "no update needed".
     *
     * @param GedcomRecord $record
     * @param array        $params Reads 'search-for', 'method' and 'case'.
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord $record
     * @param array        $params Reads 'search-for', 'method', 'case' and 'replace-with'.
     *
     * @return string The diff between the current and the updated GEDCOM, as produced by DataFixService::gedcomDiff().
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record, $params);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord $record
     * @param array        $params Reads 'search-for', 'method', 'case' and 'replace-with'.
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        // NOTE(review): the meaning of the second argument is defined by
        // GedcomRecord::updateRecord(), which is not visible from this file.
        $record->updateRecord($this->updateGedcom($record, $params), false);
    }

    /**
     * Apply the search-and-replace to a record's GEDCOM, without saving it.
     *
     * @param GedcomRecord $record
     * @param array        $params Reads 'search-for', 'method', 'case' and 'replace-with'.
     *
     * @return string The updated GEDCOM, or the unchanged GEDCOM if the replacement failed.
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace-with'], ['\n' => "\n"]);

        $regex  = $this->createRegex($params);
        $gedcom = $record->gedcom();

        // preg_replace() returns null when PCRE fails (e.g. backtrack limit).
        // Returning null here would violate the declared return type, so we
        // leave the record's GEDCOM unchanged instead.
        return preg_replace($regex, $replace, $gedcom) ?? $gedcom;
    }

    /**
     * Create a regular expression from the search pattern.
     *
     * - "exact":     the quoted search text, anywhere.
     * - "words":     the quoted search text, between word boundaries.
     * - "wildcards": as "words", with "*" matching any run of characters and "?" a single character.
     * - "regex":     the search text is used as the pattern; if it does not compile,
     *                a pattern that never matches is returned instead.
     *
     * The value of $params['case'] is appended after the "u" modifier.
     *
     * @param array $params Reads 'search-for', 'method' and 'case'.
     *
     * @return string
     *
     * @throws HttpNotFoundException If the search method is not one of the four listed above.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search-for'];
        $method = $params['method'];
        $case   = $params['case'];

        if ($method === 'regex') {
            // Escape the delimiter, but only where it is not already escaped.
            // Blindly escaping every "/" would turn a user-supplied "\/" into
            // "\\/", which terminates the pattern early and makes it invalid.
            $escaped = preg_replace('/(?<!\\\\)((?:\\\\\\\\)*)\//', '$1\\\\/', $search)
                ?? addcslashes($search, '/');

            $regex = '/' . $escaped . '/u' . $case;

            try {
                // A valid regex on an empty string returns zero.
                // An invalid regex on an empty string returns false and raises a warning,
                // which may be converted to an exception by the application's error handler.
                if (preg_match($regex, '') === false) {
                    return self::INVALID_REGEX;
                }
            } catch (Throwable) {
                return self::INVALID_REGEX;
            }

            return $regex;
        }

        $quoted = preg_quote($search, '/');

        return match ($method) {
            'exact'     => '/' . $quoted . '/u' . $case,
            'words'     => '/\b' . $quoted . '\b/u' . $case,
            'wildcards' => '/\b' . strtr($quoted, ['\*' => '.*', '\?' => '.']) . '\b/u' . $case,
            default     => throw new HttpNotFoundException(),
        };
    }

    /**
     * Add a filter to an SQL query, so that it only selects records whose
     * GEDCOM might match the search pattern.
     *
     * This is a coarse pre-filter: "exact" and "words" both use the same LIKE
     * clause, and no filter is added for the "regex" method on "sqlsrv" (or
     * for an unrecognised method or driver).
     *
     * @param Builder $query  The query to modify.
     * @param string  $column The column containing the raw GEDCOM.
     * @param array   $params Reads 'search-for' and 'method'.
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search-for'];
        $method = $params['method'];

        // Escape the characters that are special in a LIKE pattern.
        $like = '%' . addcslashes($search, '\\%_') . '%';

        switch ($method) {
            case 'exact':
            case 'words':
                $query->where($column, 'LIKE', $like);
                break;

            case 'wildcards':
                // Convert our wildcards to their LIKE equivalents.
                $like = strtr($like, ['?' => '_', '*' => '%']);
                $query->where($column, 'LIKE', $like);
                break;

            case 'regex':
                // Substituting newlines seems to be necessary on *some* versions
                // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
                $search = strtr($search, ['\n' => "\n"]);

                switch (DB::driverName()) {
                    case 'sqlite':
                    case 'mysql':
                        $query->where($column, 'REGEXP', $search);
                        break;

                    case 'pgsql':
                        $query->where($column, '~', $search);
                        break;

                    case 'sqlsrv':
                        // Not available
                        break;
                }
                break;
        }
    }
}