xref: /webtrees/app/Module/FixSearchAndReplace.php (revision 5f52b64188c4f443eebeb985bb53134753a7560f)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Fisharebest\Webtrees\DB;
23use Fisharebest\Webtrees\Family;
24use Fisharebest\Webtrees\GedcomRecord;
25use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
26use Fisharebest\Webtrees\I18N;
27use Fisharebest\Webtrees\Individual;
28use Fisharebest\Webtrees\Location;
29use Fisharebest\Webtrees\Media;
30use Fisharebest\Webtrees\Note;
31use Fisharebest\Webtrees\Repository;
32use Fisharebest\Webtrees\Services\DataFixService;
33use Fisharebest\Webtrees\Source;
34use Fisharebest\Webtrees\Submitter;
35use Fisharebest\Webtrees\Tree;
36use Illuminate\Database\Query\Builder;
37use Illuminate\Support\Collection;
38use Throwable;
39
40use function addcslashes;
41use function asort;
42use function preg_match;
43use function preg_quote;
44use function preg_replace;
45use function view;
46
/**
 * Data fix: search and replace text in the raw GEDCOM of records.
 *
 * The search can be an exact text, whole words, a simple wildcard pattern
 * or a regular expression.
 */
class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
{
    use ModuleDataFixTrait;

    // A regular expression that never matches.
    private const INVALID_REGEX = '/(?!)/';

    private DataFixService $data_fix_service;

    /**
     * @param DataFixService $data_fix_service
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }

    /**
     * Options form.
     *
     * Offers the four search methods and the record types that can be searched.
     *
     * @param Tree $tree
     *
     * @return string
     */
    public function fixOptions(Tree $tree): string
    {
        // The keys must match the methods handled by createRegex() and recordQuery().
        $methods = [
            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
            'regex'     => I18N::translate('Regular expression'),
        ];

        $types = [
            Family::RECORD_TYPE     => I18N::translate('Families'),
            Individual::RECORD_TYPE => I18N::translate('Individuals'),
            Location::RECORD_TYPE   => I18N::translate('Locations'),
            Media::RECORD_TYPE      => I18N::translate('Media objects'),
            Note::RECORD_TYPE       => I18N::translate('Notes'),
            Repository::RECORD_TYPE => I18N::translate('Repositories'),
            Source::RECORD_TYPE     => I18N::translate('Sources'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
        ];

        // Sort by translated label, keeping the record-type keys.
        asort($types);

        return view('modules/fix-search-and-replace/options', [
            'default_method' => 'exact',
            'default_type'   => Individual::RECORD_TYPE,
            'methods'        => $methods,
            'types'          => $types,
        ]);
    }

    /**
     * A list of all families that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null Null when families were not selected, or there is nothing to search for.
     */
    protected function familiesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Family::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('families')
            ->where('f_file', '=', $tree->id());

        $this->recordQuery($query, 'f_gedcom', $params);

        return $query->pluck('f_id');
    }

    /**
     * A list of all individuals that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null Null when individuals were not selected, or there is nothing to search for.
     */
    protected function individualsToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Individual::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('individuals')
            ->where('i_file', '=', $tree->id());

        $this->recordQuery($query, 'i_gedcom', $params);

        return $query->pluck('i_id');
    }

    /**
     * A list of all locations that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null Null when locations were not selected, or there is nothing to search for.
     */
    protected function locationsToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Location::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Location::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all media objects that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null Null when media objects were not selected, or there is nothing to search for.
     */
    protected function mediaToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Media::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('media')
            ->where('m_file', '=', $tree->id());

        $this->recordQuery($query, 'm_gedcom', $params);

        return $query->pluck('m_id');
    }

    /**
     * A list of all notes that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null Null when notes were not selected, or there is nothing to search for.
     */
    protected function notesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Note::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all repositories that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null Null when repositories were not selected, or there is nothing to search for.
     */
    protected function repositoriesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Repository::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * A list of all sources that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null Null when sources were not selected, or there is nothing to search for.
     */
    protected function sourcesToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Source::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = $this->sourcesToFixQuery($tree, $params);

        $this->recordQuery($query, 's_gedcom', $params);

        return $query->pluck('s_id');
    }

    /**
     * A list of all submitters that need examining.  This may include records
     * that do not need updating, if we can't detect this quickly using SQL.
     *
     * @param Tree                 $tree
     * @param array<string,string> $params
     *
     * @return Collection<int,string>|null Null when submitters were not selected, or there is nothing to search for.
     */
    protected function submittersToFix(Tree $tree, array $params): Collection|null
    {
        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search-for'] === '') {
            return null;
        }

        $query = $this->submittersToFixQuery($tree, $params);

        $this->recordQuery($query, 'o_gedcom', $params);

        return $query->pluck('o_id');
    }

    /**
     * Does a record need updating?
     *
     * A record needs updating when the search pattern matches its GEDCOM.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return bool
     */
    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
    {
        // preg_match() returns false on error; only a definite match counts.
        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
    }

    /**
     * Show the changes we would make
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string
     */
    public function previewUpdate(GedcomRecord $record, array $params): string
    {
        $old = $record->gedcom();
        $new = $this->updateGedcom($record, $params);

        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
    }

    /**
     * Fix a record
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return void
     */
    public function updateRecord(GedcomRecord $record, array $params): void
    {
        $record->updateRecord($this->updateGedcom($record, $params), false);
    }

    /**
     * Apply the search and replace to the GEDCOM of a record.
     *
     * @param GedcomRecord         $record
     * @param array<string,string> $params
     *
     * @return string The modified GEDCOM, or the original GEDCOM if the replacement failed.
     */
    private function updateGedcom(GedcomRecord $record, array $params): string
    {
        $gedcom = $record->gedcom();

        // Allow "\n" to indicate a line-feed in replacement text.
        // Back-references such as $1, $2 are handled automatically.
        $replace = strtr($params['replace-with'], ['\n' => "\n"]);

        $regex = $this->createRegex($params);

        // preg_replace() returns null on a PCRE error (e.g. backtrack limit).
        // Leave the record unchanged rather than violating the return type.
        return preg_replace($regex, $replace, $gedcom) ?? $gedcom;
    }

    /**
     * Create a regular expression from the search pattern.
     *
     * @param array<string,string> $params
     *
     * @return string
     *
     * @throws HttpNotFoundException If the search method is not recognised.
     */
    private function createRegex(array $params): string
    {
        $search = $params['search-for'];
        $method = $params['method'];
        $case   = $params['case'];

        return match ($method) {
            'exact'     => '/' . preg_quote($search, '/') . '/u' . $case,
            'words'     => '/\b' . preg_quote($search, '/') . '\b/u' . $case,
            // preg_quote() escapes the wildcards, so we look for their escaped form.
            'wildcards' => '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case,
            'regex'     => $this->validateRegex('/' . $this->escapeDelimiter($search) . '/u' . $case),
            default     => throw new HttpNotFoundException(),
        };
    }

    /**
     * Escape the "/" delimiter in a user-supplied regular expression.
     *
     * Only slashes that are not already escaped are changed.  Escaping every
     * slash would turn "\/" into "\\/" - an escaped backslash followed by a
     * bare delimiter - which ends the pattern prematurely.
     *
     * @param string $search
     *
     * @return string
     */
    private function escapeDelimiter(string $search): string
    {
        // A slash preceded by an even number (including zero) of backslashes is unescaped.
        $escaped = preg_replace('~(?<!\\\\)((?:\\\\\\\\)*)/~', '$1\\/', $search);

        // On a PCRE error, fall back to escaping every slash.
        return $escaped ?? addcslashes($search, '/');
    }

    /**
     * Replace an invalid regular expression with one that never matches.
     *
     * @param string $regex
     *
     * @return string
     */
    private function validateRegex(string $regex): string
    {
        try {
            // A valid regex on an empty string returns zero.
            // An invalid regex returns false and raises a warning.  The warning only
            // reaches the catch block if an error handler converts it to an exception,
            // so the return value must be checked as well.
            $valid = preg_match($regex, '') !== false;
        } catch (Throwable) {
            $valid = false;
        }

        return $valid ? $regex : self::INVALID_REGEX;
    }

    /**
     * Add a condition to a query, to select records that may match the search pattern.
     *
     * This is a quick pre-filter.  It may select records that do not match.
     * An unrecognised search method adds no condition.
     *
     * @param Builder              $query
     * @param string               $column The column containing the GEDCOM data.
     * @param array<string,string> $params
     *
     * @return void
     */
    private function recordQuery(Builder $query, string $column, array $params): void
    {
        $search = $params['search-for'];
        $method = $params['method'];

        // Escape the characters that are special in a LIKE pattern.
        $like = '%' . addcslashes($search, '\\%_') . '%';

        switch ($method) {
            case 'exact':
            case 'words':
                $query->where($column, 'LIKE', $like);
                break;

            case 'wildcards':
                // Convert our wildcards to their SQL equivalents.
                $like = strtr($like, ['?' => '_', '*' => '%']);
                $query->where($column, 'LIKE', $like);
                break;

            case 'regex':
                // Substituting newlines seems to be necessary on *some* versions
                // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
                $search = strtr($search, ['\n' => "\n"]);

                $query->where($column, DB::regexOperator(), $search);
                break;
        }
    }
}
440