xref: /webtrees/app/Module/FixSearchAndReplace.php (revision 18fd0859d876caec952296f28638ed0844e10712)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Fisharebest\Webtrees\DB;
23use Fisharebest\Webtrees\Family;
24use Fisharebest\Webtrees\GedcomRecord;
25use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
26use Fisharebest\Webtrees\I18N;
27use Fisharebest\Webtrees\Individual;
28use Fisharebest\Webtrees\Location;
29use Fisharebest\Webtrees\Media;
30use Fisharebest\Webtrees\Note;
31use Fisharebest\Webtrees\Repository;
32use Fisharebest\Webtrees\Services\DataFixService;
33use Fisharebest\Webtrees\Source;
34use Fisharebest\Webtrees\Submitter;
35use Fisharebest\Webtrees\Tree;
36use Illuminate\Database\Query\Builder;
37use Illuminate\Support\Collection;
38use Throwable;
39
use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function strtr;
use function view;
46
47/**
48 * Class FixSearchAndReplace
49 */
50class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
51{
52    use ModuleDataFixTrait;
53
54    // A regular expression that never matches.
55    private const INVALID_REGEX = '/(?!)/';
56
57    private DataFixService $data_fix_service;
58
    /**
     * Store the data-fix service, which this module uses to render the
     * difference between the old and new GEDCOM when previewing a change.
     *
     * @param DataFixService $data_fix_service
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }
66
    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string The translated name of the module.
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }
77
    /**
     * A sentence describing what this module does.
     *
     * @return string The translated description of the module.
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }
88
89    /**
90     * Options form.
91     *
92     * @param Tree $tree
93     *
94     * @return string
95     */
96    public function fixOptions(Tree $tree): string
97    {
98        $methods = [
99            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
100            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
101            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
102            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
103            'regex'     => I18N::translate('Regular expression'),
104        ];
105
106        $types = [
107            Family::RECORD_TYPE     => I18N::translate('Families'),
108            Individual::RECORD_TYPE => I18N::translate('Individuals'),
109            Location::RECORD_TYPE   => I18N::translate('Locations'),
110            Media::RECORD_TYPE      => I18N::translate('Media objects'),
111            Note::RECORD_TYPE       => I18N::translate('Notes'),
112            Repository::RECORD_TYPE => I18N::translate('Repositories'),
113            Source::RECORD_TYPE     => I18N::translate('Sources'),
114            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
115        ];
116
117        asort($types);
118
119        return view('modules/fix-search-and-replace/options', [
120            'default_method' => 'exact',
121            'default_type'   => Individual::RECORD_TYPE,
122            'methods'        => $methods,
123            'types'          => $types,
124        ]);
125    }
126
127    /**
128     * A list of all records that need examining.  This may include records
129     * that do not need updating, if we can't detect this quickly using SQL.
130     *
131     * @param Tree                 $tree
132     * @param array<string,string> $params
133     *
134     * @return Collection<int,string>|null
135     */
136    protected function familiesToFix(Tree $tree, array $params): Collection|null
137    {
138        if ($params['type'] !== Family::RECORD_TYPE || $params['search-for'] === '') {
139            return null;
140        }
141
142        $query = DB::table('families')->where('f_file', '=', $tree->id());
143        $this->recordQuery($query, 'f_gedcom', $params);
144
145        return $query->pluck('f_id');
146    }
147
148    /**
149     * A list of all records that need examining.  This may include records
150     * that do not need updating, if we can't detect this quickly using SQL.
151     *
152     * @param Tree                 $tree
153     * @param array<string,string> $params
154     *
155     * @return Collection<int,string>|null
156     */
157    protected function individualsToFix(Tree $tree, array $params): Collection|null
158    {
159        if ($params['type'] !== Individual::RECORD_TYPE || $params['search-for'] === '') {
160            return null;
161        }
162
163        $query = DB::table('individuals')
164            ->where('i_file', '=', $tree->id());
165
166        $this->recordQuery($query, 'i_gedcom', $params);
167
168        return $query->pluck('i_id');
169    }
170
171    /**
172     * A list of all records that need examining.  This may include records
173     * that do not need updating, if we can't detect this quickly using SQL.
174     *
175     * @param Tree                 $tree
176     * @param array<string,string> $params
177     *
178     * @return Collection<int,string>|null
179     */
180    protected function locationsToFix(Tree $tree, array $params): Collection|null
181    {
182        if ($params['type'] !== Location::RECORD_TYPE || $params['search-for'] === '') {
183            return null;
184        }
185
186        $query = DB::table('other')
187            ->where('o_file', '=', $tree->id())
188            ->where('o_type', '=', Location::RECORD_TYPE);
189
190        $this->recordQuery($query, 'o_gedcom', $params);
191
192        return $query->pluck('o_id');
193    }
194
195    /**
196     * A list of all records that need examining.  This may include records
197     * that do not need updating, if we can't detect this quickly using SQL.
198     *
199     * @param Tree                 $tree
200     * @param array<string,string> $params
201     *
202     * @return Collection<int,string>|null
203     */
204    protected function mediaToFix(Tree $tree, array $params): Collection|null
205    {
206        if ($params['type'] !== Media::RECORD_TYPE || $params['search-for'] === '') {
207            return null;
208        }
209
210        $query = DB::table('media')
211            ->where('m_file', '=', $tree->id());
212
213        $this->recordQuery($query, 'm_gedcom', $params);
214
215        return $query->pluck('m_id');
216    }
217
218    /**
219     * A list of all records that need examining.  This may include records
220     * that do not need updating, if we can't detect this quickly using SQL.
221     *
222     * @param Tree                 $tree
223     * @param array<string,string> $params
224     *
225     * @return Collection<int,string>|null
226     */
227    protected function notesToFix(Tree $tree, array $params): Collection|null
228    {
229        if ($params['type'] !== Note::RECORD_TYPE || $params['search-for'] === '') {
230            return null;
231        }
232
233        $query = DB::table('other')
234            ->where('o_file', '=', $tree->id())
235            ->where('o_type', '=', Note::RECORD_TYPE);
236
237        $this->recordQuery($query, 'o_gedcom', $params);
238
239        return $query->pluck('o_id');
240    }
241
242    /**
243     * A list of all records that need examining.  This may include records
244     * that do not need updating, if we can't detect this quickly using SQL.
245     *
246     * @param Tree                 $tree
247     * @param array<string,string> $params
248     *
249     * @return Collection<int,string>|null
250     */
251    protected function repositoriesToFix(Tree $tree, array $params): Collection|null
252    {
253        if ($params['type'] !== Repository::RECORD_TYPE || $params['search-for'] === '') {
254            return null;
255        }
256
257        $query = DB::table('other')
258            ->where('o_file', '=', $tree->id())
259            ->where('o_type', '=', Repository::RECORD_TYPE);
260
261        $this->recordQuery($query, 'o_gedcom', $params);
262
263        return $query->pluck('o_id');
264    }
265
266    /**
267     * A list of all records that need examining.  This may include records
268     * that do not need updating, if we can't detect this quickly using SQL.
269     *
270     * @param Tree                 $tree
271     * @param array<string,string> $params
272     *
273     * @return Collection<int,string>|null
274     */
275    protected function sourcesToFix(Tree $tree, array $params): Collection|null
276    {
277        if ($params['type'] !== Source::RECORD_TYPE || $params['search-for'] === '') {
278            return null;
279        }
280
281        $query = $this->sourcesToFixQuery($tree, $params);
282
283        $this->recordQuery($query, 's_gedcom', $params);
284
285        return $query->pluck('s_id');
286    }
287
288    /**
289     * A list of all records that need examining.  This may include records
290     * that do not need updating, if we can't detect this quickly using SQL.
291     *
292     * @param Tree                 $tree
293     * @param array<string,string> $params
294     *
295     * @return Collection<int,string>|null
296     */
297    protected function submittersToFix(Tree $tree, array $params): Collection|null
298    {
299        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search-for'] === '') {
300            return null;
301        }
302
303        $query = $this->submittersToFixQuery($tree, $params);
304
305        $this->recordQuery($query, 'o_gedcom', $params);
306
307        return $query->pluck('o_id');
308    }
309
310    /**
311     * Does a record need updating?
312     *
313     * @param GedcomRecord         $record
314     * @param array<string,string> $params
315     *
316     * @return bool
317     */
318    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
319    {
320        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
321    }
322
323    /**
324     * Show the changes we would make
325     *
326     * @param GedcomRecord         $record
327     * @param array<string,string> $params
328     *
329     * @return string
330     */
331    public function previewUpdate(GedcomRecord $record, array $params): string
332    {
333        $old = $record->gedcom();
334        $new = $this->updateGedcom($record, $params);
335
336        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
337    }
338
339    /**
340     * Fix a record
341     *
342     * @param GedcomRecord         $record
343     * @param array<string,string> $params
344     *
345     * @return void
346     */
347    public function updateRecord(GedcomRecord $record, array $params): void
348    {
349        $record->updateRecord($this->updateGedcom($record, $params), false);
350    }
351
352    /**
353     * @param GedcomRecord         $record
354     * @param array<string,string> $params
355     *
356     * @return string
357     */
358    private function updateGedcom(GedcomRecord $record, array $params): string
359    {
360        // Allow "\n" to indicate a line-feed in replacement text.
361        // Back-references such as $1, $2 are handled automatically.
362        $replace = strtr($params['replace-with'], ['\n' => "\n"]);
363
364        $regex = $this->createRegex($params);
365
366        return preg_replace($regex, $replace, $record->gedcom());
367    }
368
369    /**
370     * Create a regular expression from the search pattern.
371     *
372     * @param array<string,string> $params
373     *
374     * @return string
375     */
376    private function createRegex(array $params): string
377    {
378        $search = $params['search-for'];
379        $method = $params['method'];
380        $case   = $params['case'];
381
382        switch ($method) {
383            case 'exact':
384                return '/' . preg_quote($search, '/') . '/u' . $case;
385
386            case 'words':
387                return '/\b' . preg_quote($search, '/') . '\b/u' . $case;
388
389            case 'wildcards':
390                return '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case;
391
392            case 'regex':
393                $regex = '/' . addcslashes($search, '/') . '/u' . $case;
394
395                try {
396                    // A valid regex on an empty string returns zero.
397                    // An invalid regex on an empty string returns false and throws a warning.
398                    preg_match($regex, '');
399                } catch (Throwable) {
400                    $regex = self::INVALID_REGEX;
401                }
402
403                return $regex;
404        }
405
406        throw new HttpNotFoundException();
407    }
408
409    /**
410     * Create a regular expression from the search pattern.
411     *
412     * @param Builder              $query
413     * @param string               $column
414     * @param array<string,string> $params
415     *
416     * @return void
417     */
418    private function recordQuery(Builder $query, string $column, array $params): void
419    {
420        $search = $params['search-for'];
421        $method = $params['method'];
422        $like   = '%' . addcslashes($search, '\\%_') . '%';
423
424        switch ($method) {
425            case 'exact':
426            case 'words':
427                $query->where($column, 'LIKE', $like);
428                break;
429
430            case 'wildcards':
431                $like = strtr($like, ['?' => '_', '*' => '%']);
432                $query->where($column, 'LIKE', $like);
433                break;
434
435            case 'regex':
436                // Substituting newlines seems to be necessary on *some* versions
437                // of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
438                $search = strtr($search, ['\n' => "\n"]);
439
440                $query->where($column, DB::regexOperator(), $search);
441                break;
442        }
443    }
444}
445