xref: /webtrees/app/Module/FixSearchAndReplace.php (revision 2ebcf907ed34213f816592af04e6c160335d6311)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Fisharebest\Webtrees\Family;
23use Fisharebest\Webtrees\GedcomRecord;
24use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
25use Fisharebest\Webtrees\I18N;
26use Fisharebest\Webtrees\Individual;
27use Fisharebest\Webtrees\Location;
28use Fisharebest\Webtrees\Media;
29use Fisharebest\Webtrees\Note;
30use Fisharebest\Webtrees\Repository;
31use Fisharebest\Webtrees\Services\DataFixService;
32use Fisharebest\Webtrees\Source;
33use Fisharebest\Webtrees\Submitter;
34use Fisharebest\Webtrees\Tree;
35use Illuminate\Database\Capsule\Manager as DB;
36use Illuminate\Database\Query\Builder;
37use Illuminate\Support\Collection;
38use Throwable;
39
40use function addcslashes;
41use function asort;
42use function preg_match;
43use function preg_quote;
44use function preg_replace;
45use function view;
46
47/**
48 * Class FixSearchAndReplace
49 */
50class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
51{
52    use ModuleDataFixTrait;
53
    // A regular expression that never matches: "(?!)" is an empty negative
    // look-ahead, which fails at every position of every subject string.
    private const INVALID_REGEX = '/(?!)/';
56
    /** @var DataFixService Service used to render the difference between old and new GEDCOM data. */
    private $data_fix_service;
59
    /**
     * FixSearchAndReplace constructor.
     *
     * @param DataFixService $data_fix_service Stored for later use when previewing changes.
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }
69
70    /**
71     * How should this module be identified in the control panel, etc.?
72     *
73     * @return string
74     */
75    public function title(): string
76    {
77        /* I18N: Name of a module */
78        return I18N::translate('Search and replace');
79    }
80
81    /**
82     * A sentence describing what this module does.
83     *
84     * @return string
85     */
86    public function description(): string
87    {
88        /* I18N: Description of a “Data fix” module */
89        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
90    }
91
92    /**
93     * Options form.
94     *
95     * @param Tree $tree
96     *
97     * @return string
98     */
99    public function fixOptions(Tree $tree): string
100    {
101        $methods = [
102            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
103            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
104            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
105            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
106            'regex'     => I18N::translate('Regular expression'),
107        ];
108
109        $types = [
110            Family::RECORD_TYPE     => I18N::translate('Families'),
111            Individual::RECORD_TYPE => I18N::translate('Individuals'),
112            Location::RECORD_TYPE   => I18N::translate('Locations'),
113            Media::RECORD_TYPE      => I18N::translate('Media objects'),
114            Note::RECORD_TYPE       => I18N::translate('Notes'),
115            Repository::RECORD_TYPE => I18N::translate('Repositories'),
116            Source::RECORD_TYPE     => I18N::translate('Sources'),
117            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
118        ];
119
120        asort($types);
121
122        return view('modules/fix-search-and-replace/options', [
123            'default_method' => 'exact',
124            'default_type'   => Individual::RECORD_TYPE,
125            'methods'        => $methods,
126            'types'          => $types,
127        ]);
128    }
129
130    /**
131     * A list of all records that need examining.  This may include records
132     * that do not need updating, if we can't detect this quickly using SQL.
133     *
134     * @param Tree                 $tree
135     * @param array<string,string> $params
136     *
137     * @return Collection<string>|null
138     */
139    protected function familiesToFix(Tree $tree, array $params): ?Collection
140    {
141        if ($params['type'] !== Family::RECORD_TYPE || $params['search'] === '') {
142            return null;
143        }
144
145        $query = DB::table('families')->where('f_file', '=', $tree->id());
146        $this->recordQuery($query, 'f_gedcom', $params);
147
148        return $query->pluck('f_id');
149    }
150
151    /**
152     * A list of all records that need examining.  This may include records
153     * that do not need updating, if we can't detect this quickly using SQL.
154     *
155     * @param Tree                 $tree
156     * @param array<string,string> $params
157     *
158     * @return Collection<string>|null
159     */
160    protected function individualsToFix(Tree $tree, array $params): ?Collection
161    {
162        if ($params['type'] !== Individual::RECORD_TYPE || $params['search'] === '') {
163            return null;
164        }
165
166        $query = DB::table('individuals')
167            ->where('i_file', '=', $tree->id());
168
169        $this->recordQuery($query, 'i_gedcom', $params);
170
171        return $query->pluck('i_id');
172    }
173
174    /**
175     * A list of all records that need examining.  This may include records
176     * that do not need updating, if we can't detect this quickly using SQL.
177     *
178     * @param Tree                 $tree
179     * @param array<string,string> $params
180     *
181     * @return Collection<string>|null
182     */
183    protected function locationsToFix(Tree $tree, array $params): ?Collection
184    {
185        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
186            return null;
187        }
188
189        $query = DB::table('other')
190            ->where('o_file', '=', $tree->id())
191            ->where('o_type', '=', Location::RECORD_TYPE);
192
193        $this->recordQuery($query, 'o_gedcom', $params);
194
195        return $query->pluck('o_id');
196    }
197
198    /**
199     * A list of all records that need examining.  This may include records
200     * that do not need updating, if we can't detect this quickly using SQL.
201     *
202     * @param Tree                 $tree
203     * @param array<string,string> $params
204     *
205     * @return Collection<string>|null
206     */
207    protected function mediaToFix(Tree $tree, array $params): ?Collection
208    {
209        if ($params['type'] !== Media::RECORD_TYPE || $params['search'] === '') {
210            return null;
211        }
212
213        $query = DB::table('media')
214            ->where('m_file', '=', $tree->id());
215
216        $this->recordQuery($query, 'm_gedcom', $params);
217
218        return $query->pluck('m_id');
219    }
220
221    /**
222     * A list of all records that need examining.  This may include records
223     * that do not need updating, if we can't detect this quickly using SQL.
224     *
225     * @param Tree                 $tree
226     * @param array<string,string> $params
227     *
228     * @return Collection<string>|null
229     */
230    protected function notesToFix(Tree $tree, array $params): ?Collection
231    {
232        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
233            return null;
234        }
235
236        $query = DB::table('other')
237            ->where('o_file', '=', $tree->id())
238            ->where('o_type', '=', Note::RECORD_TYPE);
239
240        $this->recordQuery($query, 'o_gedcom', $params);
241
242        return $query->pluck('o_id');
243    }
244
245    /**
246     * A list of all records that need examining.  This may include records
247     * that do not need updating, if we can't detect this quickly using SQL.
248     *
249     * @param Tree                 $tree
250     * @param array<string,string> $params
251     *
252     * @return Collection<string>|null
253     */
254    protected function repositoriesToFix(Tree $tree, array $params): ?Collection
255    {
256        if ($params['type'] !== Repository::RECORD_TYPE || $params['search'] === '') {
257            return null;
258        }
259
260        $query = DB::table('other')
261            ->where('o_file', '=', $tree->id())
262            ->where('o_type', '=', Repository::RECORD_TYPE);
263
264        $this->recordQuery($query, 'o_gedcom', $params);
265
266        return $query->pluck('o_id');
267    }
268
269    /**
270     * A list of all records that need examining.  This may include records
271     * that do not need updating, if we can't detect this quickly using SQL.
272     *
273     * @param Tree                 $tree
274     * @param array<string,string> $params
275     *
276     * @return Collection<string>|null
277     */
278    protected function sourcesToFix(Tree $tree, array $params): ?Collection
279    {
280        if ($params['type'] !== Source::RECORD_TYPE || $params['search'] === '') {
281            return null;
282        }
283
284        $query = $this->sourcesToFixQuery($tree, $params);
285
286        $this->recordQuery($query, 's_gedcom', $params);
287
288        return $query->pluck('s_id');
289    }
290
291    /**
292     * A list of all records that need examining.  This may include records
293     * that do not need updating, if we can't detect this quickly using SQL.
294     *
295     * @param Tree                 $tree
296     * @param array<string,string> $params
297     *
298     * @return Collection<string>|null
299     */
300    protected function submittersToFix(Tree $tree, array $params): ?Collection
301    {
302        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search'] === '') {
303            return null;
304        }
305
306        $query = $this->submittersToFixQuery($tree, $params);
307
308        $this->recordQuery($query, 'o_gedcom', $params);
309
310        return $query->pluck('o_id');
311    }
312
313    /**
314     * Does a record need updating?
315     *
316     * @param GedcomRecord         $record
317     * @param array<string,string> $params
318     *
319     * @return bool
320     */
321    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
322    {
323        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
324    }
325
326    /**
327     * Show the changes we would make
328     *
329     * @param GedcomRecord         $record
330     * @param array<string,string> $params
331     *
332     * @return string
333     */
334    public function previewUpdate(GedcomRecord $record, array $params): string
335    {
336        $old = $record->gedcom();
337        $new = $this->updateGedcom($record, $params);
338
339        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
340    }
341
342    /**
343     * Fix a record
344     *
345     * @param GedcomRecord         $record
346     * @param array<string,string> $params
347     *
348     * @return void
349     */
350    public function updateRecord(GedcomRecord $record, array $params): void
351    {
352        $record->updateRecord($this->updateGedcom($record, $params), false);
353    }
354
355    /**
356     * @param GedcomRecord         $record
357     * @param array<string,string> $params
358     *
359     * @return string
360     */
361    private function updateGedcom(GedcomRecord $record, array $params): string
362    {
363        // Allow "\n" to indicate a line-feed in replacement text.
364        // Back-references such as $1, $2 are handled automatically.
365        $replace = strtr($params['replace'], ['\n' => "\n"]);
366
367        $regex = $this->createRegex($params);
368
369        return preg_replace($regex, $replace, $record->gedcom());
370    }
371
372    /**
373     * Create a regular expression from the search pattern.
374     *
375     * @param array<string,string> $params
376     *
377     * @return string
378     */
379    private function createRegex(array $params): string
380    {
381        $search = $params['search'];
382        $method = $params['method'];
383        $case   = $params['case'];
384
385        switch ($method) {
386            case 'exact':
387                return '/' . preg_quote($search, '/') . '/u' . $case;
388
389            case 'words':
390                return '/\b' . preg_quote($search, '/') . '\b/u' . $case;
391
392            case 'wildcards':
393                return '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case;
394
395            case 'regex':
396                $regex = '/' . addcslashes($search, '/') . '/u' . $case;
397
398                try {
399                    // A valid regex on an empty string returns zero.
400                    // An invalid regex on an empty string returns false and throws a warning.
401                    preg_match($regex, '');
402                } catch (Throwable $ex) {
403                    $regex = self::INVALID_REGEX;
404                }
405
406                return $regex;
407        }
408
409        throw new HttpNotFoundException();
410    }
411
412    /**
413     * Create a regular expression from the search pattern.
414     *
415     * @param Builder              $query
416     * @param string               $column
417     * @param array<string,string> $params
418     *
419     * @return void
420     */
421    private function recordQuery(Builder $query, string $column, array $params): void
422    {
423        $search = $params['search'];
424        $method = $params['method'];
425        $like   = '%' . addcslashes($search, '\\%_') . '%';
426
427        switch ($method) {
428            case 'exact':
429            case 'words':
430                $query->where($column, 'LIKE', $like);
431                break;
432
433            case 'wildcards':
434                $like = strtr($like, ['?' => '_', '*' => '%']);
435                $query->where($column, 'LIKE', $like);
436                break;
437
438            case 'regex':
439                // Substituting newlines seems to be necessary on *some* versions
440                //.of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
441                $search = strtr($search, ['\n' => "\n"]);
442
443                switch (DB::connection()->getDriverName()) {
444                    case 'sqlite':
445                    case 'mysql':
446                        $query->where($column, 'REGEXP', $search);
447                        break;
448
449                    case 'pgsql':
450                        $query->where($column, '~', $search);
451                        break;
452
453                    case 'sqlsvr':
454                        // Not available
455                        break;
456                }
457                break;
458        }
459    }
460}
461