xref: /webtrees/app/Module/FixSearchAndReplace.php (revision 5bfc689774bb9a6401271c4ed15a6d50652c991b)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Fisharebest\Webtrees\Family;
23use Fisharebest\Webtrees\GedcomRecord;
24use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
25use Fisharebest\Webtrees\I18N;
26use Fisharebest\Webtrees\Individual;
27use Fisharebest\Webtrees\Location;
28use Fisharebest\Webtrees\Media;
29use Fisharebest\Webtrees\Note;
30use Fisharebest\Webtrees\Repository;
31use Fisharebest\Webtrees\Services\DataFixService;
32use Fisharebest\Webtrees\Source;
33use Fisharebest\Webtrees\Submitter;
34use Fisharebest\Webtrees\Tree;
35use Illuminate\Database\Capsule\Manager as DB;
36use Illuminate\Database\Query\Builder;
37use Illuminate\Support\Collection;
38use Throwable;
39
40use function addcslashes;
41use function asort;
42use function preg_match;
43use function preg_quote;
44use function preg_replace;
45use function view;
46
47/**
48 * Class FixSearchAndReplace
49 */
50class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
51{
52    use ModuleDataFixTrait;
53
54    // A regular expression that never matches.
55    private const INVALID_REGEX = '/(?!)/';
56
57    private DataFixService $data_fix_service;
58
    /**
     * FixSearchAndReplace constructor.
     *
     * @param DataFixService $data_fix_service Kept on the instance; used by previewUpdate() to render a GEDCOM diff.
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }
68
    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string The translated name of this module.
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }
79
    /**
     * A sentence describing what this module does.
     *
     * @return string The translated description of this module.
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }
90
91    /**
92     * Options form.
93     *
94     * @param Tree $tree
95     *
96     * @return string
97     */
98    public function fixOptions(Tree $tree): string
99    {
100        $methods = [
101            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
102            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
103            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
104            /* I18N: https://en.wikipedia.org/wiki/Regular_expression */
105            'regex'     => I18N::translate('Regular expression'),
106        ];
107
108        $types = [
109            Family::RECORD_TYPE     => I18N::translate('Families'),
110            Individual::RECORD_TYPE => I18N::translate('Individuals'),
111            Location::RECORD_TYPE   => I18N::translate('Locations'),
112            Media::RECORD_TYPE      => I18N::translate('Media objects'),
113            Note::RECORD_TYPE       => I18N::translate('Notes'),
114            Repository::RECORD_TYPE => I18N::translate('Repositories'),
115            Source::RECORD_TYPE     => I18N::translate('Sources'),
116            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
117        ];
118
119        asort($types);
120
121        return view('modules/fix-search-and-replace/options', [
122            'default_method' => 'exact',
123            'default_type'   => Individual::RECORD_TYPE,
124            'methods'        => $methods,
125            'types'          => $types,
126        ]);
127    }
128
129    /**
130     * A list of all records that need examining.  This may include records
131     * that do not need updating, if we can't detect this quickly using SQL.
132     *
133     * @param Tree                 $tree
134     * @param array<string,string> $params
135     *
136     * @return Collection<int,string>|null
137     */
138    protected function familiesToFix(Tree $tree, array $params): ?Collection
139    {
140        if ($params['type'] !== Family::RECORD_TYPE || $params['search'] === '') {
141            return null;
142        }
143
144        $query = DB::table('families')->where('f_file', '=', $tree->id());
145        $this->recordQuery($query, 'f_gedcom', $params);
146
147        return $query->pluck('f_id');
148    }
149
150    /**
151     * A list of all records that need examining.  This may include records
152     * that do not need updating, if we can't detect this quickly using SQL.
153     *
154     * @param Tree                 $tree
155     * @param array<string,string> $params
156     *
157     * @return Collection<int,string>|null
158     */
159    protected function individualsToFix(Tree $tree, array $params): ?Collection
160    {
161        if ($params['type'] !== Individual::RECORD_TYPE || $params['search'] === '') {
162            return null;
163        }
164
165        $query = DB::table('individuals')
166            ->where('i_file', '=', $tree->id());
167
168        $this->recordQuery($query, 'i_gedcom', $params);
169
170        return $query->pluck('i_id');
171    }
172
173    /**
174     * A list of all records that need examining.  This may include records
175     * that do not need updating, if we can't detect this quickly using SQL.
176     *
177     * @param Tree                 $tree
178     * @param array<string,string> $params
179     *
180     * @return Collection<int,string>|null
181     */
182    protected function locationsToFix(Tree $tree, array $params): ?Collection
183    {
184        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
185            return null;
186        }
187
188        $query = DB::table('other')
189            ->where('o_file', '=', $tree->id())
190            ->where('o_type', '=', Location::RECORD_TYPE);
191
192        $this->recordQuery($query, 'o_gedcom', $params);
193
194        return $query->pluck('o_id');
195    }
196
197    /**
198     * A list of all records that need examining.  This may include records
199     * that do not need updating, if we can't detect this quickly using SQL.
200     *
201     * @param Tree                 $tree
202     * @param array<string,string> $params
203     *
204     * @return Collection<int,string>|null
205     */
206    protected function mediaToFix(Tree $tree, array $params): ?Collection
207    {
208        if ($params['type'] !== Media::RECORD_TYPE || $params['search'] === '') {
209            return null;
210        }
211
212        $query = DB::table('media')
213            ->where('m_file', '=', $tree->id());
214
215        $this->recordQuery($query, 'm_gedcom', $params);
216
217        return $query->pluck('m_id');
218    }
219
220    /**
221     * A list of all records that need examining.  This may include records
222     * that do not need updating, if we can't detect this quickly using SQL.
223     *
224     * @param Tree                 $tree
225     * @param array<string,string> $params
226     *
227     * @return Collection<int,string>|null
228     */
229    protected function notesToFix(Tree $tree, array $params): ?Collection
230    {
231        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
232            return null;
233        }
234
235        $query = DB::table('other')
236            ->where('o_file', '=', $tree->id())
237            ->where('o_type', '=', Note::RECORD_TYPE);
238
239        $this->recordQuery($query, 'o_gedcom', $params);
240
241        return $query->pluck('o_id');
242    }
243
244    /**
245     * A list of all records that need examining.  This may include records
246     * that do not need updating, if we can't detect this quickly using SQL.
247     *
248     * @param Tree                 $tree
249     * @param array<string,string> $params
250     *
251     * @return Collection<int,string>|null
252     */
253    protected function repositoriesToFix(Tree $tree, array $params): ?Collection
254    {
255        if ($params['type'] !== Repository::RECORD_TYPE || $params['search'] === '') {
256            return null;
257        }
258
259        $query = DB::table('other')
260            ->where('o_file', '=', $tree->id())
261            ->where('o_type', '=', Repository::RECORD_TYPE);
262
263        $this->recordQuery($query, 'o_gedcom', $params);
264
265        return $query->pluck('o_id');
266    }
267
268    /**
269     * A list of all records that need examining.  This may include records
270     * that do not need updating, if we can't detect this quickly using SQL.
271     *
272     * @param Tree                 $tree
273     * @param array<string,string> $params
274     *
275     * @return Collection<int,string>|null
276     */
277    protected function sourcesToFix(Tree $tree, array $params): ?Collection
278    {
279        if ($params['type'] !== Source::RECORD_TYPE || $params['search'] === '') {
280            return null;
281        }
282
283        $query = $this->sourcesToFixQuery($tree, $params);
284
285        $this->recordQuery($query, 's_gedcom', $params);
286
287        return $query->pluck('s_id');
288    }
289
290    /**
291     * A list of all records that need examining.  This may include records
292     * that do not need updating, if we can't detect this quickly using SQL.
293     *
294     * @param Tree                 $tree
295     * @param array<string,string> $params
296     *
297     * @return Collection<int,string>|null
298     */
299    protected function submittersToFix(Tree $tree, array $params): ?Collection
300    {
301        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search'] === '') {
302            return null;
303        }
304
305        $query = $this->submittersToFixQuery($tree, $params);
306
307        $this->recordQuery($query, 'o_gedcom', $params);
308
309        return $query->pluck('o_id');
310    }
311
312    /**
313     * Does a record need updating?
314     *
315     * @param GedcomRecord         $record
316     * @param array<string,string> $params
317     *
318     * @return bool
319     */
320    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
321    {
322        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
323    }
324
325    /**
326     * Show the changes we would make
327     *
328     * @param GedcomRecord         $record
329     * @param array<string,string> $params
330     *
331     * @return string
332     */
333    public function previewUpdate(GedcomRecord $record, array $params): string
334    {
335        $old = $record->gedcom();
336        $new = $this->updateGedcom($record, $params);
337
338        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
339    }
340
341    /**
342     * Fix a record
343     *
344     * @param GedcomRecord         $record
345     * @param array<string,string> $params
346     *
347     * @return void
348     */
349    public function updateRecord(GedcomRecord $record, array $params): void
350    {
351        $record->updateRecord($this->updateGedcom($record, $params), false);
352    }
353
354    /**
355     * @param GedcomRecord         $record
356     * @param array<string,string> $params
357     *
358     * @return string
359     */
360    private function updateGedcom(GedcomRecord $record, array $params): string
361    {
362        // Allow "\n" to indicate a line-feed in replacement text.
363        // Back-references such as $1, $2 are handled automatically.
364        $replace = strtr($params['replace'], ['\n' => "\n"]);
365
366        $regex = $this->createRegex($params);
367
368        return preg_replace($regex, $replace, $record->gedcom());
369    }
370
371    /**
372     * Create a regular expression from the search pattern.
373     *
374     * @param array<string,string> $params
375     *
376     * @return string
377     */
378    private function createRegex(array $params): string
379    {
380        $search = $params['search'];
381        $method = $params['method'];
382        $case   = $params['case'];
383
384        switch ($method) {
385            case 'exact':
386                return '/' . preg_quote($search, '/') . '/u' . $case;
387
388            case 'words':
389                return '/\b' . preg_quote($search, '/') . '\b/u' . $case;
390
391            case 'wildcards':
392                return '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case;
393
394            case 'regex':
395                $regex = '/' . addcslashes($search, '/') . '/u' . $case;
396
397                try {
398                    // A valid regex on an empty string returns zero.
399                    // An invalid regex on an empty string returns false and throws a warning.
400                    preg_match($regex, '');
401                } catch (Throwable $ex) {
402                    $regex = self::INVALID_REGEX;
403                }
404
405                return $regex;
406        }
407
408        throw new HttpNotFoundException();
409    }
410
411    /**
412     * Create a regular expression from the search pattern.
413     *
414     * @param Builder              $query
415     * @param string               $column
416     * @param array<string,string> $params
417     *
418     * @return void
419     */
420    private function recordQuery(Builder $query, string $column, array $params): void
421    {
422        $search = $params['search'];
423        $method = $params['method'];
424        $like   = '%' . addcslashes($search, '\\%_') . '%';
425
426        switch ($method) {
427            case 'exact':
428            case 'words':
429                $query->where($column, 'LIKE', $like);
430                break;
431
432            case 'wildcards':
433                $like = strtr($like, ['?' => '_', '*' => '%']);
434                $query->where($column, 'LIKE', $like);
435                break;
436
437            case 'regex':
438                // Substituting newlines seems to be necessary on *some* versions
439                //.of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
440                $search = strtr($search, ['\n' => "\n"]);
441
442                switch (DB::connection()->getDriverName()) {
443                    case 'sqlite':
444                    case 'mysql':
445                        $query->where($column, 'REGEXP', $search);
446                        break;
447
448                    case 'pgsql':
449                        $query->where($column, '~', $search);
450                        break;
451
452                    case 'sqlsvr':
453                        // Not available
454                        break;
455                }
456                break;
457        }
458    }
459}
460