xref: /webtrees/app/Module/FixSearchAndReplace.php (revision 92657c8a5b15f2b73e568e36bd57c604210a6361)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
use Fisharebest\Webtrees\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\DataFixService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Builder;
use Illuminate\Support\Collection;
use Throwable;

use function addcslashes;
use function asort;
use function preg_match;
use function preg_quote;
use function preg_replace;
use function strpos;
use function strtr;
use function view;
45
46/**
47 * Class FixSearchAndReplace
48 */
49class FixSearchAndReplace extends AbstractModule implements ModuleDataFixInterface
50{
51    use ModuleDataFixTrait;
52
53    // A regular expression that never matches.
54    private const INVALID_REGEX = '/(?!)/';
55
56    /** @var DataFixService */
57    private $data_fix_service;
58
    /**
     * FixSearchAndReplace constructor.
     *
     * @param DataFixService $data_fix_service Kept on the instance; previewUpdate() uses it to render the diff.
     */
    public function __construct(DataFixService $data_fix_service)
    {
        $this->data_fix_service = $data_fix_service;
    }
68
    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string The module name, passed through I18N::translate().
     */
    public function title(): string
    {
        /* I18N: Name of a module */
        return I18N::translate('Search and replace');
    }
79
    /**
     * A sentence describing what this module does.
     *
     * @return string The description, passed through I18N::translate().
     */
    public function description(): string
    {
        /* I18N: Description of a “Data fix” module */
        return I18N::translate('Search and replace text, using simple searches or advanced pattern matching.');
    }
90
91    /**
92     * Options form.
93     *
94     * @param Tree $tree
95     *
96     * @return string
97     */
98    public function fixOptions(Tree $tree): string
99    {
100        $methods = [
101            'exact'     => I18N::translate('Match the exact text, even if it occurs in the middle of a word.'),
102            'words'     => I18N::translate('Match the exact text, unless it occurs in the middle of a word.'),
103            'wildcards' => I18N::translate('Use a “?” to match a single character, use “*” to match zero or more characters.'),
104            /* I18N: http://en.wikipedia.org/wiki/Regular_expression */
105            'regex'     => I18N::translate('Regular expression'),
106        ];
107
108        $types = [
109            Family::RECORD_TYPE     => I18N::translate('Families'),
110            Individual::RECORD_TYPE => I18N::translate('Individuals'),
111            Media::RECORD_TYPE      => I18N::translate('Media objects'),
112            Note::RECORD_TYPE       => I18N::translate('Notes'),
113            Repository::RECORD_TYPE => I18N::translate('Repositories'),
114            Source::RECORD_TYPE     => I18N::translate('Sources'),
115            Submitter::RECORD_TYPE  => I18N::translate('Submitters'),
116        ];
117
118        asort($types);
119
120        return view('modules/fix-search-and-replace/options', [
121            'default_method' => 'exact',
122            'default_type'   => Individual::RECORD_TYPE,
123            'methods'        => $methods,
124            'types'          => $types,
125        ]);
126    }
127
128    /**
129     * A list of all records that need examining.  This may include records
130     * that do not need updating, if we can't detect this quickly using SQL.
131     *
132     * @param Tree                 $tree
133     * @param array<string,string> $params
134     *
135     * @return Collection<string>|null
136     */
137    protected function familiesToFix(Tree $tree, array $params): ?Collection
138    {
139        if ($params['type'] !== Family::RECORD_TYPE || $params['search'] === '') {
140            return null;
141        }
142
143        $query = DB::table('families')->where('f_file', '=', $tree->id());
144        $this->recordQuery($query, 'f_gedcom', $params);
145
146        return $query->pluck('f_id');
147    }
148
149    /**
150     * A list of all records that need examining.  This may include records
151     * that do not need updating, if we can't detect this quickly using SQL.
152     *
153     * @param Tree                 $tree
154     * @param array<string,string> $params
155     *
156     * @return Collection<string>|null
157     */
158    protected function individualsToFix(Tree $tree, array $params): ?Collection
159    {
160        if ($params['type'] !== Individual::RECORD_TYPE || $params['search'] === '') {
161            return null;
162        }
163
164        $query = DB::table('individuals')
165            ->where('i_file', '=', $tree->id());
166
167        $this->recordQuery($query, 'i_gedcom', $params);
168
169        return $query->pluck('i_id');
170    }
171
172    /**
173     * A list of all records that need examining.  This may include records
174     * that do not need updating, if we can't detect this quickly using SQL.
175     *
176     * @param Tree                 $tree
177     * @param array<string,string> $params
178     *
179     * @return Collection<string>|null
180     */
181    protected function locationsToFix(Tree $tree, array $params): ?Collection
182    {
183        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
184            return null;
185        }
186
187        $query = DB::table('other')
188            ->where('o_file', '=', $tree->id())
189            ->where('o_type', '=', Location::RECORD_TYPE);
190
191        $this->recordQuery($query, 'o_gedcom', $params);
192
193        return $query->pluck('o_id');
194    }
195
196    /**
197     * A list of all records that need examining.  This may include records
198     * that do not need updating, if we can't detect this quickly using SQL.
199     *
200     * @param Tree                 $tree
201     * @param array<string,string> $params
202     *
203     * @return Collection<string>|null
204     */
205    protected function mediaToFix(Tree $tree, array $params): ?Collection
206    {
207        if ($params['type'] !== Media::RECORD_TYPE || $params['search'] === '') {
208            return null;
209        }
210
211        $query = DB::table('media')
212            ->where('m_file', '=', $tree->id());
213
214        $this->recordQuery($query, 'm_gedcom', $params);
215
216        return $query->pluck('m_id');
217    }
218
219    /**
220     * A list of all records that need examining.  This may include records
221     * that do not need updating, if we can't detect this quickly using SQL.
222     *
223     * @param Tree                 $tree
224     * @param array<string,string> $params
225     *
226     * @return Collection<string>|null
227     */
228    protected function notesToFix(Tree $tree, array $params): ?Collection
229    {
230        if ($params['type'] !== Note::RECORD_TYPE || $params['search'] === '') {
231            return null;
232        }
233
234        $query = DB::table('other')
235            ->where('o_file', '=', $tree->id())
236            ->where('o_type', '=', Note::RECORD_TYPE);
237
238        $this->recordQuery($query, 'o_gedcom', $params);
239
240        return $query->pluck('o_id');
241    }
242
243    /**
244     * A list of all records that need examining.  This may include records
245     * that do not need updating, if we can't detect this quickly using SQL.
246     *
247     * @param Tree                 $tree
248     * @param array<string,string> $params
249     *
250     * @return Collection<string>|null
251     */
252    protected function repositoriesToFix(Tree $tree, array $params): ?Collection
253    {
254        if ($params['type'] !== Repository::RECORD_TYPE || $params['search'] === '') {
255            return null;
256        }
257
258        $query = DB::table('other')
259            ->where('o_file', '=', $tree->id())
260            ->where('o_type', '=', Repository::RECORD_TYPE);
261
262        $this->recordQuery($query, 'o_gedcom', $params);
263
264        return $query->pluck('o_id');
265    }
266
267    /**
268     * A list of all records that need examining.  This may include records
269     * that do not need updating, if we can't detect this quickly using SQL.
270     *
271     * @param Tree                 $tree
272     * @param array<string,string> $params
273     *
274     * @return Collection<string>|null
275     */
276    protected function sourcesToFix(Tree $tree, array $params): ?Collection
277    {
278        if ($params['type'] !== Source::RECORD_TYPE || $params['search'] === '') {
279            return null;
280        }
281
282        $query = $this->sourcesToFixQuery($tree, $params);
283
284        $this->recordQuery($query, 's_gedcom', $params);
285
286        return $query->pluck('s_id');
287    }
288
289    /**
290     * A list of all records that need examining.  This may include records
291     * that do not need updating, if we can't detect this quickly using SQL.
292     *
293     * @param Tree                 $tree
294     * @param array<string,string> $params
295     *
296     * @return Collection<string>|null
297     */
298    protected function submittersToFix(Tree $tree, array $params): ?Collection
299    {
300        if ($params['type'] !== Submitter::RECORD_TYPE || $params['search'] === '') {
301            return null;
302        }
303
304        $query = $this->submittersToFixQuery($tree, $params);
305
306        $this->recordQuery($query, 'o_gedcom', $params);
307
308        return $query->pluck('o_id');
309    }
310
311    /**
312     * Does a record need updating?
313     *
314     * @param GedcomRecord         $record
315     * @param array<string,string> $params
316     *
317     * @return bool
318     */
319    public function doesRecordNeedUpdate(GedcomRecord $record, array $params): bool
320    {
321        return preg_match($this->createRegex($params), $record->gedcom()) === 1;
322    }
323
324    /**
325     * Show the changes we would make
326     *
327     * @param GedcomRecord         $record
328     * @param array<string,string> $params
329     *
330     * @return string
331     */
332    public function previewUpdate(GedcomRecord $record, array $params): string
333    {
334        $old = $record->gedcom();
335        $new = $this->updateGedcom($record, $params);
336
337        return $this->data_fix_service->gedcomDiff($record->tree(), $old, $new);
338    }
339
340    /**
341     * Fix a record
342     *
343     * @param GedcomRecord         $record
344     * @param array<string,string> $params
345     *
346     * @return void
347     */
348    public function updateRecord(GedcomRecord $record, array $params): void
349    {
350        $record->updateRecord($this->updateGedcom($record, $params), false);
351    }
352
353    /**
354     * @param GedcomRecord         $record
355     * @param array<string,string> $params
356     *
357     * @return string
358     */
359    private function updateGedcom(GedcomRecord $record, array $params): string
360    {
361        // Allow "\n" to indicate a line-feed in replacement text.
362        // Back-references such as $1, $2 are handled automatically.
363        $replace = strtr($params['replace'], ['\n' => "\n"]);
364
365        $regex = $this->createRegex($params);
366
367        return preg_replace($regex, $replace, $record->gedcom());
368    }
369
370    /**
371     * Create a regular expression from the search pattern.
372     *
373     * @param array<string,string> $params
374     *
375     * @return string
376     */
377    private function createRegex(array $params): string
378    {
379        $search = $params['search'];
380        $method = $params['method'];
381        $case   = $params['case'];
382
383        switch ($method) {
384            case 'exact':
385                return '/' . preg_quote($search, '/') . '/u' . $case;
386
387            case 'words':
388                return '/\b' . preg_quote($search, '/') . '\b/u' . $case;
389
390            case 'wildcards':
391                return '/\b' . strtr(preg_quote($search, '/'), ['\*' => '.*', '\?' => '.']) . '\b/u' . $case;
392
393            case 'regex':
394                $regex = '/' . addcslashes($search, '/') . '/u' . $case;
395
396                try {
397                    // A valid regex on an empty string returns zero.
398                    // An invalid regex on an empty string returns false and throws a warning.
399                    preg_match($regex, '');
400                } catch (Throwable $ex) {
401                    $regex = self::INVALID_REGEX;
402                }
403
404                return $regex;
405        }
406
407        throw new HttpNotFoundException();
408    }
409
410    /**
411     * Create a regular expression from the search pattern.
412     *
413     * @param Builder              $query
414     * @param string               $column
415     * @param array<string,string> $params
416     *
417     * @return void
418     */
419    private function recordQuery(Builder $query, string $column, array $params): void
420    {
421        $search = $params['search'];
422        $method = $params['method'];
423        $like   = '%' . addcslashes($search, '\\%_') . '%';
424
425        switch ($method) {
426            case 'exact':
427            case 'words':
428                $query->where($column, 'LIKE', $like);
429                break;
430
431            case 'wildcards':
432                $like = strtr($like, ['?' => '_', '*' => '%']);
433                $query->where($column, 'LIKE', $like);
434                break;
435
436            case 'regex':
437                // Substituting newlines seems to be necessary on *some* versions
438                //.of MySQL (e.g. 5.7), and harmless on others (e.g. 8.0).
439                $search = strtr($search, ['\n' => "\n"]);
440
441                switch (DB::connection()->getDriverName()) {
442                    case 'sqlite':
443                    case 'mysql':
444                        $query->where($column, 'REGEXP', $search);
445                        break;
446
447                    case 'pgsql':
448                        $query->where($column, '~', $search);
449                        break;
450
451                    case 'sqlsvr':
452                        // Not available
453                        break;
454                }
455                break;
456        }
457    }
458}
459