xref: /webtrees/app/Services/SearchService.php (revision cfb84ff6ed17b5cbf0afb82e6dc7c9f1f86e4432)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Closure;
23use Fisharebest\Webtrees\Date;
24use Fisharebest\Webtrees\Family;
25use Fisharebest\Webtrees\Gedcom;
26use Fisharebest\Webtrees\GedcomRecord;
27use Fisharebest\Webtrees\Http\Exceptions\HttpServiceUnavailableException;
28use Fisharebest\Webtrees\I18N;
29use Fisharebest\Webtrees\Individual;
30use Fisharebest\Webtrees\Location;
31use Fisharebest\Webtrees\Media;
32use Fisharebest\Webtrees\Note;
33use Fisharebest\Webtrees\Place;
34use Fisharebest\Webtrees\Registry;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\SharedNote;
37use Fisharebest\Webtrees\Soundex;
38use Fisharebest\Webtrees\Source;
39use Fisharebest\Webtrees\Submission;
40use Fisharebest\Webtrees\Submitter;
41use Fisharebest\Webtrees\Tree;
42use Illuminate\Database\Capsule\Manager as DB;
43use Illuminate\Database\Query\Builder;
44use Illuminate\Database\Query\Expression;
45use Illuminate\Database\Query\JoinClause;
46use Illuminate\Support\Collection;
47
48use function addcslashes;
49use function array_filter;
50use function array_map;
51use function array_unique;
52use function explode;
53use function implode;
54use function mb_stripos;
55use function preg_match;
56use function preg_quote;
57use function preg_replace;
58use function str_ends_with;
59use function str_starts_with;
60
61use const PHP_INT_MAX;
62
63/**
64 * Search trees for genealogy records.
65 */
66class SearchService
67{
68    // Do not attempt to show search results larger than this/
69    protected const MAX_SEARCH_RESULTS = 5000;
70
71    private TreeService $tree_service;
72
73    /**
74     * SearchService constructor.
75     *
76     * @param TreeService $tree_service
77     */
78    public function __construct(
79        TreeService $tree_service
80    ) {
81        $this->tree_service = $tree_service;
82    }
83
84    /**
85     * @param array<Tree>   $trees
86     * @param array<string> $search
87     *
88     * @return Collection<int,Family>
89     */
90    public function searchFamilies(array $trees, array $search): Collection
91    {
92        $query = DB::table('families');
93
94        $this->whereTrees($query, 'f_file', $trees);
95        $this->whereSearch($query, 'f_gedcom', $search);
96
97        return $query
98            ->get()
99            ->each($this->rowLimiter())
100            ->map($this->familyRowMapper())
101            ->filter(GedcomRecord::accessFilter())
102            ->filter($this->rawGedcomFilter($search));
103    }
104
105    /**
106     * Search for families by name.
107     *
108     * @param array<Tree>   $trees
109     * @param array<string> $search
110     * @param int           $offset
111     * @param int           $limit
112     *
113     * @return Collection<int,Family>
114     */
115    public function searchFamilyNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
116    {
117        $query = DB::table('families')
118            ->leftJoin('name AS husb_name', static function (JoinClause $join): void {
119                $join
120                    ->on('husb_name.n_file', '=', 'families.f_file')
121                    ->on('husb_name.n_id', '=', 'families.f_husb')
122                    ->where('husb_name.n_type', '<>', '_MARNM');
123            })
124            ->leftJoin('name AS wife_name', static function (JoinClause $join): void {
125                $join
126                    ->on('wife_name.n_file', '=', 'families.f_file')
127                    ->on('wife_name.n_id', '=', 'families.f_wife')
128                    ->where('wife_name.n_type', '<>', '_MARNM');
129            });
130
131        $prefix = DB::connection()->getTablePrefix();
132        $field  = new Expression('COALESCE(' . $prefix . "husb_name.n_full, '') || COALESCE(" . $prefix . "wife_name.n_full, '')");
133
134        $this->whereTrees($query, 'f_file', $trees);
135        $this->whereSearch($query, $field, $search);
136
137        $query
138            ->orderBy('husb_name.n_sort')
139            ->orderBy('wife_name.n_sort')
140            ->select(['families.*', 'husb_name.n_sort', 'wife_name.n_sort']);
141
142        return $this->paginateQuery($query, $this->familyRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
143    }
144
145    /**
146     * @param Place $place
147     *
148     * @return Collection<int,Family>
149     */
150    public function searchFamiliesInPlace(Place $place): Collection
151    {
152        return DB::table('families')
153            ->join('placelinks', static function (JoinClause $query) {
154                $query
155                    ->on('families.f_file', '=', 'placelinks.pl_file')
156                    ->on('families.f_id', '=', 'placelinks.pl_gid');
157            })
158            ->where('f_file', '=', $place->tree()->id())
159            ->where('pl_p_id', '=', $place->id())
160            ->select(['families.*'])
161            ->get()
162            ->each($this->rowLimiter())
163            ->map($this->familyRowMapper())
164            ->filter(GedcomRecord::accessFilter());
165    }
166
167    /**
168     * @param array<Tree>   $trees
169     * @param array<string> $search
170     *
171     * @return Collection<int,Individual>
172     */
173    public function searchIndividuals(array $trees, array $search): Collection
174    {
175        $query = DB::table('individuals');
176
177        $this->whereTrees($query, 'i_file', $trees);
178        $this->whereSearch($query, 'i_gedcom', $search);
179
180        return $query
181            ->get()
182            ->each($this->rowLimiter())
183            ->map($this->individualRowMapper())
184            ->filter(GedcomRecord::accessFilter())
185            ->filter($this->rawGedcomFilter($search));
186    }
187
188    /**
189     * Search for individuals by name.
190     *
191     * @param array<Tree>   $trees
192     * @param array<string> $search
193     * @param int           $offset
194     * @param int           $limit
195     *
196     * @return Collection<int,Individual>
197     */
198    public function searchIndividualNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
199    {
200        $query = DB::table('individuals')
201            ->join('name', static function (JoinClause $join): void {
202                $join
203                    ->on('name.n_file', '=', 'individuals.i_file')
204                    ->on('name.n_id', '=', 'individuals.i_id');
205            })
206            ->orderBy('n_sort')
207            ->select(['individuals.*', 'n_sort']);
208
209        $this->whereTrees($query, 'i_file', $trees);
210        $this->whereSearch($query, 'n_full', $search);
211
212        return $this->paginateQuery($query, $this->individualRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
213    }
214
215    /**
216     * @param Place $place
217     *
218     * @return Collection<int,Individual>
219     */
220    public function searchIndividualsInPlace(Place $place): Collection
221    {
222        return DB::table('individuals')
223            ->join('placelinks', static function (JoinClause $join) {
224                $join
225                    ->on('i_file', '=', 'pl_file')
226                    ->on('i_id', '=', 'pl_gid');
227            })
228            ->where('i_file', '=', $place->tree()->id())
229            ->where('pl_p_id', '=', $place->id())
230            ->select(['individuals.*'])
231            ->get()
232            ->each($this->rowLimiter())
233            ->map($this->individualRowMapper())
234            ->filter(GedcomRecord::accessFilter());
235    }
236
237    /**
238     * Search for submissions.
239     *
240     * @param array<Tree>   $trees
241     * @param array<string> $search
242     * @param int           $offset
243     * @param int           $limit
244     *
245     * @return Collection<int,Location>
246     */
247    public function searchLocations(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
248    {
249        $query = DB::table('other')
250            ->where('o_type', '=', Location::RECORD_TYPE);
251
252        $this->whereTrees($query, 'o_file', $trees);
253        $this->whereSearch($query, 'o_gedcom', $search);
254
255        return $this->paginateQuery($query, $this->locationRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
256    }
257
258    /**
259     * Search for media objects.
260     *
261     * @param array<Tree>   $trees
262     * @param array<string> $search
263     * @param int           $offset
264     * @param int           $limit
265     *
266     * @return Collection<int,Media>
267     */
268    public function searchMedia(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
269    {
270        $query = DB::table('media');
271
272        $this->whereTrees($query, 'media.m_file', $trees);
273        $this->whereSearch($query, 'm_gedcom', $search);
274
275        return $this->paginateQuery($query, $this->mediaRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
276    }
277
278    /**
279     * Search for notes.
280     *
281     * @param array<Tree>   $trees
282     * @param array<string> $search
283     * @param int           $offset
284     * @param int           $limit
285     *
286     * @return Collection<int,Note>
287     */
288    public function searchNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
289    {
290        $query = DB::table('other')
291            ->where('o_type', '=', Note::RECORD_TYPE);
292
293        $this->whereTrees($query, 'o_file', $trees);
294        $this->whereSearch($query, 'o_gedcom', $search);
295
296        return $this->paginateQuery($query, $this->noteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
297    }
298
299    /**
300     * Search for notes.
301     *
302     * @param array<Tree>   $trees
303     * @param array<string> $search
304     * @param int           $offset
305     * @param int           $limit
306     *
307     * @return Collection<int,SharedNote>
308     */
309    public function searchSharedNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
310    {
311        $query = DB::table('other')
312            ->where('o_type', '=', SharedNote::RECORD_TYPE);
313
314        $this->whereTrees($query, 'o_file', $trees);
315        $this->whereSearch($query, 'o_gedcom', $search);
316
317        return $this->paginateQuery($query, $this->sharedNoteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
318    }
319
320    /**
321     * Search for repositories.
322     *
323     * @param array<Tree>   $trees
324     * @param array<string> $search
325     * @param int           $offset
326     * @param int           $limit
327     *
328     * @return Collection<int,Repository>
329     */
330    public function searchRepositories(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
331    {
332        $query = DB::table('other')
333            ->where('o_type', '=', Repository::RECORD_TYPE);
334
335        $this->whereTrees($query, 'o_file', $trees);
336        $this->whereSearch($query, 'o_gedcom', $search);
337
338        return $this->paginateQuery($query, $this->repositoryRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
339    }
340
341    /**
342     * Search for sources.
343     *
344     * @param array<Tree>   $trees
345     * @param array<string> $search
346     * @param int      $offset
347     * @param int      $limit
348     *
349     * @return Collection<int,Source>
350     */
351    public function searchSources(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
352    {
353        $query = DB::table('sources');
354
355        $this->whereTrees($query, 's_file', $trees);
356        $this->whereSearch($query, 's_gedcom', $search);
357
358        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
359    }
360
361    /**
362     * Search for sources by name.
363     *
364     * @param array<Tree>   $trees
365     * @param array<string> $search
366     * @param int           $offset
367     * @param int           $limit
368     *
369     * @return Collection<int,Source>
370     */
371    public function searchSourcesByName(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
372    {
373        $query = DB::table('sources')
374            ->orderBy('s_name');
375
376        $this->whereTrees($query, 's_file', $trees);
377        $this->whereSearch($query, 's_name', $search);
378
379        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
380    }
381
382    /**
383     * Search for sources.
384     *
385     * @param array<Tree>   $trees
386     * @param array<string> $search
387     * @param int           $offset
388     * @param int           $limit
389     *
390     * @return Collection<int,string>
391     */
392    public function searchSurnames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
393    {
394        $query = DB::table('name');
395
396        $this->whereTrees($query, 'n_file', $trees);
397        $this->whereSearch($query, 'n_surname', $search);
398
399        return $query
400            ->groupBy(['n_surname'])
401            ->orderBy('n_surname')
402            ->skip($offset)
403            ->take($limit)
404            ->pluck('n_surname');
405    }
406
407    /**
408     * Search for submissions.
409     *
410     * @param array<Tree>   $trees
411     * @param array<string> $search
412     * @param int           $offset
413     * @param int           $limit
414     *
415     * @return Collection<int,Submission>
416     */
417    public function searchSubmissions(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
418    {
419        $query = DB::table('other')
420            ->where('o_type', '=', Submission::RECORD_TYPE);
421
422        $this->whereTrees($query, 'o_file', $trees);
423        $this->whereSearch($query, 'o_gedcom', $search);
424
425        return $this->paginateQuery($query, $this->submissionRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
426    }
427
428    /**
429     * Search for submitters.
430     *
431     * @param array<Tree>   $trees
432     * @param array<string> $search
433     * @param int           $offset
434     * @param int           $limit
435     *
436     * @return Collection<int,Submitter>
437     */
438    public function searchSubmitters(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
439    {
440        $query = DB::table('other')
441            ->where('o_type', '=', Submitter::RECORD_TYPE);
442
443        $this->whereTrees($query, 'o_file', $trees);
444        $this->whereSearch($query, 'o_gedcom', $search);
445
446        return $this->paginateQuery($query, $this->submitterRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
447    }
448
449    /**
450     * Search for places.
451     *
452     * @param Tree   $tree
453     * @param string $search
454     * @param int    $offset
455     * @param int    $limit
456     *
457     * @return Collection<int,Place>
458     */
459    public function searchPlaces(Tree $tree, string $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
460    {
461        $query = DB::table('places AS p0')
462            ->where('p0.p_file', '=', $tree->id())
463            ->leftJoin('places AS p1', 'p1.p_id', '=', 'p0.p_parent_id')
464            ->leftJoin('places AS p2', 'p2.p_id', '=', 'p1.p_parent_id')
465            ->leftJoin('places AS p3', 'p3.p_id', '=', 'p2.p_parent_id')
466            ->leftJoin('places AS p4', 'p4.p_id', '=', 'p3.p_parent_id')
467            ->leftJoin('places AS p5', 'p5.p_id', '=', 'p4.p_parent_id')
468            ->leftJoin('places AS p6', 'p6.p_id', '=', 'p5.p_parent_id')
469            ->leftJoin('places AS p7', 'p7.p_id', '=', 'p6.p_parent_id')
470            ->leftJoin('places AS p8', 'p8.p_id', '=', 'p7.p_parent_id')
471            ->orderBy('p0.p_place')
472            ->orderBy('p1.p_place')
473            ->orderBy('p2.p_place')
474            ->orderBy('p3.p_place')
475            ->orderBy('p4.p_place')
476            ->orderBy('p5.p_place')
477            ->orderBy('p6.p_place')
478            ->orderBy('p7.p_place')
479            ->orderBy('p8.p_place')
480            ->select([
481                'p0.p_place AS place0',
482                'p1.p_place AS place1',
483                'p2.p_place AS place2',
484                'p3.p_place AS place3',
485                'p4.p_place AS place4',
486                'p5.p_place AS place5',
487                'p6.p_place AS place6',
488                'p7.p_place AS place7',
489                'p8.p_place AS place8',
490            ]);
491
492        // Filter each level of the hierarchy.
493        foreach (explode(',', $search, 9) as $level => $string) {
494            $query->where('p' . $level . '.p_place', 'LIKE', '%' . addcslashes($string, '\\%_') . '%');
495        }
496
497        $row_mapper = static function (object $row) use ($tree): Place {
498            $place = implode(', ', array_filter((array) $row));
499
500            return new Place($place, $tree);
501        };
502
503        $filter = static function (): bool {
504            return true;
505        };
506
507        return $this->paginateQuery($query, $row_mapper, $filter, $offset, $limit);
508    }
509
510    /**
511     * @param array<Tree>          $trees
512     * @param array<string,string> $fields
513     * @param array<string,string> $modifiers
514     *
515     * @return Collection<int,Individual>
516     */
517    public function searchIndividualsAdvanced(array $trees, array $fields, array $modifiers): Collection
518    {
519        $fields = array_filter($fields);
520
521        $query = DB::table('individuals')
522            ->select(['individuals.*'])
523            ->distinct();
524
525        $this->whereTrees($query, 'i_file', $trees);
526
527        // Join the following tables
528        $father_name   = false;
529        $mother_name   = false;
530        $spouse_family = false;
531        $indi_name     = false;
532        $indi_dates    = [];
533        $fam_dates     = [];
534        $indi_plac     = false;
535        $fam_plac      = false;
536
537        foreach ($fields as $field_name => $field_value) {
538            if ($field_value !== '') {
539                if (str_starts_with($field_name, 'FATHER:NAME')) {
540                    $father_name = true;
541                } elseif (str_starts_with($field_name, 'MOTHER:NAME')) {
542                    $mother_name = true;
543                } elseif (str_starts_with($field_name, 'INDI:NAME:GIVN')) {
544                    $indi_name = true;
545                } elseif (str_starts_with($field_name, 'INDI:NAME:SURN')) {
546                    $indi_name = true;
547                } elseif (str_starts_with($field_name, 'FAM:')) {
548                    $spouse_family = true;
549                    if (str_ends_with($field_name, ':DATE')) {
550                        $fam_dates[] = explode(':', $field_name)[1];
551                    } elseif (str_ends_with($field_name, ':PLAC')) {
552                        $fam_plac = true;
553                    }
554                } elseif (str_starts_with($field_name, 'INDI:')) {
555                    if (str_ends_with($field_name, ':DATE')) {
556                        $indi_dates[] = explode(':', $field_name)[1];
557                    } elseif (str_ends_with($field_name, ':PLAC')) {
558                        $indi_plac = true;
559                    }
560                }
561            }
562        }
563
564        if ($father_name || $mother_name) {
565            $query->join('link AS l1', static function (JoinClause $join): void {
566                $join
567                    ->on('l1.l_file', '=', 'individuals.i_file')
568                    ->on('l1.l_from', '=', 'individuals.i_id')
569                    ->where('l1.l_type', '=', 'FAMC');
570            });
571
572            if ($father_name) {
573                $query->join('link AS l2', static function (JoinClause $join): void {
574                    $join
575                        ->on('l2.l_file', '=', 'l1.l_file')
576                        ->on('l2.l_from', '=', 'l1.l_to')
577                        ->where('l2.l_type', '=', 'HUSB');
578                });
579                $query->join('name AS father_name', static function (JoinClause $join): void {
580                    $join
581                        ->on('father_name.n_file', '=', 'l2.l_file')
582                        ->on('father_name.n_id', '=', 'l2.l_to');
583                });
584            }
585
586            if ($mother_name) {
587                $query->join('link AS l3', static function (JoinClause $join): void {
588                    $join
589                        ->on('l3.l_file', '=', 'l1.l_file')
590                        ->on('l3.l_from', '=', 'l1.l_to')
591                        ->where('l3.l_type', '=', 'WIFE');
592                });
593                $query->join('name AS mother_name', static function (JoinClause $join): void {
594                    $join
595                        ->on('mother_name.n_file', '=', 'l3.l_file')
596                        ->on('mother_name.n_id', '=', 'l3.l_to');
597                });
598            }
599        }
600
601        if ($spouse_family) {
602            $query->join('link AS l4', static function (JoinClause $join): void {
603                $join
604                    ->on('l4.l_file', '=', 'individuals.i_file')
605                    ->on('l4.l_from', '=', 'individuals.i_id')
606                    ->where('l4.l_type', '=', 'FAMS');
607            });
608            $query->join('families AS spouse_families', static function (JoinClause $join): void {
609                $join
610                    ->on('spouse_families.f_file', '=', 'l4.l_file')
611                    ->on('spouse_families.f_id', '=', 'l4.l_to');
612            });
613        }
614
615        if ($indi_name) {
616            $query->join('name AS individual_name', static function (JoinClause $join): void {
617                $join
618                    ->on('individual_name.n_file', '=', 'individuals.i_file')
619                    ->on('individual_name.n_id', '=', 'individuals.i_id');
620            });
621        }
622
623        foreach (array_unique($indi_dates) as $indi_date) {
624            $query->join('dates AS date_' . $indi_date, static function (JoinClause $join) use ($indi_date): void {
625                $join
626                    ->on('date_' . $indi_date . '.d_file', '=', 'individuals.i_file')
627                    ->on('date_' . $indi_date . '.d_gid', '=', 'individuals.i_id');
628            });
629        }
630
631        foreach (array_unique($fam_dates) as $fam_date) {
632            $query->join('dates AS date_' . $fam_date, static function (JoinClause $join) use ($fam_date): void {
633                $join
634                    ->on('date_' . $fam_date . '.d_file', '=', 'spouse_families.f_file')
635                    ->on('date_' . $fam_date . '.d_gid', '=', 'spouse_families.f_id');
636            });
637        }
638
639        if ($indi_plac) {
640            $query->join('placelinks AS individual_placelinks', static function (JoinClause $join): void {
641                $join
642                    ->on('individual_placelinks.pl_file', '=', 'individuals.i_file')
643                    ->on('individual_placelinks.pl_gid', '=', 'individuals.i_id');
644            });
645            $query->join('places AS individual_places', static function (JoinClause $join): void {
646                $join
647                    ->on('individual_places.p_file', '=', 'individual_placelinks.pl_file')
648                    ->on('individual_places.p_id', '=', 'individual_placelinks.pl_p_id');
649            });
650        }
651
652        if ($fam_plac) {
653            $query->join('placelinks AS familyl_placelinks', static function (JoinClause $join): void {
654                $join
655                    ->on('familyl_placelinks.pl_file', '=', 'individuals.i_file')
656                    ->on('familyl_placelinks.pl_gid', '=', 'individuals.i_id');
657            });
658            $query->join('places AS family_places', static function (JoinClause $join): void {
659                $join
660                    ->on('family_places.p_file', '=', 'familyl_placelinks.pl_file')
661                    ->on('family_places.p_id', '=', 'familyl_placelinks.pl_p_id');
662            });
663        }
664
665        foreach ($fields as $field_name => $field_value) {
666            $parts = explode(':', $field_name . ':::');
667            if (str_starts_with($field_name, 'INDI:NAME:')) {
668                switch ($field_name) {
669                    case 'INDI:NAME:GIVN':
670                        switch ($modifiers[$field_name]) {
671                            case 'EXACT':
672                                $query->where('individual_name.n_givn', '=', $field_value);
673                                break;
674                            case 'BEGINS':
675                                $query->where('individual_name.n_givn', 'LIKE', $field_value . '%');
676                                break;
677                            case 'CONTAINS':
678                                $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
679                                break;
680                            case 'SDX_STD':
681                                $sdx = Soundex::russell($field_value);
682                                if ($sdx !== '') {
683                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_std', $sdx);
684                                } else {
685                                    // No phonetic content? Use a substring match
686                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
687                                }
688                                break;
689                            case 'SDX': // SDX uses DM by default.
690                            case 'SDX_DM':
691                                $sdx = Soundex::daitchMokotoff($field_value);
692                                if ($sdx !== '') {
693                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_dm', $sdx);
694                                } else {
695                                    // No phonetic content? Use a substring match
696                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
697                                }
698                                break;
699                        }
700                        unset($fields[$field_name]);
701                        break;
702                    case 'INDI:NAME:SURN':
703                        switch ($modifiers[$field_name]) {
704                            case 'EXACT':
705                                $query->where(function (Builder $query) use ($field_value): void {
706                                    $query
707                                        ->where('individual_name.n_surn', '=', $field_value)
708                                        ->orWhere('individual_name.n_surname', '=', $field_value);
709                                });
710                                break;
711                            case 'BEGINS':
712                                $query->where(function (Builder $query) use ($field_value): void {
713                                    $query
714                                        ->where('individual_name.n_surn', 'LIKE', $field_value . '%')
715                                        ->orWhere('individual_name.n_surname', 'LIKE', $field_value . '%');
716                                });
717                                break;
718                            case 'CONTAINS':
719                                $query->where(function (Builder $query) use ($field_value): void {
720                                    $query
721                                        ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
722                                        ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
723                                });
724                                break;
725                            case 'SDX_STD':
726                                $sdx = Soundex::russell($field_value);
727                                if ($sdx !== '') {
728                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_std', $sdx);
729                                } else {
730                                    // No phonetic content? Use a substring match
731                                    $query->where(function (Builder $query) use ($field_value): void {
732                                        $query
733                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
734                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
735                                    });
736                                }
737                                break;
738                            case 'SDX': // SDX uses DM by default.
739                            case 'SDX_DM':
740                                $sdx = Soundex::daitchMokotoff($field_value);
741                                if ($sdx !== '') {
742                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_dm', $sdx);
743                                } else {
744                                    // No phonetic content? Use a substring match
745                                    $query->where(function (Builder $query) use ($field_value): void {
746                                        $query
747                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
748                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
749                                    });
750                                }
751                                break;
752                        }
753                        unset($fields[$field_name]);
754                        break;
755                    case 'INDI:NAME:NICK':
756                    case 'INDI:NAME:_MARNM':
757                    case 'INDI:NAME:_HEB':
758                    case 'INDI:NAME:_AKA':
759                        $like = "%\n1 NAME%\n2 " . $parts[2] . ' %' . preg_quote($field_value, '/') . '%';
760                        $query->where('individuals.i_gedcom', 'LIKE', $like);
761                        break;
762                }
763            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':DATE')) {
764                $date = new Date($field_value);
765                if ($date->isOK()) {
766                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
767                    $query
768                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
769                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
770                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
771                }
772                unset($fields[$field_name]);
773            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':DATE')) {
774                $date = new Date($field_value);
775                if ($date->isOK()) {
776                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
777                    $query
778                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
779                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
780                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
781                }
782                unset($fields[$field_name]);
783            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
784                // SQL can only link a place to a person/family, not to an event.
785                $query->where('individual_places.p_place', 'LIKE', '%' . $field_value . '%');
786            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
787                // SQL can only link a place to a person/family, not to an event.
788                $query->where('family_places.p_place', 'LIKE', '%' . $field_value . '%');
789            } elseif (str_starts_with($field_name, 'MOTHER:NAME:') || str_starts_with($field_name, 'FATHER:NAME:')) {
790                $table = str_starts_with($field_name, 'FATHER:NAME:') ? 'father_name' : 'mother_name';
791                switch ($parts[2]) {
792                    case 'GIVN':
793                        switch ($modifiers[$field_name]) {
794                            case 'EXACT':
795                                $query->where($table . '.n_givn', '=', $field_value);
796                                break;
797                            case 'BEGINS':
798                                $query->where($table . '.n_givn', 'LIKE', $field_value . '%');
799                                break;
800                            case 'CONTAINS':
801                                $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
802                                break;
803                            case 'SDX_STD':
804                                $sdx = Soundex::russell($field_value);
805                                if ($sdx !== '') {
806                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_std', $sdx);
807                                } else {
808                                    // No phonetic content? Use a substring match
809                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
810                                }
811                                break;
812                            case 'SDX': // SDX uses DM by default.
813                            case 'SDX_DM':
814                                $sdx = Soundex::daitchMokotoff($field_value);
815                                if ($sdx !== '') {
816                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_dm', $sdx);
817                                } else {
818                                    // No phonetic content? Use a substring match
819                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
820                                }
821                                break;
822                        }
823                        break;
824                    case 'SURN':
825                        switch ($modifiers[$field_name]) {
826                            case 'EXACT':
827                                $query->where($table . '.n_surn', '=', $field_value);
828                                break;
829                            case 'BEGINS':
830                                $query->where($table . '.n_surn', 'LIKE', $field_value . '%');
831                                break;
832                            case 'CONTAINS':
833                                $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
834                                break;
835                            case 'SDX_STD':
836                                $sdx = Soundex::russell($field_value);
837                                if ($sdx !== '') {
838                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_std', $sdx);
839                                } else {
840                                    // No phonetic content? Use a substring match
841                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
842                                }
843                                break;
844                            case 'SDX': // SDX uses DM by default.
845                            case 'SDX_DM':
846                                $sdx = Soundex::daitchMokotoff($field_value);
847                                if ($sdx !== '') {
848                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_dm', $sdx);
849                                } else {
850                                    // No phonetic content? Use a substring match
851                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
852                                }
853                                break;
854                        }
855                        break;
856                }
857                unset($fields[$field_name]);
858            } elseif (str_starts_with($field_name, 'FAM:')) {
859                // e.g. searches for occupation, religion, note, etc.
860                // Initial matching only.  Need PHP to apply filter.
861                $query->where('spouse_families.f_gedcom', 'LIKE', "%\n1 " . $parts[1] . ' %' . $field_value . '%');
862            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':TYPE')) {
863                // Initial matching only.  Need PHP to apply filter.
864                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[1] . "%\n2 TYPE %" . $field_value . '%');
865            } elseif (str_starts_with($field_name, 'INDI:')) {
866                // e.g. searches for occupation, religion, note, etc.
867                // Initial matching only.  Need PHP to apply filter.
868                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[1] . '%' . $parts[2] . '%' . $field_value . '%');
869            }
870        }
871
872        return $query
873            ->get()
874            ->each($this->rowLimiter())
875            ->map($this->individualRowMapper())
876            ->filter(GedcomRecord::accessFilter())
877            ->filter(static function (Individual $individual) use ($fields): bool {
878                // Check for searches which were only partially matched by SQL
879                foreach ($fields as $field_name => $field_value) {
880                    $parts = explode(':', $field_name . '::::');
881
882                    if (str_starts_with($field_name, 'INDI:NAME:') && $field_name !== 'INDI:NAME:GIVN' && $field_name !== 'INDI:NAME:SURN') {
883                        $regex = '/\n1 NAME.*(?:\n2.*)*\n2 ' . $parts[2] . ' .*' . preg_quote($field_value, '/') . '/i';
884
885                        if (preg_match($regex, $individual->gedcom())) {
886                            continue;
887                        }
888
889                        return false;
890                    }
891
892                    $regex = '/' . preg_quote($field_value, '/') . '/i';
893
894                    if (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
895                        foreach ($individual->facts([$parts[1]]) as $fact) {
896                            if (preg_match($regex, $fact->place()->gedcomName())) {
897                                continue 2;
898                            }
899                        }
900                        return false;
901                    }
902
903                    if (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
904                        foreach ($individual->spouseFamilies() as $family) {
905                            foreach ($family->facts([$parts[1]]) as $fact) {
906                                if (preg_match($regex, $fact->place()->gedcomName())) {
907                                    continue 3;
908                                }
909                            }
910                        }
911                        return false;
912                    }
913
914                    if ($field_name === 'INDI:FACT:TYPE' || $field_name === 'INDI:EVEN:TYPE' || $field_name === 'INDI:CHAN:_WT_USER') {
915                        foreach ($individual->facts([$parts[1]]) as $fact) {
916                            if (preg_match($regex, $fact->attribute($parts[2]))) {
917                                continue 2;
918                            }
919                        }
920
921                        return false;
922                    }
923
924                    if (str_starts_with($field_name, 'INDI:')) {
925                        foreach ($individual->facts([$parts[1]]) as $fact) {
926                            if (preg_match($regex, $fact->value())) {
927                                continue 2;
928                            }
929                        }
930
931                        return false;
932                    }
933
934                    if (str_starts_with($field_name, 'FAM:')) {
935                        foreach ($individual->spouseFamilies() as $family) {
936                            foreach ($family->facts([$parts[1]]) as $fact) {
937                                if (preg_match($regex, $fact->value())) {
938                                    continue 3;
939                                }
940                            }
941                        }
942                        return false;
943                    }
944                }
945
946                return true;
947            });
948    }
949
950    /**
951     * @param string      $soundex
952     * @param string      $lastname
953     * @param string      $firstname
954     * @param string      $place
955     * @param array<Tree> $search_trees
956     *
957     * @return Collection<int,Individual>
958     */
959    public function searchIndividualsPhonetic(string $soundex, string $lastname, string $firstname, string $place, array $search_trees): Collection
960    {
961        switch ($soundex) {
962            default:
963            case 'Russell':
964                $givn_sdx   = Soundex::russell($firstname);
965                $surn_sdx   = Soundex::russell($lastname);
966                $plac_sdx   = Soundex::russell($place);
967                $givn_field = 'n_soundex_givn_std';
968                $surn_field = 'n_soundex_surn_std';
969                $plac_field = 'p_std_soundex';
970                break;
971            case 'DaitchM':
972                $givn_sdx   = Soundex::daitchMokotoff($firstname);
973                $surn_sdx   = Soundex::daitchMokotoff($lastname);
974                $plac_sdx   = Soundex::daitchMokotoff($place);
975                $givn_field = 'n_soundex_givn_dm';
976                $surn_field = 'n_soundex_surn_dm';
977                $plac_field = 'p_dm_soundex';
978                break;
979        }
980
981        // Nothing to search for? Return nothing.
982        if ($givn_sdx === '' && $surn_sdx === '' && $plac_sdx === '') {
983            return new Collection();
984        }
985
986        $query = DB::table('individuals')
987            ->select(['individuals.*'])
988            ->distinct();
989
990        $this->whereTrees($query, 'i_file', $search_trees);
991
992        if ($plac_sdx !== '') {
993            $query->join('placelinks', static function (JoinClause $join): void {
994                $join
995                    ->on('placelinks.pl_file', '=', 'individuals.i_file')
996                    ->on('placelinks.pl_gid', '=', 'individuals.i_id');
997            });
998            $query->join('places', static function (JoinClause $join): void {
999                $join
1000                    ->on('places.p_file', '=', 'placelinks.pl_file')
1001                    ->on('places.p_id', '=', 'placelinks.pl_p_id');
1002            });
1003
1004            $this->wherePhonetic($query, $plac_field, $plac_sdx);
1005        }
1006
1007        if ($givn_sdx !== '' || $surn_sdx !== '') {
1008            $query->join('name', static function (JoinClause $join): void {
1009                $join
1010                    ->on('name.n_file', '=', 'individuals.i_file')
1011                    ->on('name.n_id', '=', 'individuals.i_id');
1012            });
1013
1014            $this->wherePhonetic($query, $givn_field, $givn_sdx);
1015            $this->wherePhonetic($query, $surn_field, $surn_sdx);
1016        }
1017
1018        return $query
1019            ->get()
1020            ->each($this->rowLimiter())
1021            ->map($this->individualRowMapper())
1022            ->filter(GedcomRecord::accessFilter());
1023    }
1024
1025    /**
1026     * Paginate a search query.
1027     *
1028     * @param Builder $query      Searches the database for the desired records.
1029     * @param Closure $row_mapper Converts a row from the query into a record.
1030     * @param Closure $row_filter
1031     * @param int     $offset     Skip this many rows.
1032     * @param int     $limit      Take this many rows.
1033     *
1034     * @return Collection<int,mixed>
1035     */
1036    private function paginateQuery(Builder $query, Closure $row_mapper, Closure $row_filter, int $offset, int $limit): Collection
1037    {
1038        $collection = new Collection();
1039
1040        foreach ($query->cursor() as $row) {
1041            $record = $row_mapper($row);
1042            // searchIndividualNames() and searchFamilyNames() can return duplicate rows,
1043            // where individuals have multiple names - and we need to sort results by name.
1044            if ($collection->containsStrict($record)) {
1045                continue;
1046            }
1047            // If the object has a method "canShow()", then use it to filter for privacy.
1048            if ($row_filter($record)) {
1049                if ($offset > 0) {
1050                    $offset--;
1051                } else {
1052                    if ($limit > 0) {
1053                        $collection->push($record);
1054                    }
1055
1056                    $limit--;
1057
1058                    if ($limit === 0) {
1059                        break;
1060                    }
1061                }
1062            }
1063        }
1064
1065
1066        return $collection;
1067    }
1068
1069    /**
1070     * Apply search filters to a SQL query column.  Apply collation rules to MySQL.
1071     *
1072     * @param Builder           $query
1073     * @param Expression|string $field
1074     * @param array<string>     $search_terms
1075     */
1076    private function whereSearch(Builder $query, $field, array $search_terms): void
1077    {
1078        if ($field instanceof Expression) {
1079            $field = $field->getValue();
1080        }
1081
1082        foreach ($search_terms as $search_term) {
1083            $query->where(new Expression($field), 'LIKE', '%' . addcslashes($search_term, '\\%_') . '%');
1084        }
1085    }
1086
1087    /**
1088     * Apply soundex search filters to a SQL query column.
1089     *
1090     * @param Builder           $query
1091     * @param Expression|string $field
1092     * @param string            $soundex
1093     */
1094    private function wherePhonetic(Builder $query, $field, string $soundex): void
1095    {
1096        if ($soundex !== '') {
1097            $query->where(static function (Builder $query) use ($soundex, $field): void {
1098                foreach (explode(':', $soundex) as $sdx) {
1099                    $query->orWhere($field, 'LIKE', '%' . $sdx . '%');
1100                }
1101            });
1102        }
1103    }
1104
1105    /**
1106     * @param Builder     $query
1107     * @param string      $tree_id_field
1108     * @param array<Tree> $trees
1109     */
1110    private function whereTrees(Builder $query, string $tree_id_field, array $trees): void
1111    {
1112        $tree_ids = array_map(static function (Tree $tree): int {
1113            return $tree->id();
1114        }, $trees);
1115
1116        $query->whereIn($tree_id_field, $tree_ids);
1117    }
1118
1119    /**
1120     * Find the media object that uses a particular media file.
1121     *
1122     * @param string $file
1123     *
1124     * @return array<Media>
1125     */
1126    public function findMediaObjectsForMediaFile(string $file): array
1127    {
1128        return DB::table('media')
1129            ->join('media_file', static function (JoinClause $join): void {
1130                $join
1131                    ->on('media_file.m_file', '=', 'media.m_file')
1132                    ->on('media_file.m_id', '=', 'media.m_id');
1133            })
1134            ->join('gedcom_setting', 'media.m_file', '=', 'gedcom_setting.gedcom_id')
1135            ->where(new Expression('setting_value || multimedia_file_refn'), '=', $file)
1136            ->select(['media.*'])
1137            ->distinct()
1138            ->get()
1139            ->map($this->mediaRowMapper())
1140            ->all();
1141    }
1142
1143    /**
1144     * A closure to filter records by privacy-filtered GEDCOM data.
1145     *
1146     * @param array<string> $search_terms
1147     *
1148     * @return Closure
1149     */
1150    private function rawGedcomFilter(array $search_terms): Closure
1151    {
1152        return static function (GedcomRecord $record) use ($search_terms): bool {
1153            // Ignore non-genealogy fields
1154            $gedcom = preg_replace('/\n\d (?:_UID|_WT_USER) .*/', '', $record->gedcom());
1155
1156            // Ignore matches in links
1157            $gedcom = preg_replace('/\n\d ' . Gedcom::REGEX_TAG . '( @' . Gedcom::REGEX_XREF . '@)?/', '', $gedcom);
1158
1159            // Re-apply the filtering
1160            foreach ($search_terms as $search_term) {
1161                if (mb_stripos($gedcom, $search_term) === false) {
1162                    return false;
1163                }
1164            }
1165
1166            return true;
1167        };
1168    }
1169
1170    /**
1171     * Searching for short or common text can give more results than the system can process.
1172     *
1173     * @param int $limit
1174     *
1175     * @return Closure
1176     */
1177    private function rowLimiter(int $limit = self::MAX_SEARCH_RESULTS): Closure
1178    {
1179        return static function () use ($limit): void {
1180            static $n = 0;
1181
1182            if (++$n > $limit) {
1183                $message = I18N::translate('The search returned too many results.');
1184
1185                throw new HttpServiceUnavailableException($message);
1186            }
1187        };
1188    }
1189
1190    /**
1191     * Convert a row from any tree in the families table into a family object.
1192     *
1193     * @return Closure
1194     */
1195    private function familyRowMapper(): Closure
1196    {
1197        return function (object $row): Family {
1198            $tree = $this->tree_service->find((int) $row->f_file);
1199
1200            return Registry::familyFactory()->mapper($tree)($row);
1201        };
1202    }
1203
1204    /**
1205     * Convert a row from any tree in the individuals table into an individual object.
1206     *
1207     * @return Closure
1208     */
1209    private function individualRowMapper(): Closure
1210    {
1211        return function (object $row): Individual {
1212            $tree = $this->tree_service->find((int) $row->i_file);
1213
1214            return Registry::individualFactory()->mapper($tree)($row);
1215        };
1216    }
1217
1218    /**
1219     * Convert a row from any tree in the media table into a location object.
1220     *
1221     * @return Closure
1222     */
1223    private function locationRowMapper(): Closure
1224    {
1225        return function (object $row): Location {
1226            $tree = $this->tree_service->find((int) $row->o_file);
1227
1228            return Registry::locationFactory()->mapper($tree)($row);
1229        };
1230    }
1231
1232    /**
1233     * Convert a row from any tree in the media table into an media object.
1234     *
1235     * @return Closure
1236     */
1237    private function mediaRowMapper(): Closure
1238    {
1239        return function (object $row): Media {
1240            $tree = $this->tree_service->find((int) $row->m_file);
1241
1242            return Registry::mediaFactory()->mapper($tree)($row);
1243        };
1244    }
1245
1246    /**
1247     * Convert a row from any tree in the other table into a note object.
1248     *
1249     * @return Closure
1250     */
1251    private function noteRowMapper(): Closure
1252    {
1253        return function (object $row): Note {
1254            $tree = $this->tree_service->find((int) $row->o_file);
1255
1256            return Registry::noteFactory()->mapper($tree)($row);
1257        };
1258    }
1259
1260    /**
1261     * Convert a row from any tree in the other table into a repository object.
1262     *
1263     * @return Closure
1264     */
1265    private function repositoryRowMapper(): Closure
1266    {
1267        return function (object $row): Repository {
1268            $tree = $this->tree_service->find((int) $row->o_file);
1269
1270            return Registry::repositoryFactory()->mapper($tree)($row);
1271        };
1272    }
1273
1274    /**
1275     * Convert a row from any tree in the other table into a note object.
1276     *
1277     * @return Closure
1278     */
1279    private function sharedNoteRowMapper(): Closure
1280    {
1281        return function (object $row): Note {
1282            $tree = $this->tree_service->find((int) $row->o_file);
1283
1284            return Registry::sharedNoteFactory()->mapper($tree)($row);
1285        };
1286    }
1287
1288    /**
1289     * Convert a row from any tree in the sources table into a source object.
1290     *
1291     * @return Closure
1292     */
1293    private function sourceRowMapper(): Closure
1294    {
1295        return function (object $row): Source {
1296            $tree = $this->tree_service->find((int) $row->s_file);
1297
1298            return Registry::sourceFactory()->mapper($tree)($row);
1299        };
1300    }
1301
1302    /**
1303     * Convert a row from any tree in the other table into a submission object.
1304     *
1305     * @return Closure
1306     */
1307    private function submissionRowMapper(): Closure
1308    {
1309        return function (object $row): Submission {
1310            $tree = $this->tree_service->find((int) $row->o_file);
1311
1312            return Registry::submissionFactory()->mapper($tree)($row);
1313        };
1314    }
1315
1316    /**
1317     * Convert a row from any tree in the other table into a submitter object.
1318     *
1319     * @return Closure
1320     */
1321    private function submitterRowMapper(): Closure
1322    {
1323        return function (object $row): Submitter {
1324            $tree = $this->tree_service->find((int) $row->o_file);
1325
1326            return Registry::submitterFactory()->mapper($tree)($row);
1327        };
1328    }
1329}
1330