xref: /webtrees/app/Services/SearchService.php (revision 1b85c7f5de205a5e1708adf7e5d0ea0b9067b473)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Closure;
23use Fisharebest\Webtrees\Date;
24use Fisharebest\Webtrees\DB;
25use Fisharebest\Webtrees\Family;
26use Fisharebest\Webtrees\Gedcom;
27use Fisharebest\Webtrees\GedcomRecord;
28use Fisharebest\Webtrees\Http\Exceptions\HttpServiceUnavailableException;
29use Fisharebest\Webtrees\I18N;
30use Fisharebest\Webtrees\Individual;
31use Fisharebest\Webtrees\Location;
32use Fisharebest\Webtrees\Media;
33use Fisharebest\Webtrees\Note;
34use Fisharebest\Webtrees\Place;
35use Fisharebest\Webtrees\Registry;
36use Fisharebest\Webtrees\Repository;
37use Fisharebest\Webtrees\SharedNote;
38use Fisharebest\Webtrees\Soundex;
39use Fisharebest\Webtrees\Source;
40use Fisharebest\Webtrees\Submission;
41use Fisharebest\Webtrees\Submitter;
42use Fisharebest\Webtrees\Tree;
43use Illuminate\Database\Query\Builder;
44use Illuminate\Database\Query\Expression;
45use Illuminate\Database\Query\JoinClause;
46use Illuminate\Support\Collection;
47
48use function addcslashes;
49use function array_filter;
50use function array_map;
51use function array_unique;
52use function explode;
53use function implode;
54use function mb_stripos;
55use function preg_match;
56use function preg_quote;
57use function preg_replace;
58use function str_ends_with;
59use function str_starts_with;
60
61use const PHP_INT_MAX;
62
63/**
64 * Search trees for genealogy records.
65 */
66class SearchService
67{
    // Do not attempt to show search results larger than this.
69    protected const MAX_SEARCH_RESULTS = 5000;
70
71    private TreeService $tree_service;
72
73    /**
74     * @param TreeService $tree_service
75     */
76    public function __construct(
77        TreeService $tree_service
78    ) {
79        $this->tree_service = $tree_service;
80    }
81
82    /**
83     * @param array<Tree>   $trees
84     * @param array<string> $search
85     *
86     * @return Collection<int,Family>
87     */
88    public function searchFamilies(array $trees, array $search): Collection
89    {
90        $query = DB::table('families');
91
92        $this->whereTrees($query, 'f_file', $trees);
93        $this->whereSearch($query, 'f_gedcom', $search);
94
95        return $query
96            ->get()
97            ->each($this->rowLimiter())
98            ->map($this->familyRowMapper())
99            ->filter(GedcomRecord::accessFilter())
100            ->filter($this->rawGedcomFilter($search));
101    }
102
103    /**
104     * Search for families by name.
105     *
106     * @param array<Tree>   $trees
107     * @param array<string> $search
108     * @param int           $offset
109     * @param int           $limit
110     *
111     * @return Collection<int,Family>
112     */
113    public function searchFamilyNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
114    {
115        $query = DB::table('families')
116            ->leftJoin('name AS husb_name', static function (JoinClause $join): void {
117                $join
118                    ->on('husb_name.n_file', '=', 'families.f_file')
119                    ->on('husb_name.n_id', '=', 'families.f_husb')
120                    ->where('husb_name.n_type', '<>', '_MARNM');
121            })
122            ->leftJoin('name AS wife_name', static function (JoinClause $join): void {
123                $join
124                    ->on('wife_name.n_file', '=', 'families.f_file')
125                    ->on('wife_name.n_id', '=', 'families.f_wife')
126                    ->where('wife_name.n_type', '<>', '_MARNM');
127            });
128
129        $prefix = DB::connection()->getTablePrefix();
130        $field  = new Expression('COALESCE(' . $prefix . "husb_name.n_full, '') || COALESCE(" . $prefix . "wife_name.n_full, '')");
131
132        $this->whereTrees($query, 'f_file', $trees);
133        $this->whereSearch($query, $field, $search);
134
135        $query
136            ->orderBy('husb_name.n_sort')
137            ->orderBy('wife_name.n_sort')
138            ->select(['families.*', 'husb_name.n_sort', 'wife_name.n_sort']);
139
140        return $this->paginateQuery($query, $this->familyRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
141    }
142
143    /**
144     * @param Place $place
145     *
146     * @return Collection<int,Family>
147     */
148    public function searchFamiliesInPlace(Place $place): Collection
149    {
150        return DB::table('families')
151            ->join('placelinks', static function (JoinClause $query) {
152                $query
153                    ->on('families.f_file', '=', 'placelinks.pl_file')
154                    ->on('families.f_id', '=', 'placelinks.pl_gid');
155            })
156            ->where('f_file', '=', $place->tree()->id())
157            ->where('pl_p_id', '=', $place->id())
158            ->select(['families.*'])
159            ->get()
160            ->each($this->rowLimiter())
161            ->map($this->familyRowMapper())
162            ->filter(GedcomRecord::accessFilter());
163    }
164
165    /**
166     * @param array<Tree>   $trees
167     * @param array<string> $search
168     *
169     * @return Collection<int,Individual>
170     */
171    public function searchIndividuals(array $trees, array $search): Collection
172    {
173        $query = DB::table('individuals');
174
175        $this->whereTrees($query, 'i_file', $trees);
176        $this->whereSearch($query, 'i_gedcom', $search);
177
178        return $query
179            ->get()
180            ->each($this->rowLimiter())
181            ->map($this->individualRowMapper())
182            ->filter(GedcomRecord::accessFilter())
183            ->filter($this->rawGedcomFilter($search));
184    }
185
186    /**
187     * Search for individuals by name.
188     *
189     * @param array<Tree>   $trees
190     * @param array<string> $search
191     * @param int           $offset
192     * @param int           $limit
193     *
194     * @return Collection<int,Individual>
195     */
196    public function searchIndividualNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
197    {
198        $query = DB::table('individuals')
199            ->join('name', static function (JoinClause $join): void {
200                $join
201                    ->on('name.n_file', '=', 'individuals.i_file')
202                    ->on('name.n_id', '=', 'individuals.i_id');
203            })
204            ->orderBy('n_sort')
205            ->select(['individuals.*', 'n_sort']);
206
207        $this->whereTrees($query, 'i_file', $trees);
208        $this->whereSearch($query, 'n_full', $search);
209
210        return $this->paginateQuery($query, $this->individualRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
211    }
212
213    /**
214     * @param Place $place
215     *
216     * @return Collection<int,Individual>
217     */
218    public function searchIndividualsInPlace(Place $place): Collection
219    {
220        return DB::table('individuals')
221            ->join('placelinks', static function (JoinClause $join) {
222                $join
223                    ->on('i_file', '=', 'pl_file')
224                    ->on('i_id', '=', 'pl_gid');
225            })
226            ->where('i_file', '=', $place->tree()->id())
227            ->where('pl_p_id', '=', $place->id())
228            ->select(['individuals.*'])
229            ->get()
230            ->each($this->rowLimiter())
231            ->map($this->individualRowMapper())
232            ->filter(GedcomRecord::accessFilter());
233    }
234
235    /**
236     * Search for submissions.
237     *
238     * @param array<Tree>   $trees
239     * @param array<string> $search
240     * @param int           $offset
241     * @param int           $limit
242     *
243     * @return Collection<int,Location>
244     */
245    public function searchLocations(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
246    {
247        $query = DB::table('other')
248            ->where('o_type', '=', Location::RECORD_TYPE);
249
250        $this->whereTrees($query, 'o_file', $trees);
251        $this->whereSearch($query, 'o_gedcom', $search);
252
253        return $this->paginateQuery($query, $this->locationRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
254    }
255
256    /**
257     * Search for media objects.
258     *
259     * @param array<Tree>   $trees
260     * @param array<string> $search
261     * @param int           $offset
262     * @param int           $limit
263     *
264     * @return Collection<int,Media>
265     */
266    public function searchMedia(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
267    {
268        $query = DB::table('media');
269
270        $this->whereTrees($query, 'media.m_file', $trees);
271        $this->whereSearch($query, 'm_gedcom', $search);
272
273        return $this->paginateQuery($query, $this->mediaRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
274    }
275
276    /**
277     * Search for notes.
278     *
279     * @param array<Tree>   $trees
280     * @param array<string> $search
281     * @param int           $offset
282     * @param int           $limit
283     *
284     * @return Collection<int,Note>
285     */
286    public function searchNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
287    {
288        $query = DB::table('other')
289            ->where('o_type', '=', Note::RECORD_TYPE);
290
291        $this->whereTrees($query, 'o_file', $trees);
292        $this->whereSearch($query, 'o_gedcom', $search);
293
294        return $this->paginateQuery($query, $this->noteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
295    }
296
297    /**
298     * Search for notes.
299     *
300     * @param array<Tree>   $trees
301     * @param array<string> $search
302     * @param int           $offset
303     * @param int           $limit
304     *
305     * @return Collection<int,SharedNote>
306     */
307    public function searchSharedNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
308    {
309        $query = DB::table('other')
310            ->where('o_type', '=', SharedNote::RECORD_TYPE);
311
312        $this->whereTrees($query, 'o_file', $trees);
313        $this->whereSearch($query, 'o_gedcom', $search);
314
315        return $this->paginateQuery($query, $this->sharedNoteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
316    }
317
318    /**
319     * Search for repositories.
320     *
321     * @param array<Tree>   $trees
322     * @param array<string> $search
323     * @param int           $offset
324     * @param int           $limit
325     *
326     * @return Collection<int,Repository>
327     */
328    public function searchRepositories(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
329    {
330        $query = DB::table('other')
331            ->where('o_type', '=', Repository::RECORD_TYPE);
332
333        $this->whereTrees($query, 'o_file', $trees);
334        $this->whereSearch($query, 'o_gedcom', $search);
335
336        return $this->paginateQuery($query, $this->repositoryRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
337    }
338
339    /**
340     * Search for sources.
341     *
342     * @param array<Tree>   $trees
343     * @param array<string> $search
344     * @param int      $offset
345     * @param int      $limit
346     *
347     * @return Collection<int,Source>
348     */
349    public function searchSources(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
350    {
351        $query = DB::table('sources');
352
353        $this->whereTrees($query, 's_file', $trees);
354        $this->whereSearch($query, 's_gedcom', $search);
355
356        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
357    }
358
359    /**
360     * Search for sources by name.
361     *
362     * @param array<Tree>   $trees
363     * @param array<string> $search
364     * @param int           $offset
365     * @param int           $limit
366     *
367     * @return Collection<int,Source>
368     */
369    public function searchSourcesByName(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
370    {
371        $query = DB::table('sources')
372            ->orderBy('s_name');
373
374        $this->whereTrees($query, 's_file', $trees);
375        $this->whereSearch($query, 's_name', $search);
376
377        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
378    }
379
380    /**
381     * Search for sources.
382     *
383     * @param array<Tree>   $trees
384     * @param array<string> $search
385     * @param int           $offset
386     * @param int           $limit
387     *
388     * @return Collection<int,string>
389     */
390    public function searchSurnames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
391    {
392        $query = DB::table('name');
393
394        $this->whereTrees($query, 'n_file', $trees);
395        $this->whereSearch($query, 'n_surname', $search);
396
397        return $query
398            ->groupBy(['n_surname'])
399            ->orderBy('n_surname')
400            ->skip($offset)
401            ->take($limit)
402            ->pluck('n_surname');
403    }
404
405    /**
406     * Search for submissions.
407     *
408     * @param array<Tree>   $trees
409     * @param array<string> $search
410     * @param int           $offset
411     * @param int           $limit
412     *
413     * @return Collection<int,Submission>
414     */
415    public function searchSubmissions(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
416    {
417        $query = DB::table('other')
418            ->where('o_type', '=', Submission::RECORD_TYPE);
419
420        $this->whereTrees($query, 'o_file', $trees);
421        $this->whereSearch($query, 'o_gedcom', $search);
422
423        return $this->paginateQuery($query, $this->submissionRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
424    }
425
426    /**
427     * Search for submitters.
428     *
429     * @param array<Tree>   $trees
430     * @param array<string> $search
431     * @param int           $offset
432     * @param int           $limit
433     *
434     * @return Collection<int,Submitter>
435     */
436    public function searchSubmitters(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
437    {
438        $query = DB::table('other')
439            ->where('o_type', '=', Submitter::RECORD_TYPE);
440
441        $this->whereTrees($query, 'o_file', $trees);
442        $this->whereSearch($query, 'o_gedcom', $search);
443
444        return $this->paginateQuery($query, $this->submitterRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
445    }
446
447    /**
448     * Search for places.
449     *
450     * @param Tree   $tree
451     * @param string $search
452     * @param int    $offset
453     * @param int    $limit
454     *
455     * @return Collection<int,Place>
456     */
457    public function searchPlaces(Tree $tree, string $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
458    {
459        $query = DB::table('places AS p0')
460            ->where('p0.p_file', '=', $tree->id())
461            ->leftJoin('places AS p1', 'p1.p_id', '=', 'p0.p_parent_id')
462            ->leftJoin('places AS p2', 'p2.p_id', '=', 'p1.p_parent_id')
463            ->leftJoin('places AS p3', 'p3.p_id', '=', 'p2.p_parent_id')
464            ->leftJoin('places AS p4', 'p4.p_id', '=', 'p3.p_parent_id')
465            ->leftJoin('places AS p5', 'p5.p_id', '=', 'p4.p_parent_id')
466            ->leftJoin('places AS p6', 'p6.p_id', '=', 'p5.p_parent_id')
467            ->leftJoin('places AS p7', 'p7.p_id', '=', 'p6.p_parent_id')
468            ->leftJoin('places AS p8', 'p8.p_id', '=', 'p7.p_parent_id')
469            ->orderBy('p0.p_place')
470            ->orderBy('p1.p_place')
471            ->orderBy('p2.p_place')
472            ->orderBy('p3.p_place')
473            ->orderBy('p4.p_place')
474            ->orderBy('p5.p_place')
475            ->orderBy('p6.p_place')
476            ->orderBy('p7.p_place')
477            ->orderBy('p8.p_place')
478            ->select([
479                'p0.p_place AS place0',
480                'p1.p_place AS place1',
481                'p2.p_place AS place2',
482                'p3.p_place AS place3',
483                'p4.p_place AS place4',
484                'p5.p_place AS place5',
485                'p6.p_place AS place6',
486                'p7.p_place AS place7',
487                'p8.p_place AS place8',
488            ]);
489
490        // Filter each level of the hierarchy.
491        foreach (explode(',', $search, 9) as $level => $string) {
492            $query->where('p' . $level . '.p_place', $this->iLike(), '%' . addcslashes($string, '\\%_') . '%');
493        }
494
495        $row_mapper = static function (object $row) use ($tree): Place {
496            $place = implode(', ', array_filter((array) $row));
497
498            return new Place($place, $tree);
499        };
500
501        $filter = static function (): bool {
502            return true;
503        };
504
505        return $this->paginateQuery($query, $row_mapper, $filter, $offset, $limit);
506    }
507
508    /**
509     * @param Tree                 $tree
510     * @param array<string,string> $fields
511     * @param array<string,string> $modifiers
512     *
513     * @return Collection<int,Individual>
514     */
515    public function searchIndividualsAdvanced(Tree $tree, array $fields, array $modifiers): Collection
516    {
517        $fields = array_filter($fields, static fn (string $x): bool => $x !== '');
518
519        $query = DB::table('individuals')
520            ->where('i_file', '=', $tree->id())
521            ->select(['individuals.*'])
522            ->distinct();
523
524        // Join the following tables
525        $father_name   = false;
526        $mother_name   = false;
527        $spouse_family = false;
528        $indi_name     = false;
529        $indi_dates    = [];
530        $fam_dates     = [];
531        $indi_plac     = false;
532        $fam_plac      = false;
533
534        foreach ($fields as $field_name => $field_value) {
535            if (str_starts_with($field_name, 'FATHER:NAME')) {
536                $father_name = true;
537            } elseif (str_starts_with($field_name, 'MOTHER:NAME')) {
538                $mother_name = true;
539            } elseif (str_starts_with($field_name, 'INDI:NAME:GIVN')) {
540                $indi_name = true;
541            } elseif (str_starts_with($field_name, 'INDI:NAME:SURN')) {
542                $indi_name = true;
543            } elseif (str_starts_with($field_name, 'FAM:')) {
544                $spouse_family = true;
545                if (str_ends_with($field_name, ':DATE')) {
546                    $fam_dates[] = explode(':', $field_name)[1];
547                } elseif (str_ends_with($field_name, ':PLAC')) {
548                    $fam_plac = true;
549                }
550            } elseif (str_starts_with($field_name, 'INDI:')) {
551                if (str_ends_with($field_name, ':DATE')) {
552                    $indi_dates[] = explode(':', $field_name)[1];
553                } elseif (str_ends_with($field_name, ':PLAC')) {
554                    $indi_plac = true;
555                }
556            }
557        }
558
559        if ($father_name || $mother_name) {
560            $query->join('link AS l1', static function (JoinClause $join): void {
561                $join
562                    ->on('l1.l_file', '=', 'individuals.i_file')
563                    ->on('l1.l_from', '=', 'individuals.i_id')
564                    ->where('l1.l_type', '=', 'FAMC');
565            });
566
567            if ($father_name) {
568                $query->join('link AS l2', static function (JoinClause $join): void {
569                    $join
570                        ->on('l2.l_file', '=', 'l1.l_file')
571                        ->on('l2.l_from', '=', 'l1.l_to')
572                        ->where('l2.l_type', '=', 'HUSB');
573                });
574                $query->join('name AS father_name', static function (JoinClause $join): void {
575                    $join
576                        ->on('father_name.n_file', '=', 'l2.l_file')
577                        ->on('father_name.n_id', '=', 'l2.l_to');
578                });
579            }
580
581            if ($mother_name) {
582                $query->join('link AS l3', static function (JoinClause $join): void {
583                    $join
584                        ->on('l3.l_file', '=', 'l1.l_file')
585                        ->on('l3.l_from', '=', 'l1.l_to')
586                        ->where('l3.l_type', '=', 'WIFE');
587                });
588                $query->join('name AS mother_name', static function (JoinClause $join): void {
589                    $join
590                        ->on('mother_name.n_file', '=', 'l3.l_file')
591                        ->on('mother_name.n_id', '=', 'l3.l_to');
592                });
593            }
594        }
595
596        if ($spouse_family) {
597            $query->join('link AS l4', static function (JoinClause $join): void {
598                $join
599                    ->on('l4.l_file', '=', 'individuals.i_file')
600                    ->on('l4.l_from', '=', 'individuals.i_id')
601                    ->where('l4.l_type', '=', 'FAMS');
602            });
603            $query->join('families AS spouse_families', static function (JoinClause $join): void {
604                $join
605                    ->on('spouse_families.f_file', '=', 'l4.l_file')
606                    ->on('spouse_families.f_id', '=', 'l4.l_to');
607            });
608        }
609
610        if ($indi_name) {
611            $query->join('name AS individual_name', static function (JoinClause $join): void {
612                $join
613                    ->on('individual_name.n_file', '=', 'individuals.i_file')
614                    ->on('individual_name.n_id', '=', 'individuals.i_id');
615            });
616        }
617
618        foreach (array_unique($indi_dates) as $indi_date) {
619            $query->join('dates AS date_' . $indi_date, static function (JoinClause $join) use ($indi_date): void {
620                $join
621                    ->on('date_' . $indi_date . '.d_file', '=', 'individuals.i_file')
622                    ->on('date_' . $indi_date . '.d_gid', '=', 'individuals.i_id');
623            });
624        }
625
626        foreach (array_unique($fam_dates) as $fam_date) {
627            $query->join('dates AS date_' . $fam_date, static function (JoinClause $join) use ($fam_date): void {
628                $join
629                    ->on('date_' . $fam_date . '.d_file', '=', 'spouse_families.f_file')
630                    ->on('date_' . $fam_date . '.d_gid', '=', 'spouse_families.f_id');
631            });
632        }
633
634        if ($indi_plac) {
635            $query->join('placelinks AS individual_placelinks', static function (JoinClause $join): void {
636                $join
637                    ->on('individual_placelinks.pl_file', '=', 'individuals.i_file')
638                    ->on('individual_placelinks.pl_gid', '=', 'individuals.i_id');
639            });
640            $query->join('places AS individual_places', static function (JoinClause $join): void {
641                $join
642                    ->on('individual_places.p_file', '=', 'individual_placelinks.pl_file')
643                    ->on('individual_places.p_id', '=', 'individual_placelinks.pl_p_id');
644            });
645        }
646
647        if ($fam_plac) {
648            $query->join('placelinks AS familyl_placelinks', static function (JoinClause $join): void {
649                $join
650                    ->on('familyl_placelinks.pl_file', '=', 'individuals.i_file')
651                    ->on('familyl_placelinks.pl_gid', '=', 'individuals.i_id');
652            });
653            $query->join('places AS family_places', static function (JoinClause $join): void {
654                $join
655                    ->on('family_places.p_file', '=', 'familyl_placelinks.pl_file')
656                    ->on('family_places.p_id', '=', 'familyl_placelinks.pl_p_id');
657            });
658        }
659
660        foreach ($fields as $field_name => $field_value) {
661            $parts = explode(':', $field_name . ':::');
662            if (str_starts_with($field_name, 'INDI:NAME:')) {
663                switch ($field_name) {
664                    case 'INDI:NAME:GIVN':
665                        switch ($modifiers[$field_name]) {
666                            case 'EXACT':
667                                $query->where('individual_name.n_givn', '=', $field_value);
668                                break;
669                            case 'BEGINS':
670                                $query->where('individual_name.n_givn', $this->iLike(), $field_value . '%');
671                                break;
672                            case 'CONTAINS':
673                                $query->where('individual_name.n_givn', $this->iLike(), '%' . $field_value . '%');
674                                break;
675                            case 'SDX_STD':
676                                $sdx = Soundex::russell($field_value);
677                                if ($sdx !== '') {
678                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_std', $sdx);
679                                } else {
680                                    // No phonetic content? Use a substring match
681                                    $query->where('individual_name.n_givn', $this->iLike(), '%' . $field_value . '%');
682                                }
683                                break;
684                            case 'SDX': // SDX uses DM by default.
685                            case 'SDX_DM':
686                                $sdx = Soundex::daitchMokotoff($field_value);
687                                if ($sdx !== '') {
688                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_dm', $sdx);
689                                } else {
690                                    // No phonetic content? Use a substring match
691                                    $query->where('individual_name.n_givn', $this->iLike(), '%' . $field_value . '%');
692                                }
693                                break;
694                        }
695                        unset($fields[$field_name]);
696                        break;
697                    case 'INDI:NAME:SURN':
698                        switch ($modifiers[$field_name]) {
699                            case 'EXACT':
700                                $query->where(function (Builder $query) use ($field_value): void {
701                                    $query
702                                        ->where('individual_name.n_surn', '=', $field_value)
703                                        ->orWhere('individual_name.n_surname', '=', $field_value);
704                                });
705                                break;
706                            case 'BEGINS':
707                                $query->where(function (Builder $query) use ($field_value): void {
708                                    $query
709                                        ->where('individual_name.n_surn', $this->iLike(), $field_value . '%')
710                                        ->orWhere('individual_name.n_surname', $this->iLike(), $field_value . '%');
711                                });
712                                break;
713                            case 'CONTAINS':
714                                $query->where(function (Builder $query) use ($field_value): void {
715                                    $query
716                                        ->where('individual_name.n_surn', $this->iLike(), '%' . $field_value . '%')
717                                        ->orWhere('individual_name.n_surname', $this->iLike(), '%' . $field_value . '%');
718                                });
719                                break;
720                            case 'SDX_STD':
721                                $sdx = Soundex::russell($field_value);
722                                if ($sdx !== '') {
723                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_std', $sdx);
724                                } else {
725                                    // No phonetic content? Use a substring match
726                                    $query->where(function (Builder $query) use ($field_value): void {
727                                        $query
728                                            ->where('individual_name.n_surn', $this->iLike(), '%' . $field_value . '%')
729                                            ->orWhere('individual_name.n_surname', $this->iLike(), '%' . $field_value . '%');
730                                    });
731                                }
732                                break;
733                            case 'SDX': // SDX uses DM by default.
734                            case 'SDX_DM':
735                                $sdx = Soundex::daitchMokotoff($field_value);
736                                if ($sdx !== '') {
737                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_dm', $sdx);
738                                } else {
739                                    // No phonetic content? Use a substring match
740                                    $query->where(function (Builder $query) use ($field_value): void {
741                                        $query
742                                            ->where('individual_name.n_surn', $this->iLike(), '%' . $field_value . '%')
743                                            ->orWhere('individual_name.n_surname', $this->iLike(), '%' . $field_value . '%');
744                                    });
745                                }
746                                break;
747                        }
748                        unset($fields[$field_name]);
749                        break;
750                    case 'INDI:NAME:NICK':
751                    case 'INDI:NAME:_MARNM':
752                    case 'INDI:NAME:_HEB':
753                    case 'INDI:NAME:_AKA':
754                        $like = "%\n1 NAME%\n2 " . $parts[2] . ' %' . preg_quote($field_value, '/') . '%';
755                        $query->where('individuals.i_gedcom', $this->iLike(), $like);
756                        break;
757                }
758            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':DATE')) {
759                $date = new Date($field_value);
760                if ($date->isOK()) {
761                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
762                    $query
763                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
764                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
765                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
766                }
767                unset($fields[$field_name]);
768            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':DATE')) {
769                $date = new Date($field_value);
770                if ($date->isOK()) {
771                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
772                    $query
773                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
774                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
775                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
776                }
777                unset($fields[$field_name]);
778            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
779                // SQL can only link a place to a person/family, not to an event.
780                $query->where('individual_places.p_id', '=', $field_value);
781            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
782                // SQL can only link a place to a person/family, not to an event.
783                $query->where('family_places.p_id', '=', $field_value);
784            } elseif (str_starts_with($field_name, 'MOTHER:NAME:') || str_starts_with($field_name, 'FATHER:NAME:')) {
785                $table = str_starts_with($field_name, 'FATHER:NAME:') ? 'father_name' : 'mother_name';
786                switch ($parts[2]) {
787                    case 'GIVN':
788                        switch ($modifiers[$field_name]) {
789                            case 'EXACT':
790                                $query->where($table . '.n_givn', '=', $field_value);
791                                break;
792                            case 'BEGINS':
793                                $query->where($table . '.n_givn', $this->iLike(), $field_value . '%');
794                                break;
795                            case 'CONTAINS':
796                                $query->where($table . '.n_givn', $this->iLike(), '%' . $field_value . '%');
797                                break;
798                            case 'SDX_STD':
799                                $sdx = Soundex::russell($field_value);
800                                if ($sdx !== '') {
801                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_std', $sdx);
802                                } else {
803                                    // No phonetic content? Use a substring match
804                                    $query->where($table . '.n_givn', $this->iLike(), '%' . $field_value . '%');
805                                }
806                                break;
807                            case 'SDX': // SDX uses DM by default.
808                            case 'SDX_DM':
809                                $sdx = Soundex::daitchMokotoff($field_value);
810                                if ($sdx !== '') {
811                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_dm', $sdx);
812                                } else {
813                                    // No phonetic content? Use a substring match
814                                    $query->where($table . '.n_givn', $this->iLike(), '%' . $field_value . '%');
815                                }
816                                break;
817                        }
818                        break;
819                    case 'SURN':
820                        switch ($modifiers[$field_name]) {
821                            case 'EXACT':
822                                $query->where($table . '.n_surn', '=', $field_value);
823                                break;
824                            case 'BEGINS':
825                                $query->where($table . '.n_surn', $this->iLike(), $field_value . '%');
826                                break;
827                            case 'CONTAINS':
828                                $query->where($table . '.n_surn', $this->iLike(), '%' . $field_value . '%');
829                                break;
830                            case 'SDX_STD':
831                                $sdx = Soundex::russell($field_value);
832                                if ($sdx !== '') {
833                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_std', $sdx);
834                                } else {
835                                    // No phonetic content? Use a substring match
836                                    $query->where($table . '.n_surn', $this->iLike(), '%' . $field_value . '%');
837                                }
838                                break;
839                            case 'SDX': // SDX uses DM by default.
840                            case 'SDX_DM':
841                                $sdx = Soundex::daitchMokotoff($field_value);
842                                if ($sdx !== '') {
843                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_dm', $sdx);
844                                } else {
845                                    // No phonetic content? Use a substring match
846                                    $query->where($table . '.n_surn', $this->iLike(), '%' . $field_value . '%');
847                                }
848                                break;
849                        }
850                        break;
851                }
852                unset($fields[$field_name]);
853            } elseif (str_starts_with($field_name, 'FAM:')) {
854                // e.g. searches for occupation, religion, note, etc.
855                // Initial matching only.  Need PHP to apply filter.
856                $query->where('spouse_families.f_gedcom', $this->iLike(), "%\n1 " . $parts[1] . ' %' . $field_value . '%');
857            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':TYPE')) {
858                // Initial matching only.  Need PHP to apply filter.
859                $query->where('individuals.i_gedcom', $this->iLike(), "%\n1 " . $parts[1] . "%\n2 TYPE %" . $field_value . '%');
860            } elseif (str_starts_with($field_name, 'INDI:')) {
861                // e.g. searches for occupation, religion, note, etc.
862                // Initial matching only.  Need PHP to apply filter.
863                $query->where('individuals.i_gedcom', $this->iLike(), "%\n1 " . $parts[1] . '%' . $parts[2] . '%' . $field_value . '%');
864            }
865        }
866
867        return $query
868            ->get()
869            ->each($this->rowLimiter())
870            ->map($this->individualRowMapper())
871            ->filter(GedcomRecord::accessFilter())
872            ->filter(static function (Individual $individual) use ($fields, $tree): bool {
873                // Check for searches which were only partially matched by SQL
874                foreach ($fields as $field_name => $field_value) {
875                    $parts = explode(':', $field_name . '::::');
876
877                    if (str_starts_with($field_name, 'INDI:NAME:') && $field_name !== 'INDI:NAME:GIVN' && $field_name !== 'INDI:NAME:SURN') {
878                        $regex = '/\n1 NAME.*(?:\n2.*)*\n2 ' . $parts[2] . ' .*' . preg_quote($field_value, '/') . '/i';
879
880                        if (preg_match($regex, $individual->gedcom()) === 1) {
881                            continue;
882                        }
883
884                        return false;
885                    }
886
887                    if (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
888                        $place = Place::find((int) $field_value, $tree);
889
890                        foreach ($individual->facts([$parts[1]]) as $fact) {
891                            if ($fact->place()->gedcomName() === $place->gedcomName() || str_ends_with($fact->place()->gedcomName(), ', ' . $place->gedcomName())) {
892                                continue 2;
893                            }
894                        }
895                        return false;
896                    }
897
898                    if (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
899                        $place = Place::find((int) $field_value, $tree);
900
901                        foreach ($individual->spouseFamilies() as $family) {
902                            foreach ($family->facts([$parts[1]]) as $fact) {
903                                if ($fact->place()->gedcomName() === $place->gedcomName() || str_ends_with($fact->place()->gedcomName(), ', ' . $place->gedcomName())) {
904                                    continue 3;
905                                }
906                            }
907                        }
908                        return false;
909                    }
910
911                    $regex = '/' . preg_quote($field_value, '/') . '/i';
912
913                    if ($field_name === 'INDI:FACT:TYPE' || $field_name === 'INDI:EVEN:TYPE' || $field_name === 'INDI:CHAN:_WT_USER') {
914                        foreach ($individual->facts([$parts[1]]) as $fact) {
915                            if (preg_match($regex, $fact->attribute($parts[2])) === 1) {
916                                continue 2;
917                            }
918                        }
919
920                        return false;
921                    }
922
923                    if (str_starts_with($field_name, 'INDI:')) {
924                        foreach ($individual->facts([$parts[1]]) as $fact) {
925                            if (preg_match($regex, $fact->value()) === 1) {
926                                continue 2;
927                            }
928                        }
929
930                        return false;
931                    }
932
933                    if (str_starts_with($field_name, 'FAM:')) {
934                        foreach ($individual->spouseFamilies() as $family) {
935                            foreach ($family->facts([$parts[1]]) as $fact) {
936                                if (preg_match($regex, $fact->value()) === 1) {
937                                    continue 3;
938                                }
939                            }
940                        }
941                        return false;
942                    }
943                }
944
945                return true;
946            });
947    }
948
949    /**
950     * @param string      $soundex
951     * @param string      $lastname
952     * @param string      $firstname
953     * @param string      $place
954     * @param array<Tree> $search_trees
955     *
956     * @return Collection<int,Individual>
957     */
958    public function searchIndividualsPhonetic(string $soundex, string $lastname, string $firstname, string $place, array $search_trees): Collection
959    {
960        switch ($soundex) {
961            default:
962            case 'Russell':
963                $givn_sdx   = Soundex::russell($firstname);
964                $surn_sdx   = Soundex::russell($lastname);
965                $plac_sdx   = Soundex::russell($place);
966                $givn_field = 'n_soundex_givn_std';
967                $surn_field = 'n_soundex_surn_std';
968                $plac_field = 'p_std_soundex';
969                break;
970            case 'DaitchM':
971                $givn_sdx   = Soundex::daitchMokotoff($firstname);
972                $surn_sdx   = Soundex::daitchMokotoff($lastname);
973                $plac_sdx   = Soundex::daitchMokotoff($place);
974                $givn_field = 'n_soundex_givn_dm';
975                $surn_field = 'n_soundex_surn_dm';
976                $plac_field = 'p_dm_soundex';
977                break;
978        }
979
980        // Nothing to search for? Return nothing.
981        if ($givn_sdx === '' && $surn_sdx === '' && $plac_sdx === '') {
982            return new Collection();
983        }
984
985        $query = DB::table('individuals')
986            ->select(['individuals.*'])
987            ->distinct();
988
989        $this->whereTrees($query, 'i_file', $search_trees);
990
991        if ($plac_sdx !== '') {
992            $query->join('placelinks', static function (JoinClause $join): void {
993                $join
994                    ->on('placelinks.pl_file', '=', 'individuals.i_file')
995                    ->on('placelinks.pl_gid', '=', 'individuals.i_id');
996            });
997            $query->join('places', static function (JoinClause $join): void {
998                $join
999                    ->on('places.p_file', '=', 'placelinks.pl_file')
1000                    ->on('places.p_id', '=', 'placelinks.pl_p_id');
1001            });
1002
1003            $this->wherePhonetic($query, $plac_field, $plac_sdx);
1004        }
1005
1006        if ($givn_sdx !== '' || $surn_sdx !== '') {
1007            $query->join('name', static function (JoinClause $join): void {
1008                $join
1009                    ->on('name.n_file', '=', 'individuals.i_file')
1010                    ->on('name.n_id', '=', 'individuals.i_id');
1011            });
1012
1013            $this->wherePhonetic($query, $givn_field, $givn_sdx);
1014            $this->wherePhonetic($query, $surn_field, $surn_sdx);
1015        }
1016
1017        return $query
1018            ->get()
1019            ->each($this->rowLimiter())
1020            ->map($this->individualRowMapper())
1021            ->filter(GedcomRecord::accessFilter());
1022    }
1023
1024    /**
1025     * Paginate a search query.
1026     *
1027     * @param Builder $query      Searches the database for the desired records.
1028     * @param Closure $row_mapper Converts a row from the query into a record.
1029     * @param Closure $row_filter
1030     * @param int     $offset     Skip this many rows.
1031     * @param int     $limit      Take this many rows.
1032     *
1033     * @return Collection<int,mixed>
1034     */
1035    private function paginateQuery(Builder $query, Closure $row_mapper, Closure $row_filter, int $offset, int $limit): Collection
1036    {
1037        $collection = new Collection();
1038
1039        foreach ($query->cursor() as $row) {
1040            $record = $row_mapper($row);
1041            // searchIndividualNames() and searchFamilyNames() can return duplicate rows,
1042            // where individuals have multiple names - and we need to sort results by name.
1043            if ($collection->containsStrict($record)) {
1044                continue;
1045            }
1046            // If the object has a method "canShow()", then use it to filter for privacy.
1047            if ($row_filter($record)) {
1048                if ($offset > 0) {
1049                    $offset--;
1050                } else {
1051                    if ($limit > 0) {
1052                        $collection->push($record);
1053                    }
1054
1055                    $limit--;
1056
1057                    if ($limit === 0) {
1058                        break;
1059                    }
1060                }
1061            }
1062        }
1063
1064
1065        return $collection;
1066    }
1067
1068    /**
1069     * Apply search filters to a SQL query column.  Apply collation rules to MySQL.
1070     *
1071     * @param Builder           $query
1072     * @param Expression|string $column
1073     * @param array<string>     $search_terms
1074     */
1075    private function whereSearch(Builder $query, Expression|string $column, array $search_terms): void
1076    {
1077        foreach ($search_terms as $search_term) {
1078            $query->where($column, $this->iLike(), '%' . addcslashes($search_term, '\\%_') . '%');
1079        }
1080    }
1081
1082    /**
1083     * Apply soundex search filters to a SQL query column.
1084     *
1085     * @param Builder           $query
1086     * @param Expression|string $field
1087     * @param string            $soundex
1088     */
1089    private function wherePhonetic(Builder $query, $field, string $soundex): void
1090    {
1091        if ($soundex !== '') {
1092            $query->where(function (Builder $query) use ($soundex, $field): void {
1093                foreach (explode(':', $soundex) as $sdx) {
1094                    $query->orWhere($field, $this->iLike(), '%' . $sdx . '%');
1095                }
1096            });
1097        }
1098    }
1099
1100    /**
1101     * @param Builder     $query
1102     * @param string      $tree_id_field
1103     * @param array<Tree> $trees
1104     */
1105    private function whereTrees(Builder $query, string $tree_id_field, array $trees): void
1106    {
1107        $tree_ids = array_map(static function (Tree $tree): int {
1108            return $tree->id();
1109        }, $trees);
1110
1111        $query->whereIn($tree_id_field, $tree_ids);
1112    }
1113
1114    /**
1115     * Find the media object that uses a particular media file.
1116     *
1117     * @param string $file
1118     *
1119     * @return array<Media>
1120     */
1121    public function findMediaObjectsForMediaFile(string $file): array
1122    {
1123        return DB::table('media')
1124            ->join('media_file', static function (JoinClause $join): void {
1125                $join
1126                    ->on('media_file.m_file', '=', 'media.m_file')
1127                    ->on('media_file.m_id', '=', 'media.m_id');
1128            })
1129            ->join('gedcom_setting', 'media.m_file', '=', 'gedcom_setting.gedcom_id')
1130            ->where(new Expression('setting_value || multimedia_file_refn'), '=', $file)
1131            ->select(['media.*'])
1132            ->distinct()
1133            ->get()
1134            ->map($this->mediaRowMapper())
1135            ->all();
1136    }
1137
1138    /**
1139     * A closure to filter records by privacy-filtered GEDCOM data.
1140     *
1141     * @param array<string> $search_terms
1142     *
1143     * @return Closure(GedcomRecord):bool
1144     */
1145    private function rawGedcomFilter(array $search_terms): Closure
1146    {
1147        return static function (GedcomRecord $record) use ($search_terms): bool {
1148            // Ignore non-genealogy fields
1149            $gedcom = preg_replace('/\n\d (?:_UID|_WT_USER) .*/', '', $record->gedcom());
1150
1151            // Ignore matches in links
1152            $gedcom = preg_replace('/\n\d ' . Gedcom::REGEX_TAG . '( @' . Gedcom::REGEX_XREF . '@)?/', '', $gedcom);
1153
1154            // Re-apply the filtering
1155            foreach ($search_terms as $search_term) {
1156                if (mb_stripos($gedcom, $search_term) === false) {
1157                    return false;
1158                }
1159            }
1160
1161            return true;
1162        };
1163    }
1164
1165    /**
1166     * Searching for short or common text can give more results than the system can process.
1167     *
1168     * @param int $limit
1169     *
1170     * @return Closure():void
1171     */
1172    private function rowLimiter(int $limit = self::MAX_SEARCH_RESULTS): Closure
1173    {
1174        return static function () use ($limit): void {
1175            static $n = 0;
1176
1177            if (++$n > $limit) {
1178                $message = I18N::translate('The search returned too many results.');
1179
1180                throw new HttpServiceUnavailableException($message);
1181            }
1182        };
1183    }
1184
1185    /**
1186     * Convert a row from any tree in the families table into a family object.
1187     *
1188     * @return Closure(object):Family
1189     */
1190    private function familyRowMapper(): Closure
1191    {
1192        return function (object $row): Family {
1193            $tree = $this->tree_service->find((int) $row->f_file);
1194
1195            return Registry::familyFactory()->mapper($tree)($row);
1196        };
1197    }
1198
1199    /**
1200     * Convert a row from any tree in the individuals table into an individual object.
1201     *
1202     * @return Closure(object):Individual
1203     */
1204    private function individualRowMapper(): Closure
1205    {
1206        return function (object $row): Individual {
1207            $tree = $this->tree_service->find((int) $row->i_file);
1208
1209            return Registry::individualFactory()->mapper($tree)($row);
1210        };
1211    }
1212
1213    /**
1214     * Convert a row from any tree in the media table into a location object.
1215     *
1216     * @return Closure(object):Location
1217     */
1218    private function locationRowMapper(): Closure
1219    {
1220        return function (object $row): Location {
1221            $tree = $this->tree_service->find((int) $row->o_file);
1222
1223            return Registry::locationFactory()->mapper($tree)($row);
1224        };
1225    }
1226
1227    /**
1228     * Convert a row from any tree in the media table into an media object.
1229     *
1230     * @return Closure(object):Media
1231     */
1232    private function mediaRowMapper(): Closure
1233    {
1234        return function (object $row): Media {
1235            $tree = $this->tree_service->find((int) $row->m_file);
1236
1237            return Registry::mediaFactory()->mapper($tree)($row);
1238        };
1239    }
1240
1241    /**
1242     * Convert a row from any tree in the other table into a note object.
1243     *
1244     * @return Closure:Note
1245     */
1246    private function noteRowMapper(): Closure
1247    {
1248        return function (object $row): Note {
1249            $tree = $this->tree_service->find((int) $row->o_file);
1250
1251            return Registry::noteFactory()->mapper($tree)($row);
1252        };
1253    }
1254
1255    /**
1256     * Convert a row from any tree in the other table into a repository object.
1257     *
1258     * @return Closure:Repository
1259     */
1260    private function repositoryRowMapper(): Closure
1261    {
1262        return function (object $row): Repository {
1263            $tree = $this->tree_service->find((int) $row->o_file);
1264
1265            return Registry::repositoryFactory()->mapper($tree)($row);
1266        };
1267    }
1268
1269    /**
1270     * Convert a row from any tree in the other table into a note object.
1271     *
1272     * @return Closure(object):SharedNote
1273     */
1274    private function sharedNoteRowMapper(): Closure
1275    {
1276        return function (object $row): Note {
1277            $tree = $this->tree_service->find((int) $row->o_file);
1278
1279            return Registry::sharedNoteFactory()->mapper($tree)($row);
1280        };
1281    }
1282
1283    /**
1284     * Convert a row from any tree in the sources table into a source object.
1285     *
1286     * @return Closure:Source
1287     */
1288    private function sourceRowMapper(): Closure
1289    {
1290        return function (object $row): Source {
1291            $tree = $this->tree_service->find((int) $row->s_file);
1292
1293            return Registry::sourceFactory()->mapper($tree)($row);
1294        };
1295    }
1296
1297    /**
1298     * Convert a row from any tree in the other table into a submission object.
1299     *
1300     * @return Closure(object):Submission
1301     */
1302    private function submissionRowMapper(): Closure
1303    {
1304        return function (object $row): Submission {
1305            $tree = $this->tree_service->find((int) $row->o_file);
1306
1307            return Registry::submissionFactory()->mapper($tree)($row);
1308        };
1309    }
1310
1311    /**
1312     * Convert a row from any tree in the other table into a submitter object.
1313     *
1314     * @return Closure(object):Submitter
1315     */
1316    private function submitterRowMapper(): Closure
1317    {
1318        return function (object $row): Submitter {
1319            $tree = $this->tree_service->find((int) $row->o_file);
1320
1321            return Registry::submitterFactory()->mapper($tree)($row);
1322        };
1323    }
1324
1325    /**
1326     * @internal - a better solution would support other RDBMS, probably by using collations.
1327     */
1328    private function iLike(): string
1329    {
1330        if (DB::connection()->getDriverName() === 'pgsql') {
1331            return 'ILIKE';
1332        }
1333
1334        return 'LIKE';
1335    }
1336}
1337