xref: /webtrees/app/Services/SearchService.php (revision e93a8df2f8d797005750082cc3766c0e80799688)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Closure;
23use Fisharebest\Webtrees\Date;
24use Fisharebest\Webtrees\Family;
25use Fisharebest\Webtrees\Gedcom;
26use Fisharebest\Webtrees\GedcomRecord;
27use Fisharebest\Webtrees\Http\Exceptions\HttpServiceUnavailableException;
28use Fisharebest\Webtrees\I18N;
29use Fisharebest\Webtrees\Individual;
30use Fisharebest\Webtrees\Location;
31use Fisharebest\Webtrees\Media;
32use Fisharebest\Webtrees\Note;
33use Fisharebest\Webtrees\Place;
34use Fisharebest\Webtrees\Registry;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\SharedNote;
37use Fisharebest\Webtrees\Soundex;
38use Fisharebest\Webtrees\Source;
39use Fisharebest\Webtrees\Submission;
40use Fisharebest\Webtrees\Submitter;
41use Fisharebest\Webtrees\Tree;
42use Illuminate\Database\Capsule\Manager as DB;
43use Illuminate\Database\Query\Builder;
44use Illuminate\Database\Query\Expression;
45use Illuminate\Database\Query\JoinClause;
46use Illuminate\Support\Collection;
47
48use function addcslashes;
49use function array_filter;
50use function array_map;
51use function array_unique;
52use function explode;
53use function implode;
54use function is_string;
55use function mb_stripos;
56use function preg_match;
57use function preg_quote;
58use function preg_replace;
59use function str_ends_with;
60use function str_starts_with;
61
62use const PHP_INT_MAX;
63
64/**
65 * Search trees for genealogy records.
66 */
67class SearchService
68{
69    // Do not attempt to show search results larger than this/
70    protected const MAX_SEARCH_RESULTS = 5000;
71
72    private TreeService $tree_service;
73
74    /**
75     * @param TreeService $tree_service
76     */
77    public function __construct(
78        TreeService $tree_service
79    ) {
80        $this->tree_service = $tree_service;
81    }
82
83    /**
84     * @param array<Tree>   $trees
85     * @param array<string> $search
86     *
87     * @return Collection<int,Family>
88     */
89    public function searchFamilies(array $trees, array $search): Collection
90    {
91        $query = DB::table('families');
92
93        $this->whereTrees($query, 'f_file', $trees);
94        $this->whereSearch($query, 'f_gedcom', $search);
95
96        return $query
97            ->get()
98            ->each($this->rowLimiter())
99            ->map($this->familyRowMapper())
100            ->filter(GedcomRecord::accessFilter())
101            ->filter($this->rawGedcomFilter($search));
102    }
103
104    /**
105     * Search for families by name.
106     *
107     * @param array<Tree>   $trees
108     * @param array<string> $search
109     * @param int           $offset
110     * @param int           $limit
111     *
112     * @return Collection<int,Family>
113     */
114    public function searchFamilyNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
115    {
116        $query = DB::table('families')
117            ->leftJoin('name AS husb_name', static function (JoinClause $join): void {
118                $join
119                    ->on('husb_name.n_file', '=', 'families.f_file')
120                    ->on('husb_name.n_id', '=', 'families.f_husb')
121                    ->where('husb_name.n_type', '<>', '_MARNM');
122            })
123            ->leftJoin('name AS wife_name', static function (JoinClause $join): void {
124                $join
125                    ->on('wife_name.n_file', '=', 'families.f_file')
126                    ->on('wife_name.n_id', '=', 'families.f_wife')
127                    ->where('wife_name.n_type', '<>', '_MARNM');
128            });
129
130        $prefix = DB::connection()->getTablePrefix();
131        $field  = new Expression('COALESCE(' . $prefix . "husb_name.n_full, '') || COALESCE(" . $prefix . "wife_name.n_full, '')");
132
133        $this->whereTrees($query, 'f_file', $trees);
134        $this->whereSearch($query, $field, $search);
135
136        $query
137            ->orderBy('husb_name.n_sort')
138            ->orderBy('wife_name.n_sort')
139            ->select(['families.*', 'husb_name.n_sort', 'wife_name.n_sort']);
140
141        return $this->paginateQuery($query, $this->familyRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
142    }
143
144    /**
145     * @param Place $place
146     *
147     * @return Collection<int,Family>
148     */
149    public function searchFamiliesInPlace(Place $place): Collection
150    {
151        return DB::table('families')
152            ->join('placelinks', static function (JoinClause $query) {
153                $query
154                    ->on('families.f_file', '=', 'placelinks.pl_file')
155                    ->on('families.f_id', '=', 'placelinks.pl_gid');
156            })
157            ->where('f_file', '=', $place->tree()->id())
158            ->where('pl_p_id', '=', $place->id())
159            ->select(['families.*'])
160            ->get()
161            ->each($this->rowLimiter())
162            ->map($this->familyRowMapper())
163            ->filter(GedcomRecord::accessFilter());
164    }
165
166    /**
167     * @param array<Tree>   $trees
168     * @param array<string> $search
169     *
170     * @return Collection<int,Individual>
171     */
172    public function searchIndividuals(array $trees, array $search): Collection
173    {
174        $query = DB::table('individuals');
175
176        $this->whereTrees($query, 'i_file', $trees);
177        $this->whereSearch($query, 'i_gedcom', $search);
178
179        return $query
180            ->get()
181            ->each($this->rowLimiter())
182            ->map($this->individualRowMapper())
183            ->filter(GedcomRecord::accessFilter())
184            ->filter($this->rawGedcomFilter($search));
185    }
186
187    /**
188     * Search for individuals by name.
189     *
190     * @param array<Tree>   $trees
191     * @param array<string> $search
192     * @param int           $offset
193     * @param int           $limit
194     *
195     * @return Collection<int,Individual>
196     */
197    public function searchIndividualNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
198    {
199        $query = DB::table('individuals')
200            ->join('name', static function (JoinClause $join): void {
201                $join
202                    ->on('name.n_file', '=', 'individuals.i_file')
203                    ->on('name.n_id', '=', 'individuals.i_id');
204            })
205            ->orderBy('n_sort')
206            ->select(['individuals.*', 'n_sort']);
207
208        $this->whereTrees($query, 'i_file', $trees);
209        $this->whereSearch($query, 'n_full', $search);
210
211        return $this->paginateQuery($query, $this->individualRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
212    }
213
214    /**
215     * @param Place $place
216     *
217     * @return Collection<int,Individual>
218     */
219    public function searchIndividualsInPlace(Place $place): Collection
220    {
221        return DB::table('individuals')
222            ->join('placelinks', static function (JoinClause $join) {
223                $join
224                    ->on('i_file', '=', 'pl_file')
225                    ->on('i_id', '=', 'pl_gid');
226            })
227            ->where('i_file', '=', $place->tree()->id())
228            ->where('pl_p_id', '=', $place->id())
229            ->select(['individuals.*'])
230            ->get()
231            ->each($this->rowLimiter())
232            ->map($this->individualRowMapper())
233            ->filter(GedcomRecord::accessFilter());
234    }
235
236    /**
237     * Search for submissions.
238     *
239     * @param array<Tree>   $trees
240     * @param array<string> $search
241     * @param int           $offset
242     * @param int           $limit
243     *
244     * @return Collection<int,Location>
245     */
246    public function searchLocations(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
247    {
248        $query = DB::table('other')
249            ->where('o_type', '=', Location::RECORD_TYPE);
250
251        $this->whereTrees($query, 'o_file', $trees);
252        $this->whereSearch($query, 'o_gedcom', $search);
253
254        return $this->paginateQuery($query, $this->locationRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
255    }
256
257    /**
258     * Search for media objects.
259     *
260     * @param array<Tree>   $trees
261     * @param array<string> $search
262     * @param int           $offset
263     * @param int           $limit
264     *
265     * @return Collection<int,Media>
266     */
267    public function searchMedia(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
268    {
269        $query = DB::table('media');
270
271        $this->whereTrees($query, 'media.m_file', $trees);
272        $this->whereSearch($query, 'm_gedcom', $search);
273
274        return $this->paginateQuery($query, $this->mediaRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
275    }
276
277    /**
278     * Search for notes.
279     *
280     * @param array<Tree>   $trees
281     * @param array<string> $search
282     * @param int           $offset
283     * @param int           $limit
284     *
285     * @return Collection<int,Note>
286     */
287    public function searchNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
288    {
289        $query = DB::table('other')
290            ->where('o_type', '=', Note::RECORD_TYPE);
291
292        $this->whereTrees($query, 'o_file', $trees);
293        $this->whereSearch($query, 'o_gedcom', $search);
294
295        return $this->paginateQuery($query, $this->noteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
296    }
297
298    /**
299     * Search for notes.
300     *
301     * @param array<Tree>   $trees
302     * @param array<string> $search
303     * @param int           $offset
304     * @param int           $limit
305     *
306     * @return Collection<int,SharedNote>
307     */
308    public function searchSharedNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
309    {
310        $query = DB::table('other')
311            ->where('o_type', '=', SharedNote::RECORD_TYPE);
312
313        $this->whereTrees($query, 'o_file', $trees);
314        $this->whereSearch($query, 'o_gedcom', $search);
315
316        return $this->paginateQuery($query, $this->sharedNoteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
317    }
318
319    /**
320     * Search for repositories.
321     *
322     * @param array<Tree>   $trees
323     * @param array<string> $search
324     * @param int           $offset
325     * @param int           $limit
326     *
327     * @return Collection<int,Repository>
328     */
329    public function searchRepositories(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
330    {
331        $query = DB::table('other')
332            ->where('o_type', '=', Repository::RECORD_TYPE);
333
334        $this->whereTrees($query, 'o_file', $trees);
335        $this->whereSearch($query, 'o_gedcom', $search);
336
337        return $this->paginateQuery($query, $this->repositoryRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
338    }
339
340    /**
341     * Search for sources.
342     *
343     * @param array<Tree>   $trees
344     * @param array<string> $search
345     * @param int      $offset
346     * @param int      $limit
347     *
348     * @return Collection<int,Source>
349     */
350    public function searchSources(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
351    {
352        $query = DB::table('sources');
353
354        $this->whereTrees($query, 's_file', $trees);
355        $this->whereSearch($query, 's_gedcom', $search);
356
357        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
358    }
359
360    /**
361     * Search for sources by name.
362     *
363     * @param array<Tree>   $trees
364     * @param array<string> $search
365     * @param int           $offset
366     * @param int           $limit
367     *
368     * @return Collection<int,Source>
369     */
370    public function searchSourcesByName(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
371    {
372        $query = DB::table('sources')
373            ->orderBy('s_name');
374
375        $this->whereTrees($query, 's_file', $trees);
376        $this->whereSearch($query, 's_name', $search);
377
378        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
379    }
380
381    /**
382     * Search for sources.
383     *
384     * @param array<Tree>   $trees
385     * @param array<string> $search
386     * @param int           $offset
387     * @param int           $limit
388     *
389     * @return Collection<int,string>
390     */
391    public function searchSurnames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
392    {
393        $query = DB::table('name');
394
395        $this->whereTrees($query, 'n_file', $trees);
396        $this->whereSearch($query, 'n_surname', $search);
397
398        return $query
399            ->groupBy(['n_surname'])
400            ->orderBy('n_surname')
401            ->skip($offset)
402            ->take($limit)
403            ->pluck('n_surname');
404    }
405
406    /**
407     * Search for submissions.
408     *
409     * @param array<Tree>   $trees
410     * @param array<string> $search
411     * @param int           $offset
412     * @param int           $limit
413     *
414     * @return Collection<int,Submission>
415     */
416    public function searchSubmissions(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
417    {
418        $query = DB::table('other')
419            ->where('o_type', '=', Submission::RECORD_TYPE);
420
421        $this->whereTrees($query, 'o_file', $trees);
422        $this->whereSearch($query, 'o_gedcom', $search);
423
424        return $this->paginateQuery($query, $this->submissionRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
425    }
426
427    /**
428     * Search for submitters.
429     *
430     * @param array<Tree>   $trees
431     * @param array<string> $search
432     * @param int           $offset
433     * @param int           $limit
434     *
435     * @return Collection<int,Submitter>
436     */
437    public function searchSubmitters(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
438    {
439        $query = DB::table('other')
440            ->where('o_type', '=', Submitter::RECORD_TYPE);
441
442        $this->whereTrees($query, 'o_file', $trees);
443        $this->whereSearch($query, 'o_gedcom', $search);
444
445        return $this->paginateQuery($query, $this->submitterRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
446    }
447
448    /**
449     * Search for places.
450     *
451     * @param Tree   $tree
452     * @param string $search
453     * @param int    $offset
454     * @param int    $limit
455     *
456     * @return Collection<int,Place>
457     */
458    public function searchPlaces(Tree $tree, string $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
459    {
460        $query = DB::table('places AS p0')
461            ->where('p0.p_file', '=', $tree->id())
462            ->leftJoin('places AS p1', 'p1.p_id', '=', 'p0.p_parent_id')
463            ->leftJoin('places AS p2', 'p2.p_id', '=', 'p1.p_parent_id')
464            ->leftJoin('places AS p3', 'p3.p_id', '=', 'p2.p_parent_id')
465            ->leftJoin('places AS p4', 'p4.p_id', '=', 'p3.p_parent_id')
466            ->leftJoin('places AS p5', 'p5.p_id', '=', 'p4.p_parent_id')
467            ->leftJoin('places AS p6', 'p6.p_id', '=', 'p5.p_parent_id')
468            ->leftJoin('places AS p7', 'p7.p_id', '=', 'p6.p_parent_id')
469            ->leftJoin('places AS p8', 'p8.p_id', '=', 'p7.p_parent_id')
470            ->orderBy('p0.p_place')
471            ->orderBy('p1.p_place')
472            ->orderBy('p2.p_place')
473            ->orderBy('p3.p_place')
474            ->orderBy('p4.p_place')
475            ->orderBy('p5.p_place')
476            ->orderBy('p6.p_place')
477            ->orderBy('p7.p_place')
478            ->orderBy('p8.p_place')
479            ->select([
480                'p0.p_place AS place0',
481                'p1.p_place AS place1',
482                'p2.p_place AS place2',
483                'p3.p_place AS place3',
484                'p4.p_place AS place4',
485                'p5.p_place AS place5',
486                'p6.p_place AS place6',
487                'p7.p_place AS place7',
488                'p8.p_place AS place8',
489            ]);
490
491        // Filter each level of the hierarchy.
492        foreach (explode(',', $search, 9) as $level => $string) {
493            $query->where('p' . $level . '.p_place', 'LIKE', '%' . addcslashes($string, '\\%_') . '%');
494        }
495
496        $row_mapper = static function (object $row) use ($tree): Place {
497            $place = implode(', ', array_filter((array) $row));
498
499            return new Place($place, $tree);
500        };
501
502        $filter = static function (): bool {
503            return true;
504        };
505
506        return $this->paginateQuery($query, $row_mapper, $filter, $offset, $limit);
507    }
508
509    /**
510     * @param array<Tree>          $trees
511     * @param array<string,string> $fields
512     * @param array<string,string> $modifiers
513     *
514     * @return Collection<int,Individual>
515     */
516    public function searchIndividualsAdvanced(array $trees, array $fields, array $modifiers): Collection
517    {
518        $fields = array_filter($fields, static fn (string $x): bool => $x !== '');
519
520        $query = DB::table('individuals')
521            ->select(['individuals.*'])
522            ->distinct();
523
524        $this->whereTrees($query, 'i_file', $trees);
525
526        // Join the following tables
527        $father_name   = false;
528        $mother_name   = false;
529        $spouse_family = false;
530        $indi_name     = false;
531        $indi_dates    = [];
532        $fam_dates     = [];
533        $indi_plac     = false;
534        $fam_plac      = false;
535
536        foreach ($fields as $field_name => $field_value) {
537            if (str_starts_with($field_name, 'FATHER:NAME')) {
538                $father_name = true;
539            } elseif (str_starts_with($field_name, 'MOTHER:NAME')) {
540                $mother_name = true;
541            } elseif (str_starts_with($field_name, 'INDI:NAME:GIVN')) {
542                $indi_name = true;
543            } elseif (str_starts_with($field_name, 'INDI:NAME:SURN')) {
544                $indi_name = true;
545            } elseif (str_starts_with($field_name, 'FAM:')) {
546                $spouse_family = true;
547                if (str_ends_with($field_name, ':DATE')) {
548                    $fam_dates[] = explode(':', $field_name)[1];
549                } elseif (str_ends_with($field_name, ':PLAC')) {
550                    $fam_plac = true;
551                }
552            } elseif (str_starts_with($field_name, 'INDI:')) {
553                if (str_ends_with($field_name, ':DATE')) {
554                    $indi_dates[] = explode(':', $field_name)[1];
555                } elseif (str_ends_with($field_name, ':PLAC')) {
556                    $indi_plac = true;
557                }
558            }
559        }
560
561        if ($father_name || $mother_name) {
562            $query->join('link AS l1', static function (JoinClause $join): void {
563                $join
564                    ->on('l1.l_file', '=', 'individuals.i_file')
565                    ->on('l1.l_from', '=', 'individuals.i_id')
566                    ->where('l1.l_type', '=', 'FAMC');
567            });
568
569            if ($father_name) {
570                $query->join('link AS l2', static function (JoinClause $join): void {
571                    $join
572                        ->on('l2.l_file', '=', 'l1.l_file')
573                        ->on('l2.l_from', '=', 'l1.l_to')
574                        ->where('l2.l_type', '=', 'HUSB');
575                });
576                $query->join('name AS father_name', static function (JoinClause $join): void {
577                    $join
578                        ->on('father_name.n_file', '=', 'l2.l_file')
579                        ->on('father_name.n_id', '=', 'l2.l_to');
580                });
581            }
582
583            if ($mother_name) {
584                $query->join('link AS l3', static function (JoinClause $join): void {
585                    $join
586                        ->on('l3.l_file', '=', 'l1.l_file')
587                        ->on('l3.l_from', '=', 'l1.l_to')
588                        ->where('l3.l_type', '=', 'WIFE');
589                });
590                $query->join('name AS mother_name', static function (JoinClause $join): void {
591                    $join
592                        ->on('mother_name.n_file', '=', 'l3.l_file')
593                        ->on('mother_name.n_id', '=', 'l3.l_to');
594                });
595            }
596        }
597
598        if ($spouse_family) {
599            $query->join('link AS l4', static function (JoinClause $join): void {
600                $join
601                    ->on('l4.l_file', '=', 'individuals.i_file')
602                    ->on('l4.l_from', '=', 'individuals.i_id')
603                    ->where('l4.l_type', '=', 'FAMS');
604            });
605            $query->join('families AS spouse_families', static function (JoinClause $join): void {
606                $join
607                    ->on('spouse_families.f_file', '=', 'l4.l_file')
608                    ->on('spouse_families.f_id', '=', 'l4.l_to');
609            });
610        }
611
612        if ($indi_name) {
613            $query->join('name AS individual_name', static function (JoinClause $join): void {
614                $join
615                    ->on('individual_name.n_file', '=', 'individuals.i_file')
616                    ->on('individual_name.n_id', '=', 'individuals.i_id');
617            });
618        }
619
620        foreach (array_unique($indi_dates) as $indi_date) {
621            $query->join('dates AS date_' . $indi_date, static function (JoinClause $join) use ($indi_date): void {
622                $join
623                    ->on('date_' . $indi_date . '.d_file', '=', 'individuals.i_file')
624                    ->on('date_' . $indi_date . '.d_gid', '=', 'individuals.i_id');
625            });
626        }
627
628        foreach (array_unique($fam_dates) as $fam_date) {
629            $query->join('dates AS date_' . $fam_date, static function (JoinClause $join) use ($fam_date): void {
630                $join
631                    ->on('date_' . $fam_date . '.d_file', '=', 'spouse_families.f_file')
632                    ->on('date_' . $fam_date . '.d_gid', '=', 'spouse_families.f_id');
633            });
634        }
635
636        if ($indi_plac) {
637            $query->join('placelinks AS individual_placelinks', static function (JoinClause $join): void {
638                $join
639                    ->on('individual_placelinks.pl_file', '=', 'individuals.i_file')
640                    ->on('individual_placelinks.pl_gid', '=', 'individuals.i_id');
641            });
642            $query->join('places AS individual_places', static function (JoinClause $join): void {
643                $join
644                    ->on('individual_places.p_file', '=', 'individual_placelinks.pl_file')
645                    ->on('individual_places.p_id', '=', 'individual_placelinks.pl_p_id');
646            });
647        }
648
649        if ($fam_plac) {
650            $query->join('placelinks AS familyl_placelinks', static function (JoinClause $join): void {
651                $join
652                    ->on('familyl_placelinks.pl_file', '=', 'individuals.i_file')
653                    ->on('familyl_placelinks.pl_gid', '=', 'individuals.i_id');
654            });
655            $query->join('places AS family_places', static function (JoinClause $join): void {
656                $join
657                    ->on('family_places.p_file', '=', 'familyl_placelinks.pl_file')
658                    ->on('family_places.p_id', '=', 'familyl_placelinks.pl_p_id');
659            });
660        }
661
662        foreach ($fields as $field_name => $field_value) {
663            $parts = explode(':', $field_name . ':::');
664            if (str_starts_with($field_name, 'INDI:NAME:')) {
665                switch ($field_name) {
666                    case 'INDI:NAME:GIVN':
667                        switch ($modifiers[$field_name]) {
668                            case 'EXACT':
669                                $query->where('individual_name.n_givn', '=', $field_value);
670                                break;
671                            case 'BEGINS':
672                                $query->where('individual_name.n_givn', 'LIKE', $field_value . '%');
673                                break;
674                            case 'CONTAINS':
675                                $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
676                                break;
677                            case 'SDX_STD':
678                                $sdx = Soundex::russell($field_value);
679                                if ($sdx !== '') {
680                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_std', $sdx);
681                                } else {
682                                    // No phonetic content? Use a substring match
683                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
684                                }
685                                break;
686                            case 'SDX': // SDX uses DM by default.
687                            case 'SDX_DM':
688                                $sdx = Soundex::daitchMokotoff($field_value);
689                                if ($sdx !== '') {
690                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_dm', $sdx);
691                                } else {
692                                    // No phonetic content? Use a substring match
693                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
694                                }
695                                break;
696                        }
697                        unset($fields[$field_name]);
698                        break;
699                    case 'INDI:NAME:SURN':
700                        switch ($modifiers[$field_name]) {
701                            case 'EXACT':
702                                $query->where(function (Builder $query) use ($field_value): void {
703                                    $query
704                                        ->where('individual_name.n_surn', '=', $field_value)
705                                        ->orWhere('individual_name.n_surname', '=', $field_value);
706                                });
707                                break;
708                            case 'BEGINS':
709                                $query->where(function (Builder $query) use ($field_value): void {
710                                    $query
711                                        ->where('individual_name.n_surn', 'LIKE', $field_value . '%')
712                                        ->orWhere('individual_name.n_surname', 'LIKE', $field_value . '%');
713                                });
714                                break;
715                            case 'CONTAINS':
716                                $query->where(function (Builder $query) use ($field_value): void {
717                                    $query
718                                        ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
719                                        ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
720                                });
721                                break;
722                            case 'SDX_STD':
723                                $sdx = Soundex::russell($field_value);
724                                if ($sdx !== '') {
725                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_std', $sdx);
726                                } else {
727                                    // No phonetic content? Use a substring match
728                                    $query->where(function (Builder $query) use ($field_value): void {
729                                        $query
730                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
731                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
732                                    });
733                                }
734                                break;
735                            case 'SDX': // SDX uses DM by default.
736                            case 'SDX_DM':
737                                $sdx = Soundex::daitchMokotoff($field_value);
738                                if ($sdx !== '') {
739                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_dm', $sdx);
740                                } else {
741                                    // No phonetic content? Use a substring match
742                                    $query->where(function (Builder $query) use ($field_value): void {
743                                        $query
744                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
745                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
746                                    });
747                                }
748                                break;
749                        }
750                        unset($fields[$field_name]);
751                        break;
752                    case 'INDI:NAME:NICK':
753                    case 'INDI:NAME:_MARNM':
754                    case 'INDI:NAME:_HEB':
755                    case 'INDI:NAME:_AKA':
756                        $like = "%\n1 NAME%\n2 " . $parts[2] . ' %' . preg_quote($field_value, '/') . '%';
757                        $query->where('individuals.i_gedcom', 'LIKE', $like);
758                        break;
759                }
760            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':DATE')) {
761                $date = new Date($field_value);
762                if ($date->isOK()) {
763                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
764                    $query
765                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
766                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
767                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
768                }
769                unset($fields[$field_name]);
770            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':DATE')) {
771                $date = new Date($field_value);
772                if ($date->isOK()) {
773                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
774                    $query
775                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
776                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
777                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
778                }
779                unset($fields[$field_name]);
780            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
781                // SQL can only link a place to a person/family, not to an event.
782                $query->where('individual_places.p_place', 'LIKE', '%' . $field_value . '%');
783            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
784                // SQL can only link a place to a person/family, not to an event.
785                $query->where('family_places.p_place', 'LIKE', '%' . $field_value . '%');
786            } elseif (str_starts_with($field_name, 'MOTHER:NAME:') || str_starts_with($field_name, 'FATHER:NAME:')) {
787                $table = str_starts_with($field_name, 'FATHER:NAME:') ? 'father_name' : 'mother_name';
788                switch ($parts[2]) {
789                    case 'GIVN':
790                        switch ($modifiers[$field_name]) {
791                            case 'EXACT':
792                                $query->where($table . '.n_givn', '=', $field_value);
793                                break;
794                            case 'BEGINS':
795                                $query->where($table . '.n_givn', 'LIKE', $field_value . '%');
796                                break;
797                            case 'CONTAINS':
798                                $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
799                                break;
800                            case 'SDX_STD':
801                                $sdx = Soundex::russell($field_value);
802                                if ($sdx !== '') {
803                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_std', $sdx);
804                                } else {
805                                    // No phonetic content? Use a substring match
806                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
807                                }
808                                break;
809                            case 'SDX': // SDX uses DM by default.
810                            case 'SDX_DM':
811                                $sdx = Soundex::daitchMokotoff($field_value);
812                                if ($sdx !== '') {
813                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_dm', $sdx);
814                                } else {
815                                    // No phonetic content? Use a substring match
816                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
817                                }
818                                break;
819                        }
820                        break;
821                    case 'SURN':
822                        switch ($modifiers[$field_name]) {
823                            case 'EXACT':
824                                $query->where($table . '.n_surn', '=', $field_value);
825                                break;
826                            case 'BEGINS':
827                                $query->where($table . '.n_surn', 'LIKE', $field_value . '%');
828                                break;
829                            case 'CONTAINS':
830                                $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
831                                break;
832                            case 'SDX_STD':
833                                $sdx = Soundex::russell($field_value);
834                                if ($sdx !== '') {
835                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_std', $sdx);
836                                } else {
837                                    // No phonetic content? Use a substring match
838                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
839                                }
840                                break;
841                            case 'SDX': // SDX uses DM by default.
842                            case 'SDX_DM':
843                                $sdx = Soundex::daitchMokotoff($field_value);
844                                if ($sdx !== '') {
845                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_dm', $sdx);
846                                } else {
847                                    // No phonetic content? Use a substring match
848                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
849                                }
850                                break;
851                        }
852                        break;
853                }
854                unset($fields[$field_name]);
855            } elseif (str_starts_with($field_name, 'FAM:')) {
856                // e.g. searches for occupation, religion, note, etc.
857                // Initial matching only.  Need PHP to apply filter.
858                $query->where('spouse_families.f_gedcom', 'LIKE', "%\n1 " . $parts[1] . ' %' . $field_value . '%');
859            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':TYPE')) {
860                // Initial matching only.  Need PHP to apply filter.
861                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[1] . "%\n2 TYPE %" . $field_value . '%');
862            } elseif (str_starts_with($field_name, 'INDI:')) {
863                // e.g. searches for occupation, religion, note, etc.
864                // Initial matching only.  Need PHP to apply filter.
865                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[1] . '%' . $parts[2] . '%' . $field_value . '%');
866            }
867        }
868
869        return $query
870            ->get()
871            ->each($this->rowLimiter())
872            ->map($this->individualRowMapper())
873            ->filter(GedcomRecord::accessFilter())
874            ->filter(static function (Individual $individual) use ($fields): bool {
875                // Check for searches which were only partially matched by SQL
876                foreach ($fields as $field_name => $field_value) {
877                    $parts = explode(':', $field_name . '::::');
878
879                    if (str_starts_with($field_name, 'INDI:NAME:') && $field_name !== 'INDI:NAME:GIVN' && $field_name !== 'INDI:NAME:SURN') {
880                        $regex = '/\n1 NAME.*(?:\n2.*)*\n2 ' . $parts[2] . ' .*' . preg_quote($field_value, '/') . '/i';
881
882                        if (preg_match($regex, $individual->gedcom()) === 1) {
883                            continue;
884                        }
885
886                        return false;
887                    }
888
889                    $regex = '/' . preg_quote($field_value, '/') . '/i';
890
891                    if (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
892                        foreach ($individual->facts([$parts[1]]) as $fact) {
893                            if (preg_match($regex, $fact->place()->gedcomName()) === 1) {
894                                continue 2;
895                            }
896                        }
897                        return false;
898                    }
899
900                    if (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
901                        foreach ($individual->spouseFamilies() as $family) {
902                            foreach ($family->facts([$parts[1]]) as $fact) {
903                                if (preg_match($regex, $fact->place()->gedcomName()) === 1) {
904                                    continue 3;
905                                }
906                            }
907                        }
908                        return false;
909                    }
910
911                    if ($field_name === 'INDI:FACT:TYPE' || $field_name === 'INDI:EVEN:TYPE' || $field_name === 'INDI:CHAN:_WT_USER') {
912                        foreach ($individual->facts([$parts[1]]) as $fact) {
913                            if (preg_match($regex, $fact->attribute($parts[2])) === 1) {
914                                continue 2;
915                            }
916                        }
917
918                        return false;
919                    }
920
921                    if (str_starts_with($field_name, 'INDI:')) {
922                        foreach ($individual->facts([$parts[1]]) as $fact) {
923                            if (preg_match($regex, $fact->value()) === 1) {
924                                continue 2;
925                            }
926                        }
927
928                        return false;
929                    }
930
931                    if (str_starts_with($field_name, 'FAM:')) {
932                        foreach ($individual->spouseFamilies() as $family) {
933                            foreach ($family->facts([$parts[1]]) as $fact) {
934                                if (preg_match($regex, $fact->value()) === 1) {
935                                    continue 3;
936                                }
937                            }
938                        }
939                        return false;
940                    }
941                }
942
943                return true;
944            });
945    }
946
947    /**
948     * @param string      $soundex
949     * @param string      $lastname
950     * @param string      $firstname
951     * @param string      $place
952     * @param array<Tree> $search_trees
953     *
954     * @return Collection<int,Individual>
955     */
956    public function searchIndividualsPhonetic(string $soundex, string $lastname, string $firstname, string $place, array $search_trees): Collection
957    {
958        switch ($soundex) {
959            default:
960            case 'Russell':
961                $givn_sdx   = Soundex::russell($firstname);
962                $surn_sdx   = Soundex::russell($lastname);
963                $plac_sdx   = Soundex::russell($place);
964                $givn_field = 'n_soundex_givn_std';
965                $surn_field = 'n_soundex_surn_std';
966                $plac_field = 'p_std_soundex';
967                break;
968            case 'DaitchM':
969                $givn_sdx   = Soundex::daitchMokotoff($firstname);
970                $surn_sdx   = Soundex::daitchMokotoff($lastname);
971                $plac_sdx   = Soundex::daitchMokotoff($place);
972                $givn_field = 'n_soundex_givn_dm';
973                $surn_field = 'n_soundex_surn_dm';
974                $plac_field = 'p_dm_soundex';
975                break;
976        }
977
978        // Nothing to search for? Return nothing.
979        if ($givn_sdx === '' && $surn_sdx === '' && $plac_sdx === '') {
980            return new Collection();
981        }
982
983        $query = DB::table('individuals')
984            ->select(['individuals.*'])
985            ->distinct();
986
987        $this->whereTrees($query, 'i_file', $search_trees);
988
989        if ($plac_sdx !== '') {
990            $query->join('placelinks', static function (JoinClause $join): void {
991                $join
992                    ->on('placelinks.pl_file', '=', 'individuals.i_file')
993                    ->on('placelinks.pl_gid', '=', 'individuals.i_id');
994            });
995            $query->join('places', static function (JoinClause $join): void {
996                $join
997                    ->on('places.p_file', '=', 'placelinks.pl_file')
998                    ->on('places.p_id', '=', 'placelinks.pl_p_id');
999            });
1000
1001            $this->wherePhonetic($query, $plac_field, $plac_sdx);
1002        }
1003
1004        if ($givn_sdx !== '' || $surn_sdx !== '') {
1005            $query->join('name', static function (JoinClause $join): void {
1006                $join
1007                    ->on('name.n_file', '=', 'individuals.i_file')
1008                    ->on('name.n_id', '=', 'individuals.i_id');
1009            });
1010
1011            $this->wherePhonetic($query, $givn_field, $givn_sdx);
1012            $this->wherePhonetic($query, $surn_field, $surn_sdx);
1013        }
1014
1015        return $query
1016            ->get()
1017            ->each($this->rowLimiter())
1018            ->map($this->individualRowMapper())
1019            ->filter(GedcomRecord::accessFilter());
1020    }
1021
1022    /**
1023     * Paginate a search query.
1024     *
1025     * @param Builder $query      Searches the database for the desired records.
1026     * @param Closure $row_mapper Converts a row from the query into a record.
1027     * @param Closure $row_filter
1028     * @param int     $offset     Skip this many rows.
1029     * @param int     $limit      Take this many rows.
1030     *
1031     * @return Collection<int,mixed>
1032     */
1033    private function paginateQuery(Builder $query, Closure $row_mapper, Closure $row_filter, int $offset, int $limit): Collection
1034    {
1035        $collection = new Collection();
1036
1037        foreach ($query->cursor() as $row) {
1038            $record = $row_mapper($row);
1039            // searchIndividualNames() and searchFamilyNames() can return duplicate rows,
1040            // where individuals have multiple names - and we need to sort results by name.
1041            if ($collection->containsStrict($record)) {
1042                continue;
1043            }
1044            // If the object has a method "canShow()", then use it to filter for privacy.
1045            if ($row_filter($record)) {
1046                if ($offset > 0) {
1047                    $offset--;
1048                } else {
1049                    if ($limit > 0) {
1050                        $collection->push($record);
1051                    }
1052
1053                    $limit--;
1054
1055                    if ($limit === 0) {
1056                        break;
1057                    }
1058                }
1059            }
1060        }
1061
1062
1063        return $collection;
1064    }
1065
1066    /**
1067     * Apply search filters to a SQL query column.  Apply collation rules to MySQL.
1068     *
1069     * @param Builder           $query
1070     * @param Expression|string $column
1071     * @param array<string>     $search_terms
1072     */
1073    private function whereSearch(Builder $query, Expression|string $column, array $search_terms): void
1074    {
1075        foreach ($search_terms as $search_term) {
1076            $query->where($column, 'LIKE', '%' . addcslashes($search_term, '\\%_') . '%');
1077        }
1078    }
1079
1080    /**
1081     * Apply soundex search filters to a SQL query column.
1082     *
1083     * @param Builder           $query
1084     * @param Expression|string $field
1085     * @param string            $soundex
1086     */
1087    private function wherePhonetic(Builder $query, $field, string $soundex): void
1088    {
1089        if ($soundex !== '') {
1090            $query->where(static function (Builder $query) use ($soundex, $field): void {
1091                foreach (explode(':', $soundex) as $sdx) {
1092                    $query->orWhere($field, 'LIKE', '%' . $sdx . '%');
1093                }
1094            });
1095        }
1096    }
1097
1098    /**
1099     * @param Builder     $query
1100     * @param string      $tree_id_field
1101     * @param array<Tree> $trees
1102     */
1103    private function whereTrees(Builder $query, string $tree_id_field, array $trees): void
1104    {
1105        $tree_ids = array_map(static function (Tree $tree): int {
1106            return $tree->id();
1107        }, $trees);
1108
1109        $query->whereIn($tree_id_field, $tree_ids);
1110    }
1111
1112    /**
1113     * Find the media object that uses a particular media file.
1114     *
1115     * @param string $file
1116     *
1117     * @return array<Media>
1118     */
1119    public function findMediaObjectsForMediaFile(string $file): array
1120    {
1121        return DB::table('media')
1122            ->join('media_file', static function (JoinClause $join): void {
1123                $join
1124                    ->on('media_file.m_file', '=', 'media.m_file')
1125                    ->on('media_file.m_id', '=', 'media.m_id');
1126            })
1127            ->join('gedcom_setting', 'media.m_file', '=', 'gedcom_setting.gedcom_id')
1128            ->where(new Expression('setting_value || multimedia_file_refn'), '=', $file)
1129            ->select(['media.*'])
1130            ->distinct()
1131            ->get()
1132            ->map($this->mediaRowMapper())
1133            ->all();
1134    }
1135
1136    /**
1137     * A closure to filter records by privacy-filtered GEDCOM data.
1138     *
1139     * @param array<string> $search_terms
1140     *
1141     * @return Closure(GedcomRecord):bool
1142     */
1143    private function rawGedcomFilter(array $search_terms): Closure
1144    {
1145        return static function (GedcomRecord $record) use ($search_terms): bool {
1146            // Ignore non-genealogy fields
1147            $gedcom = preg_replace('/\n\d (?:_UID|_WT_USER) .*/', '', $record->gedcom());
1148
1149            // Ignore matches in links
1150            $gedcom = preg_replace('/\n\d ' . Gedcom::REGEX_TAG . '( @' . Gedcom::REGEX_XREF . '@)?/', '', $gedcom);
1151
1152            // Re-apply the filtering
1153            foreach ($search_terms as $search_term) {
1154                if (mb_stripos($gedcom, $search_term) === false) {
1155                    return false;
1156                }
1157            }
1158
1159            return true;
1160        };
1161    }
1162
1163    /**
1164     * Searching for short or common text can give more results than the system can process.
1165     *
1166     * @param int $limit
1167     *
1168     * @return Closure():void
1169     */
1170    private function rowLimiter(int $limit = self::MAX_SEARCH_RESULTS): Closure
1171    {
1172        return static function () use ($limit): void {
1173            static $n = 0;
1174
1175            if (++$n > $limit) {
1176                $message = I18N::translate('The search returned too many results.');
1177
1178                throw new HttpServiceUnavailableException($message);
1179            }
1180        };
1181    }
1182
1183    /**
1184     * Convert a row from any tree in the families table into a family object.
1185     *
1186     * @return Closure(object):Family
1187     */
1188    private function familyRowMapper(): Closure
1189    {
1190        return function (object $row): Family {
1191            $tree = $this->tree_service->find((int) $row->f_file);
1192
1193            return Registry::familyFactory()->mapper($tree)($row);
1194        };
1195    }
1196
1197    /**
1198     * Convert a row from any tree in the individuals table into an individual object.
1199     *
1200     * @return Closure(object):Individual
1201     */
1202    private function individualRowMapper(): Closure
1203    {
1204        return function (object $row): Individual {
1205            $tree = $this->tree_service->find((int) $row->i_file);
1206
1207            return Registry::individualFactory()->mapper($tree)($row);
1208        };
1209    }
1210
1211    /**
1212     * Convert a row from any tree in the media table into a location object.
1213     *
1214     * @return Closure(object):Location
1215     */
1216    private function locationRowMapper(): Closure
1217    {
1218        return function (object $row): Location {
1219            $tree = $this->tree_service->find((int) $row->o_file);
1220
1221            return Registry::locationFactory()->mapper($tree)($row);
1222        };
1223    }
1224
1225    /**
1226     * Convert a row from any tree in the media table into an media object.
1227     *
1228     * @return Closure(object):Media
1229     */
1230    private function mediaRowMapper(): Closure
1231    {
1232        return function (object $row): Media {
1233            $tree = $this->tree_service->find((int) $row->m_file);
1234
1235            return Registry::mediaFactory()->mapper($tree)($row);
1236        };
1237    }
1238
1239    /**
1240     * Convert a row from any tree in the other table into a note object.
1241     *
1242     * @return Closure:Note
1243     */
1244    private function noteRowMapper(): Closure
1245    {
1246        return function (object $row): Note {
1247            $tree = $this->tree_service->find((int) $row->o_file);
1248
1249            return Registry::noteFactory()->mapper($tree)($row);
1250        };
1251    }
1252
1253    /**
1254     * Convert a row from any tree in the other table into a repository object.
1255     *
1256     * @return Closure:Repository
1257     */
1258    private function repositoryRowMapper(): Closure
1259    {
1260        return function (object $row): Repository {
1261            $tree = $this->tree_service->find((int) $row->o_file);
1262
1263            return Registry::repositoryFactory()->mapper($tree)($row);
1264        };
1265    }
1266
1267    /**
1268     * Convert a row from any tree in the other table into a note object.
1269     *
1270     * @return Closure(object):SharedNote
1271     */
1272    private function sharedNoteRowMapper(): Closure
1273    {
1274        return function (object $row): Note {
1275            $tree = $this->tree_service->find((int) $row->o_file);
1276
1277            return Registry::sharedNoteFactory()->mapper($tree)($row);
1278        };
1279    }
1280
1281    /**
1282     * Convert a row from any tree in the sources table into a source object.
1283     *
1284     * @return Closure:Source
1285     */
1286    private function sourceRowMapper(): Closure
1287    {
1288        return function (object $row): Source {
1289            $tree = $this->tree_service->find((int) $row->s_file);
1290
1291            return Registry::sourceFactory()->mapper($tree)($row);
1292        };
1293    }
1294
1295    /**
1296     * Convert a row from any tree in the other table into a submission object.
1297     *
1298     * @return Closure(object):Submission
1299     */
1300    private function submissionRowMapper(): Closure
1301    {
1302        return function (object $row): Submission {
1303            $tree = $this->tree_service->find((int) $row->o_file);
1304
1305            return Registry::submissionFactory()->mapper($tree)($row);
1306        };
1307    }
1308
1309    /**
1310     * Convert a row from any tree in the other table into a submitter object.
1311     *
1312     * @return Closure(object):Submitter
1313     */
1314    private function submitterRowMapper(): Closure
1315    {
1316        return function (object $row): Submitter {
1317            $tree = $this->tree_service->find((int) $row->o_file);
1318
1319            return Registry::submitterFactory()->mapper($tree)($row);
1320        };
1321    }
1322}
1323