xref: /webtrees/app/Services/SearchService.php (revision 8e1afc648389017cb4e48e677289723c2833c768)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Closure;
23use Fisharebest\Webtrees\Date;
24use Fisharebest\Webtrees\Family;
25use Fisharebest\Webtrees\Gedcom;
26use Fisharebest\Webtrees\GedcomRecord;
27use Fisharebest\Webtrees\Http\Exceptions\HttpServiceUnavailableException;
28use Fisharebest\Webtrees\I18N;
29use Fisharebest\Webtrees\Individual;
30use Fisharebest\Webtrees\Location;
31use Fisharebest\Webtrees\Media;
32use Fisharebest\Webtrees\Note;
33use Fisharebest\Webtrees\Place;
34use Fisharebest\Webtrees\Registry;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\SharedNote;
37use Fisharebest\Webtrees\Soundex;
38use Fisharebest\Webtrees\Source;
39use Fisharebest\Webtrees\Submission;
40use Fisharebest\Webtrees\Submitter;
41use Fisharebest\Webtrees\Tree;
42use Illuminate\Database\Capsule\Manager as DB;
43use Illuminate\Database\Query\Builder;
44use Illuminate\Database\Query\Expression;
45use Illuminate\Database\Query\JoinClause;
46use Illuminate\Support\Collection;
47
48use function addcslashes;
49use function array_filter;
50use function array_map;
51use function array_unique;
52use function explode;
53use function implode;
54use function is_string;
55use function mb_stripos;
56use function preg_match;
57use function preg_quote;
58use function preg_replace;
59use function str_ends_with;
60use function str_starts_with;
61
62use const PHP_INT_MAX;
63
64/**
65 * Search trees for genealogy records.
66 */
67class SearchService
68{
69    // Do not attempt to show search results larger than this/
70    protected const MAX_SEARCH_RESULTS = 5000;
71
72    private TreeService $tree_service;
73
74    /**
75     * SearchService constructor.
76     *
77     * @param TreeService $tree_service
78     */
79    public function __construct(
80        TreeService $tree_service
81    ) {
82        $this->tree_service = $tree_service;
83    }
84
85    /**
86     * @param array<Tree>   $trees
87     * @param array<string> $search
88     *
89     * @return Collection<int,Family>
90     */
91    public function searchFamilies(array $trees, array $search): Collection
92    {
93        $query = DB::table('families');
94
95        $this->whereTrees($query, 'f_file', $trees);
96        $this->whereSearch($query, 'f_gedcom', $search);
97
98        return $query
99            ->get()
100            ->each($this->rowLimiter())
101            ->map($this->familyRowMapper())
102            ->filter(GedcomRecord::accessFilter())
103            ->filter($this->rawGedcomFilter($search));
104    }
105
106    /**
107     * Search for families by name.
108     *
109     * @param array<Tree>   $trees
110     * @param array<string> $search
111     * @param int           $offset
112     * @param int           $limit
113     *
114     * @return Collection<int,Family>
115     */
116    public function searchFamilyNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
117    {
118        $query = DB::table('families')
119            ->leftJoin('name AS husb_name', static function (JoinClause $join): void {
120                $join
121                    ->on('husb_name.n_file', '=', 'families.f_file')
122                    ->on('husb_name.n_id', '=', 'families.f_husb')
123                    ->where('husb_name.n_type', '<>', '_MARNM');
124            })
125            ->leftJoin('name AS wife_name', static function (JoinClause $join): void {
126                $join
127                    ->on('wife_name.n_file', '=', 'families.f_file')
128                    ->on('wife_name.n_id', '=', 'families.f_wife')
129                    ->where('wife_name.n_type', '<>', '_MARNM');
130            });
131
132        $prefix = DB::connection()->getTablePrefix();
133        $field  = new Expression('COALESCE(' . $prefix . "husb_name.n_full, '') || COALESCE(" . $prefix . "wife_name.n_full, '')");
134
135        $this->whereTrees($query, 'f_file', $trees);
136        $this->whereSearch($query, $field, $search);
137
138        $query
139            ->orderBy('husb_name.n_sort')
140            ->orderBy('wife_name.n_sort')
141            ->select(['families.*', 'husb_name.n_sort', 'wife_name.n_sort']);
142
143        return $this->paginateQuery($query, $this->familyRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
144    }
145
146    /**
147     * @param Place $place
148     *
149     * @return Collection<int,Family>
150     */
151    public function searchFamiliesInPlace(Place $place): Collection
152    {
153        return DB::table('families')
154            ->join('placelinks', static function (JoinClause $query) {
155                $query
156                    ->on('families.f_file', '=', 'placelinks.pl_file')
157                    ->on('families.f_id', '=', 'placelinks.pl_gid');
158            })
159            ->where('f_file', '=', $place->tree()->id())
160            ->where('pl_p_id', '=', $place->id())
161            ->select(['families.*'])
162            ->get()
163            ->each($this->rowLimiter())
164            ->map($this->familyRowMapper())
165            ->filter(GedcomRecord::accessFilter());
166    }
167
168    /**
169     * @param array<Tree>   $trees
170     * @param array<string> $search
171     *
172     * @return Collection<int,Individual>
173     */
174    public function searchIndividuals(array $trees, array $search): Collection
175    {
176        $query = DB::table('individuals');
177
178        $this->whereTrees($query, 'i_file', $trees);
179        $this->whereSearch($query, 'i_gedcom', $search);
180
181        return $query
182            ->get()
183            ->each($this->rowLimiter())
184            ->map($this->individualRowMapper())
185            ->filter(GedcomRecord::accessFilter())
186            ->filter($this->rawGedcomFilter($search));
187    }
188
189    /**
190     * Search for individuals by name.
191     *
192     * @param array<Tree>   $trees
193     * @param array<string> $search
194     * @param int           $offset
195     * @param int           $limit
196     *
197     * @return Collection<int,Individual>
198     */
199    public function searchIndividualNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
200    {
201        $query = DB::table('individuals')
202            ->join('name', static function (JoinClause $join): void {
203                $join
204                    ->on('name.n_file', '=', 'individuals.i_file')
205                    ->on('name.n_id', '=', 'individuals.i_id');
206            })
207            ->orderBy('n_sort')
208            ->select(['individuals.*', 'n_sort']);
209
210        $this->whereTrees($query, 'i_file', $trees);
211        $this->whereSearch($query, 'n_full', $search);
212
213        return $this->paginateQuery($query, $this->individualRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
214    }
215
216    /**
217     * @param Place $place
218     *
219     * @return Collection<int,Individual>
220     */
221    public function searchIndividualsInPlace(Place $place): Collection
222    {
223        return DB::table('individuals')
224            ->join('placelinks', static function (JoinClause $join) {
225                $join
226                    ->on('i_file', '=', 'pl_file')
227                    ->on('i_id', '=', 'pl_gid');
228            })
229            ->where('i_file', '=', $place->tree()->id())
230            ->where('pl_p_id', '=', $place->id())
231            ->select(['individuals.*'])
232            ->get()
233            ->each($this->rowLimiter())
234            ->map($this->individualRowMapper())
235            ->filter(GedcomRecord::accessFilter());
236    }
237
238    /**
239     * Search for submissions.
240     *
241     * @param array<Tree>   $trees
242     * @param array<string> $search
243     * @param int           $offset
244     * @param int           $limit
245     *
246     * @return Collection<int,Location>
247     */
248    public function searchLocations(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
249    {
250        $query = DB::table('other')
251            ->where('o_type', '=', Location::RECORD_TYPE);
252
253        $this->whereTrees($query, 'o_file', $trees);
254        $this->whereSearch($query, 'o_gedcom', $search);
255
256        return $this->paginateQuery($query, $this->locationRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
257    }
258
259    /**
260     * Search for media objects.
261     *
262     * @param array<Tree>   $trees
263     * @param array<string> $search
264     * @param int           $offset
265     * @param int           $limit
266     *
267     * @return Collection<int,Media>
268     */
269    public function searchMedia(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
270    {
271        $query = DB::table('media');
272
273        $this->whereTrees($query, 'media.m_file', $trees);
274        $this->whereSearch($query, 'm_gedcom', $search);
275
276        return $this->paginateQuery($query, $this->mediaRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
277    }
278
279    /**
280     * Search for notes.
281     *
282     * @param array<Tree>   $trees
283     * @param array<string> $search
284     * @param int           $offset
285     * @param int           $limit
286     *
287     * @return Collection<int,Note>
288     */
289    public function searchNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
290    {
291        $query = DB::table('other')
292            ->where('o_type', '=', Note::RECORD_TYPE);
293
294        $this->whereTrees($query, 'o_file', $trees);
295        $this->whereSearch($query, 'o_gedcom', $search);
296
297        return $this->paginateQuery($query, $this->noteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
298    }
299
300    /**
301     * Search for notes.
302     *
303     * @param array<Tree>   $trees
304     * @param array<string> $search
305     * @param int           $offset
306     * @param int           $limit
307     *
308     * @return Collection<int,SharedNote>
309     */
310    public function searchSharedNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
311    {
312        $query = DB::table('other')
313            ->where('o_type', '=', SharedNote::RECORD_TYPE);
314
315        $this->whereTrees($query, 'o_file', $trees);
316        $this->whereSearch($query, 'o_gedcom', $search);
317
318        return $this->paginateQuery($query, $this->sharedNoteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
319    }
320
321    /**
322     * Search for repositories.
323     *
324     * @param array<Tree>   $trees
325     * @param array<string> $search
326     * @param int           $offset
327     * @param int           $limit
328     *
329     * @return Collection<int,Repository>
330     */
331    public function searchRepositories(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
332    {
333        $query = DB::table('other')
334            ->where('o_type', '=', Repository::RECORD_TYPE);
335
336        $this->whereTrees($query, 'o_file', $trees);
337        $this->whereSearch($query, 'o_gedcom', $search);
338
339        return $this->paginateQuery($query, $this->repositoryRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
340    }
341
342    /**
343     * Search for sources.
344     *
345     * @param array<Tree>   $trees
346     * @param array<string> $search
347     * @param int      $offset
348     * @param int      $limit
349     *
350     * @return Collection<int,Source>
351     */
352    public function searchSources(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
353    {
354        $query = DB::table('sources');
355
356        $this->whereTrees($query, 's_file', $trees);
357        $this->whereSearch($query, 's_gedcom', $search);
358
359        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
360    }
361
362    /**
363     * Search for sources by name.
364     *
365     * @param array<Tree>   $trees
366     * @param array<string> $search
367     * @param int           $offset
368     * @param int           $limit
369     *
370     * @return Collection<int,Source>
371     */
372    public function searchSourcesByName(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
373    {
374        $query = DB::table('sources')
375            ->orderBy('s_name');
376
377        $this->whereTrees($query, 's_file', $trees);
378        $this->whereSearch($query, 's_name', $search);
379
380        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
381    }
382
383    /**
384     * Search for sources.
385     *
386     * @param array<Tree>   $trees
387     * @param array<string> $search
388     * @param int           $offset
389     * @param int           $limit
390     *
391     * @return Collection<int,string>
392     */
393    public function searchSurnames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
394    {
395        $query = DB::table('name');
396
397        $this->whereTrees($query, 'n_file', $trees);
398        $this->whereSearch($query, 'n_surname', $search);
399
400        return $query
401            ->groupBy(['n_surname'])
402            ->orderBy('n_surname')
403            ->skip($offset)
404            ->take($limit)
405            ->pluck('n_surname');
406    }
407
408    /**
409     * Search for submissions.
410     *
411     * @param array<Tree>   $trees
412     * @param array<string> $search
413     * @param int           $offset
414     * @param int           $limit
415     *
416     * @return Collection<int,Submission>
417     */
418    public function searchSubmissions(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
419    {
420        $query = DB::table('other')
421            ->where('o_type', '=', Submission::RECORD_TYPE);
422
423        $this->whereTrees($query, 'o_file', $trees);
424        $this->whereSearch($query, 'o_gedcom', $search);
425
426        return $this->paginateQuery($query, $this->submissionRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
427    }
428
429    /**
430     * Search for submitters.
431     *
432     * @param array<Tree>   $trees
433     * @param array<string> $search
434     * @param int           $offset
435     * @param int           $limit
436     *
437     * @return Collection<int,Submitter>
438     */
439    public function searchSubmitters(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
440    {
441        $query = DB::table('other')
442            ->where('o_type', '=', Submitter::RECORD_TYPE);
443
444        $this->whereTrees($query, 'o_file', $trees);
445        $this->whereSearch($query, 'o_gedcom', $search);
446
447        return $this->paginateQuery($query, $this->submitterRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
448    }
449
450    /**
451     * Search for places.
452     *
453     * @param Tree   $tree
454     * @param string $search
455     * @param int    $offset
456     * @param int    $limit
457     *
458     * @return Collection<int,Place>
459     */
460    public function searchPlaces(Tree $tree, string $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
461    {
462        $query = DB::table('places AS p0')
463            ->where('p0.p_file', '=', $tree->id())
464            ->leftJoin('places AS p1', 'p1.p_id', '=', 'p0.p_parent_id')
465            ->leftJoin('places AS p2', 'p2.p_id', '=', 'p1.p_parent_id')
466            ->leftJoin('places AS p3', 'p3.p_id', '=', 'p2.p_parent_id')
467            ->leftJoin('places AS p4', 'p4.p_id', '=', 'p3.p_parent_id')
468            ->leftJoin('places AS p5', 'p5.p_id', '=', 'p4.p_parent_id')
469            ->leftJoin('places AS p6', 'p6.p_id', '=', 'p5.p_parent_id')
470            ->leftJoin('places AS p7', 'p7.p_id', '=', 'p6.p_parent_id')
471            ->leftJoin('places AS p8', 'p8.p_id', '=', 'p7.p_parent_id')
472            ->orderBy('p0.p_place')
473            ->orderBy('p1.p_place')
474            ->orderBy('p2.p_place')
475            ->orderBy('p3.p_place')
476            ->orderBy('p4.p_place')
477            ->orderBy('p5.p_place')
478            ->orderBy('p6.p_place')
479            ->orderBy('p7.p_place')
480            ->orderBy('p8.p_place')
481            ->select([
482                'p0.p_place AS place0',
483                'p1.p_place AS place1',
484                'p2.p_place AS place2',
485                'p3.p_place AS place3',
486                'p4.p_place AS place4',
487                'p5.p_place AS place5',
488                'p6.p_place AS place6',
489                'p7.p_place AS place7',
490                'p8.p_place AS place8',
491            ]);
492
493        // Filter each level of the hierarchy.
494        foreach (explode(',', $search, 9) as $level => $string) {
495            $query->where('p' . $level . '.p_place', 'LIKE', '%' . addcslashes($string, '\\%_') . '%');
496        }
497
498        $row_mapper = static function (object $row) use ($tree): Place {
499            $place = implode(', ', array_filter((array) $row));
500
501            return new Place($place, $tree);
502        };
503
504        $filter = static function (): bool {
505            return true;
506        };
507
508        return $this->paginateQuery($query, $row_mapper, $filter, $offset, $limit);
509    }
510
511    /**
512     * @param array<Tree>          $trees
513     * @param array<string,string> $fields
514     * @param array<string,string> $modifiers
515     *
516     * @return Collection<int,Individual>
517     */
518    public function searchIndividualsAdvanced(array $trees, array $fields, array $modifiers): Collection
519    {
520        $fields = array_filter($fields, static fn (string $x): bool => $x !== '');
521
522        $query = DB::table('individuals')
523            ->select(['individuals.*'])
524            ->distinct();
525
526        $this->whereTrees($query, 'i_file', $trees);
527
528        // Join the following tables
529        $father_name   = false;
530        $mother_name   = false;
531        $spouse_family = false;
532        $indi_name     = false;
533        $indi_dates    = [];
534        $fam_dates     = [];
535        $indi_plac     = false;
536        $fam_plac      = false;
537
538        foreach ($fields as $field_name => $field_value) {
539            if (str_starts_with($field_name, 'FATHER:NAME')) {
540                $father_name = true;
541            } elseif (str_starts_with($field_name, 'MOTHER:NAME')) {
542                $mother_name = true;
543            } elseif (str_starts_with($field_name, 'INDI:NAME:GIVN')) {
544                $indi_name = true;
545            } elseif (str_starts_with($field_name, 'INDI:NAME:SURN')) {
546                $indi_name = true;
547            } elseif (str_starts_with($field_name, 'FAM:')) {
548                $spouse_family = true;
549                if (str_ends_with($field_name, ':DATE')) {
550                    $fam_dates[] = explode(':', $field_name)[1];
551                } elseif (str_ends_with($field_name, ':PLAC')) {
552                    $fam_plac = true;
553                }
554            } elseif (str_starts_with($field_name, 'INDI:')) {
555                if (str_ends_with($field_name, ':DATE')) {
556                    $indi_dates[] = explode(':', $field_name)[1];
557                } elseif (str_ends_with($field_name, ':PLAC')) {
558                    $indi_plac = true;
559                }
560            }
561        }
562
563        if ($father_name || $mother_name) {
564            $query->join('link AS l1', static function (JoinClause $join): void {
565                $join
566                    ->on('l1.l_file', '=', 'individuals.i_file')
567                    ->on('l1.l_from', '=', 'individuals.i_id')
568                    ->where('l1.l_type', '=', 'FAMC');
569            });
570
571            if ($father_name) {
572                $query->join('link AS l2', static function (JoinClause $join): void {
573                    $join
574                        ->on('l2.l_file', '=', 'l1.l_file')
575                        ->on('l2.l_from', '=', 'l1.l_to')
576                        ->where('l2.l_type', '=', 'HUSB');
577                });
578                $query->join('name AS father_name', static function (JoinClause $join): void {
579                    $join
580                        ->on('father_name.n_file', '=', 'l2.l_file')
581                        ->on('father_name.n_id', '=', 'l2.l_to');
582                });
583            }
584
585            if ($mother_name) {
586                $query->join('link AS l3', static function (JoinClause $join): void {
587                    $join
588                        ->on('l3.l_file', '=', 'l1.l_file')
589                        ->on('l3.l_from', '=', 'l1.l_to')
590                        ->where('l3.l_type', '=', 'WIFE');
591                });
592                $query->join('name AS mother_name', static function (JoinClause $join): void {
593                    $join
594                        ->on('mother_name.n_file', '=', 'l3.l_file')
595                        ->on('mother_name.n_id', '=', 'l3.l_to');
596                });
597            }
598        }
599
600        if ($spouse_family) {
601            $query->join('link AS l4', static function (JoinClause $join): void {
602                $join
603                    ->on('l4.l_file', '=', 'individuals.i_file')
604                    ->on('l4.l_from', '=', 'individuals.i_id')
605                    ->where('l4.l_type', '=', 'FAMS');
606            });
607            $query->join('families AS spouse_families', static function (JoinClause $join): void {
608                $join
609                    ->on('spouse_families.f_file', '=', 'l4.l_file')
610                    ->on('spouse_families.f_id', '=', 'l4.l_to');
611            });
612        }
613
614        if ($indi_name) {
615            $query->join('name AS individual_name', static function (JoinClause $join): void {
616                $join
617                    ->on('individual_name.n_file', '=', 'individuals.i_file')
618                    ->on('individual_name.n_id', '=', 'individuals.i_id');
619            });
620        }
621
622        foreach (array_unique($indi_dates) as $indi_date) {
623            $query->join('dates AS date_' . $indi_date, static function (JoinClause $join) use ($indi_date): void {
624                $join
625                    ->on('date_' . $indi_date . '.d_file', '=', 'individuals.i_file')
626                    ->on('date_' . $indi_date . '.d_gid', '=', 'individuals.i_id');
627            });
628        }
629
630        foreach (array_unique($fam_dates) as $fam_date) {
631            $query->join('dates AS date_' . $fam_date, static function (JoinClause $join) use ($fam_date): void {
632                $join
633                    ->on('date_' . $fam_date . '.d_file', '=', 'spouse_families.f_file')
634                    ->on('date_' . $fam_date . '.d_gid', '=', 'spouse_families.f_id');
635            });
636        }
637
638        if ($indi_plac) {
639            $query->join('placelinks AS individual_placelinks', static function (JoinClause $join): void {
640                $join
641                    ->on('individual_placelinks.pl_file', '=', 'individuals.i_file')
642                    ->on('individual_placelinks.pl_gid', '=', 'individuals.i_id');
643            });
644            $query->join('places AS individual_places', static function (JoinClause $join): void {
645                $join
646                    ->on('individual_places.p_file', '=', 'individual_placelinks.pl_file')
647                    ->on('individual_places.p_id', '=', 'individual_placelinks.pl_p_id');
648            });
649        }
650
651        if ($fam_plac) {
652            $query->join('placelinks AS familyl_placelinks', static function (JoinClause $join): void {
653                $join
654                    ->on('familyl_placelinks.pl_file', '=', 'individuals.i_file')
655                    ->on('familyl_placelinks.pl_gid', '=', 'individuals.i_id');
656            });
657            $query->join('places AS family_places', static function (JoinClause $join): void {
658                $join
659                    ->on('family_places.p_file', '=', 'familyl_placelinks.pl_file')
660                    ->on('family_places.p_id', '=', 'familyl_placelinks.pl_p_id');
661            });
662        }
663
664        foreach ($fields as $field_name => $field_value) {
665            $parts = explode(':', $field_name . ':::');
666            if (str_starts_with($field_name, 'INDI:NAME:')) {
667                switch ($field_name) {
668                    case 'INDI:NAME:GIVN':
669                        switch ($modifiers[$field_name]) {
670                            case 'EXACT':
671                                $query->where('individual_name.n_givn', '=', $field_value);
672                                break;
673                            case 'BEGINS':
674                                $query->where('individual_name.n_givn', 'LIKE', $field_value . '%');
675                                break;
676                            case 'CONTAINS':
677                                $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
678                                break;
679                            case 'SDX_STD':
680                                $sdx = Soundex::russell($field_value);
681                                if ($sdx !== '') {
682                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_std', $sdx);
683                                } else {
684                                    // No phonetic content? Use a substring match
685                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
686                                }
687                                break;
688                            case 'SDX': // SDX uses DM by default.
689                            case 'SDX_DM':
690                                $sdx = Soundex::daitchMokotoff($field_value);
691                                if ($sdx !== '') {
692                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_dm', $sdx);
693                                } else {
694                                    // No phonetic content? Use a substring match
695                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
696                                }
697                                break;
698                        }
699                        unset($fields[$field_name]);
700                        break;
701                    case 'INDI:NAME:SURN':
702                        switch ($modifiers[$field_name]) {
703                            case 'EXACT':
704                                $query->where(function (Builder $query) use ($field_value): void {
705                                    $query
706                                        ->where('individual_name.n_surn', '=', $field_value)
707                                        ->orWhere('individual_name.n_surname', '=', $field_value);
708                                });
709                                break;
710                            case 'BEGINS':
711                                $query->where(function (Builder $query) use ($field_value): void {
712                                    $query
713                                        ->where('individual_name.n_surn', 'LIKE', $field_value . '%')
714                                        ->orWhere('individual_name.n_surname', 'LIKE', $field_value . '%');
715                                });
716                                break;
717                            case 'CONTAINS':
718                                $query->where(function (Builder $query) use ($field_value): void {
719                                    $query
720                                        ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
721                                        ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
722                                });
723                                break;
724                            case 'SDX_STD':
725                                $sdx = Soundex::russell($field_value);
726                                if ($sdx !== '') {
727                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_std', $sdx);
728                                } else {
729                                    // No phonetic content? Use a substring match
730                                    $query->where(function (Builder $query) use ($field_value): void {
731                                        $query
732                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
733                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
734                                    });
735                                }
736                                break;
737                            case 'SDX': // SDX uses DM by default.
738                            case 'SDX_DM':
739                                $sdx = Soundex::daitchMokotoff($field_value);
740                                if ($sdx !== '') {
741                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_dm', $sdx);
742                                } else {
743                                    // No phonetic content? Use a substring match
744                                    $query->where(function (Builder $query) use ($field_value): void {
745                                        $query
746                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
747                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
748                                    });
749                                }
750                                break;
751                        }
752                        unset($fields[$field_name]);
753                        break;
754                    case 'INDI:NAME:NICK':
755                    case 'INDI:NAME:_MARNM':
756                    case 'INDI:NAME:_HEB':
757                    case 'INDI:NAME:_AKA':
758                        $like = "%\n1 NAME%\n2 " . $parts[2] . ' %' . preg_quote($field_value, '/') . '%';
759                        $query->where('individuals.i_gedcom', 'LIKE', $like);
760                        break;
761                }
762            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':DATE')) {
763                $date = new Date($field_value);
764                if ($date->isOK()) {
765                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
766                    $query
767                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
768                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
769                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
770                }
771                unset($fields[$field_name]);
772            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':DATE')) {
773                $date = new Date($field_value);
774                if ($date->isOK()) {
775                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
776                    $query
777                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
778                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
779                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
780                }
781                unset($fields[$field_name]);
782            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
783                // SQL can only link a place to a person/family, not to an event.
784                $query->where('individual_places.p_place', 'LIKE', '%' . $field_value . '%');
785            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
786                // SQL can only link a place to a person/family, not to an event.
787                $query->where('family_places.p_place', 'LIKE', '%' . $field_value . '%');
788            } elseif (str_starts_with($field_name, 'MOTHER:NAME:') || str_starts_with($field_name, 'FATHER:NAME:')) {
789                $table = str_starts_with($field_name, 'FATHER:NAME:') ? 'father_name' : 'mother_name';
790                switch ($parts[2]) {
791                    case 'GIVN':
792                        switch ($modifiers[$field_name]) {
793                            case 'EXACT':
794                                $query->where($table . '.n_givn', '=', $field_value);
795                                break;
796                            case 'BEGINS':
797                                $query->where($table . '.n_givn', 'LIKE', $field_value . '%');
798                                break;
799                            case 'CONTAINS':
800                                $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
801                                break;
802                            case 'SDX_STD':
803                                $sdx = Soundex::russell($field_value);
804                                if ($sdx !== '') {
805                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_std', $sdx);
806                                } else {
807                                    // No phonetic content? Use a substring match
808                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
809                                }
810                                break;
811                            case 'SDX': // SDX uses DM by default.
812                            case 'SDX_DM':
813                                $sdx = Soundex::daitchMokotoff($field_value);
814                                if ($sdx !== '') {
815                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_dm', $sdx);
816                                } else {
817                                    // No phonetic content? Use a substring match
818                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
819                                }
820                                break;
821                        }
822                        break;
823                    case 'SURN':
824                        switch ($modifiers[$field_name]) {
825                            case 'EXACT':
826                                $query->where($table . '.n_surn', '=', $field_value);
827                                break;
828                            case 'BEGINS':
829                                $query->where($table . '.n_surn', 'LIKE', $field_value . '%');
830                                break;
831                            case 'CONTAINS':
832                                $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
833                                break;
834                            case 'SDX_STD':
835                                $sdx = Soundex::russell($field_value);
836                                if ($sdx !== '') {
837                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_std', $sdx);
838                                } else {
839                                    // No phonetic content? Use a substring match
840                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
841                                }
842                                break;
843                            case 'SDX': // SDX uses DM by default.
844                            case 'SDX_DM':
845                                $sdx = Soundex::daitchMokotoff($field_value);
846                                if ($sdx !== '') {
847                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_dm', $sdx);
848                                } else {
849                                    // No phonetic content? Use a substring match
850                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
851                                }
852                                break;
853                        }
854                        break;
855                }
856                unset($fields[$field_name]);
857            } elseif (str_starts_with($field_name, 'FAM:')) {
858                // e.g. searches for occupation, religion, note, etc.
859                // Initial matching only.  Need PHP to apply filter.
860                $query->where('spouse_families.f_gedcom', 'LIKE', "%\n1 " . $parts[1] . ' %' . $field_value . '%');
861            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':TYPE')) {
862                // Initial matching only.  Need PHP to apply filter.
863                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[1] . "%\n2 TYPE %" . $field_value . '%');
864            } elseif (str_starts_with($field_name, 'INDI:')) {
865                // e.g. searches for occupation, religion, note, etc.
866                // Initial matching only.  Need PHP to apply filter.
867                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[1] . '%' . $parts[2] . '%' . $field_value . '%');
868            }
869        }
870
871        return $query
872            ->get()
873            ->each($this->rowLimiter())
874            ->map($this->individualRowMapper())
875            ->filter(GedcomRecord::accessFilter())
876            ->filter(static function (Individual $individual) use ($fields): bool {
877                // Check for searches which were only partially matched by SQL
878                foreach ($fields as $field_name => $field_value) {
879                    $parts = explode(':', $field_name . '::::');
880
881                    if (str_starts_with($field_name, 'INDI:NAME:') && $field_name !== 'INDI:NAME:GIVN' && $field_name !== 'INDI:NAME:SURN') {
882                        $regex = '/\n1 NAME.*(?:\n2.*)*\n2 ' . $parts[2] . ' .*' . preg_quote($field_value, '/') . '/i';
883
884                        if (preg_match($regex, $individual->gedcom()) === 1) {
885                            continue;
886                        }
887
888                        return false;
889                    }
890
891                    $regex = '/' . preg_quote($field_value, '/') . '/i';
892
893                    if (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
894                        foreach ($individual->facts([$parts[1]]) as $fact) {
895                            if (preg_match($regex, $fact->place()->gedcomName()) === 1) {
896                                continue 2;
897                            }
898                        }
899                        return false;
900                    }
901
902                    if (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
903                        foreach ($individual->spouseFamilies() as $family) {
904                            foreach ($family->facts([$parts[1]]) as $fact) {
905                                if (preg_match($regex, $fact->place()->gedcomName()) === 1) {
906                                    continue 3;
907                                }
908                            }
909                        }
910                        return false;
911                    }
912
913                    if ($field_name === 'INDI:FACT:TYPE' || $field_name === 'INDI:EVEN:TYPE' || $field_name === 'INDI:CHAN:_WT_USER') {
914                        foreach ($individual->facts([$parts[1]]) as $fact) {
915                            if (preg_match($regex, $fact->attribute($parts[2])) === 1) {
916                                continue 2;
917                            }
918                        }
919
920                        return false;
921                    }
922
923                    if (str_starts_with($field_name, 'INDI:')) {
924                        foreach ($individual->facts([$parts[1]]) as $fact) {
925                            if (preg_match($regex, $fact->value()) === 1) {
926                                continue 2;
927                            }
928                        }
929
930                        return false;
931                    }
932
933                    if (str_starts_with($field_name, 'FAM:')) {
934                        foreach ($individual->spouseFamilies() as $family) {
935                            foreach ($family->facts([$parts[1]]) as $fact) {
936                                if (preg_match($regex, $fact->value()) === 1) {
937                                    continue 3;
938                                }
939                            }
940                        }
941                        return false;
942                    }
943                }
944
945                return true;
946            });
947    }
948
949    /**
950     * @param string      $soundex
951     * @param string      $lastname
952     * @param string      $firstname
953     * @param string      $place
954     * @param array<Tree> $search_trees
955     *
956     * @return Collection<int,Individual>
957     */
958    public function searchIndividualsPhonetic(string $soundex, string $lastname, string $firstname, string $place, array $search_trees): Collection
959    {
960        switch ($soundex) {
961            default:
962            case 'Russell':
963                $givn_sdx   = Soundex::russell($firstname);
964                $surn_sdx   = Soundex::russell($lastname);
965                $plac_sdx   = Soundex::russell($place);
966                $givn_field = 'n_soundex_givn_std';
967                $surn_field = 'n_soundex_surn_std';
968                $plac_field = 'p_std_soundex';
969                break;
970            case 'DaitchM':
971                $givn_sdx   = Soundex::daitchMokotoff($firstname);
972                $surn_sdx   = Soundex::daitchMokotoff($lastname);
973                $plac_sdx   = Soundex::daitchMokotoff($place);
974                $givn_field = 'n_soundex_givn_dm';
975                $surn_field = 'n_soundex_surn_dm';
976                $plac_field = 'p_dm_soundex';
977                break;
978        }
979
980        // Nothing to search for? Return nothing.
981        if ($givn_sdx === '' && $surn_sdx === '' && $plac_sdx === '') {
982            return new Collection();
983        }
984
985        $query = DB::table('individuals')
986            ->select(['individuals.*'])
987            ->distinct();
988
989        $this->whereTrees($query, 'i_file', $search_trees);
990
991        if ($plac_sdx !== '') {
992            $query->join('placelinks', static function (JoinClause $join): void {
993                $join
994                    ->on('placelinks.pl_file', '=', 'individuals.i_file')
995                    ->on('placelinks.pl_gid', '=', 'individuals.i_id');
996            });
997            $query->join('places', static function (JoinClause $join): void {
998                $join
999                    ->on('places.p_file', '=', 'placelinks.pl_file')
1000                    ->on('places.p_id', '=', 'placelinks.pl_p_id');
1001            });
1002
1003            $this->wherePhonetic($query, $plac_field, $plac_sdx);
1004        }
1005
1006        if ($givn_sdx !== '' || $surn_sdx !== '') {
1007            $query->join('name', static function (JoinClause $join): void {
1008                $join
1009                    ->on('name.n_file', '=', 'individuals.i_file')
1010                    ->on('name.n_id', '=', 'individuals.i_id');
1011            });
1012
1013            $this->wherePhonetic($query, $givn_field, $givn_sdx);
1014            $this->wherePhonetic($query, $surn_field, $surn_sdx);
1015        }
1016
1017        return $query
1018            ->get()
1019            ->each($this->rowLimiter())
1020            ->map($this->individualRowMapper())
1021            ->filter(GedcomRecord::accessFilter());
1022    }
1023
1024    /**
1025     * Paginate a search query.
1026     *
1027     * @param Builder $query      Searches the database for the desired records.
1028     * @param Closure $row_mapper Converts a row from the query into a record.
1029     * @param Closure $row_filter
1030     * @param int     $offset     Skip this many rows.
1031     * @param int     $limit      Take this many rows.
1032     *
1033     * @return Collection<int,mixed>
1034     */
1035    private function paginateQuery(Builder $query, Closure $row_mapper, Closure $row_filter, int $offset, int $limit): Collection
1036    {
1037        $collection = new Collection();
1038
1039        foreach ($query->cursor() as $row) {
1040            $record = $row_mapper($row);
1041            // searchIndividualNames() and searchFamilyNames() can return duplicate rows,
1042            // where individuals have multiple names - and we need to sort results by name.
1043            if ($collection->containsStrict($record)) {
1044                continue;
1045            }
1046            // If the object has a method "canShow()", then use it to filter for privacy.
1047            if ($row_filter($record)) {
1048                if ($offset > 0) {
1049                    $offset--;
1050                } else {
1051                    if ($limit > 0) {
1052                        $collection->push($record);
1053                    }
1054
1055                    $limit--;
1056
1057                    if ($limit === 0) {
1058                        break;
1059                    }
1060                }
1061            }
1062        }
1063
1064
1065        return $collection;
1066    }
1067
1068    /**
1069     * Apply search filters to a SQL query column.  Apply collation rules to MySQL.
1070     *
1071     * @param Builder           $query
1072     * @param Expression|string $column
1073     * @param array<string>     $search_terms
1074     */
1075    private function whereSearch(Builder $query, Expression|string $column, array $search_terms): void
1076    {
1077        foreach ($search_terms as $search_term) {
1078            $query->where($column, 'LIKE', '%' . addcslashes($search_term, '\\%_') . '%');
1079        }
1080    }
1081
1082    /**
1083     * Apply soundex search filters to a SQL query column.
1084     *
1085     * @param Builder           $query
1086     * @param Expression|string $field
1087     * @param string            $soundex
1088     */
1089    private function wherePhonetic(Builder $query, $field, string $soundex): void
1090    {
1091        if ($soundex !== '') {
1092            $query->where(static function (Builder $query) use ($soundex, $field): void {
1093                foreach (explode(':', $soundex) as $sdx) {
1094                    $query->orWhere($field, 'LIKE', '%' . $sdx . '%');
1095                }
1096            });
1097        }
1098    }
1099
1100    /**
1101     * @param Builder     $query
1102     * @param string      $tree_id_field
1103     * @param array<Tree> $trees
1104     */
1105    private function whereTrees(Builder $query, string $tree_id_field, array $trees): void
1106    {
1107        $tree_ids = array_map(static function (Tree $tree): int {
1108            return $tree->id();
1109        }, $trees);
1110
1111        $query->whereIn($tree_id_field, $tree_ids);
1112    }
1113
1114    /**
1115     * Find the media object that uses a particular media file.
1116     *
1117     * @param string $file
1118     *
1119     * @return array<Media>
1120     */
1121    public function findMediaObjectsForMediaFile(string $file): array
1122    {
1123        return DB::table('media')
1124            ->join('media_file', static function (JoinClause $join): void {
1125                $join
1126                    ->on('media_file.m_file', '=', 'media.m_file')
1127                    ->on('media_file.m_id', '=', 'media.m_id');
1128            })
1129            ->join('gedcom_setting', 'media.m_file', '=', 'gedcom_setting.gedcom_id')
1130            ->where(new Expression('setting_value || multimedia_file_refn'), '=', $file)
1131            ->select(['media.*'])
1132            ->distinct()
1133            ->get()
1134            ->map($this->mediaRowMapper())
1135            ->all();
1136    }
1137
1138    /**
1139     * A closure to filter records by privacy-filtered GEDCOM data.
1140     *
1141     * @param array<string> $search_terms
1142     *
1143     * @return Closure(GedcomRecord):bool
1144     */
1145    private function rawGedcomFilter(array $search_terms): Closure
1146    {
1147        return static function (GedcomRecord $record) use ($search_terms): bool {
1148            // Ignore non-genealogy fields
1149            $gedcom = preg_replace('/\n\d (?:_UID|_WT_USER) .*/', '', $record->gedcom());
1150
1151            // Ignore matches in links
1152            $gedcom = preg_replace('/\n\d ' . Gedcom::REGEX_TAG . '( @' . Gedcom::REGEX_XREF . '@)?/', '', $gedcom);
1153
1154            // Re-apply the filtering
1155            foreach ($search_terms as $search_term) {
1156                if (mb_stripos($gedcom, $search_term) === false) {
1157                    return false;
1158                }
1159            }
1160
1161            return true;
1162        };
1163    }
1164
1165    /**
1166     * Searching for short or common text can give more results than the system can process.
1167     *
1168     * @param int $limit
1169     *
1170     * @return Closure():void
1171     */
1172    private function rowLimiter(int $limit = self::MAX_SEARCH_RESULTS): Closure
1173    {
1174        return static function () use ($limit): void {
1175            static $n = 0;
1176
1177            if (++$n > $limit) {
1178                $message = I18N::translate('The search returned too many results.');
1179
1180                throw new HttpServiceUnavailableException($message);
1181            }
1182        };
1183    }
1184
1185    /**
1186     * Convert a row from any tree in the families table into a family object.
1187     *
1188     * @return Closure(object):Family
1189     */
1190    private function familyRowMapper(): Closure
1191    {
1192        return function (object $row): Family {
1193            $tree = $this->tree_service->find((int) $row->f_file);
1194
1195            return Registry::familyFactory()->mapper($tree)($row);
1196        };
1197    }
1198
1199    /**
1200     * Convert a row from any tree in the individuals table into an individual object.
1201     *
1202     * @return Closure(object):Individual
1203     */
1204    private function individualRowMapper(): Closure
1205    {
1206        return function (object $row): Individual {
1207            $tree = $this->tree_service->find((int) $row->i_file);
1208
1209            return Registry::individualFactory()->mapper($tree)($row);
1210        };
1211    }
1212
1213    /**
1214     * Convert a row from any tree in the media table into a location object.
1215     *
1216     * @return Closure(object):Location
1217     */
1218    private function locationRowMapper(): Closure
1219    {
1220        return function (object $row): Location {
1221            $tree = $this->tree_service->find((int) $row->o_file);
1222
1223            return Registry::locationFactory()->mapper($tree)($row);
1224        };
1225    }
1226
1227    /**
1228     * Convert a row from any tree in the media table into an media object.
1229     *
1230     * @return Closure(object):Media
1231     */
1232    private function mediaRowMapper(): Closure
1233    {
1234        return function (object $row): Media {
1235            $tree = $this->tree_service->find((int) $row->m_file);
1236
1237            return Registry::mediaFactory()->mapper($tree)($row);
1238        };
1239    }
1240
1241    /**
1242     * Convert a row from any tree in the other table into a note object.
1243     *
1244     * @return Closure:Note
1245     */
1246    private function noteRowMapper(): Closure
1247    {
1248        return function (object $row): Note {
1249            $tree = $this->tree_service->find((int) $row->o_file);
1250
1251            return Registry::noteFactory()->mapper($tree)($row);
1252        };
1253    }
1254
1255    /**
1256     * Convert a row from any tree in the other table into a repository object.
1257     *
1258     * @return Closure:Repository
1259     */
1260    private function repositoryRowMapper(): Closure
1261    {
1262        return function (object $row): Repository {
1263            $tree = $this->tree_service->find((int) $row->o_file);
1264
1265            return Registry::repositoryFactory()->mapper($tree)($row);
1266        };
1267    }
1268
1269    /**
1270     * Convert a row from any tree in the other table into a note object.
1271     *
1272     * @return Closure(object):SharedNote
1273     */
1274    private function sharedNoteRowMapper(): Closure
1275    {
1276        return function (object $row): Note {
1277            $tree = $this->tree_service->find((int) $row->o_file);
1278
1279            return Registry::sharedNoteFactory()->mapper($tree)($row);
1280        };
1281    }
1282
1283    /**
1284     * Convert a row from any tree in the sources table into a source object.
1285     *
1286     * @return Closure:Source
1287     */
1288    private function sourceRowMapper(): Closure
1289    {
1290        return function (object $row): Source {
1291            $tree = $this->tree_service->find((int) $row->s_file);
1292
1293            return Registry::sourceFactory()->mapper($tree)($row);
1294        };
1295    }
1296
1297    /**
1298     * Convert a row from any tree in the other table into a submission object.
1299     *
1300     * @return Closure(object):Submission
1301     */
1302    private function submissionRowMapper(): Closure
1303    {
1304        return function (object $row): Submission {
1305            $tree = $this->tree_service->find((int) $row->o_file);
1306
1307            return Registry::submissionFactory()->mapper($tree)($row);
1308        };
1309    }
1310
1311    /**
1312     * Convert a row from any tree in the other table into a submitter object.
1313     *
1314     * @return Closure(object):Submitter
1315     */
1316    private function submitterRowMapper(): Closure
1317    {
1318        return function (object $row): Submitter {
1319            $tree = $this->tree_service->find((int) $row->o_file);
1320
1321            return Registry::submitterFactory()->mapper($tree)($row);
1322        };
1323    }
1324}
1325