xref: /webtrees/app/Services/SearchService.php (revision b53d28e3c14f231603d2f8c357faae5787e46766)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Closure;
23use Fisharebest\Webtrees\Date;
24use Fisharebest\Webtrees\Family;
25use Fisharebest\Webtrees\Gedcom;
26use Fisharebest\Webtrees\GedcomRecord;
27use Fisharebest\Webtrees\Http\Exceptions\HttpServiceUnavailableException;
28use Fisharebest\Webtrees\I18N;
29use Fisharebest\Webtrees\Individual;
30use Fisharebest\Webtrees\Location;
31use Fisharebest\Webtrees\Media;
32use Fisharebest\Webtrees\Note;
33use Fisharebest\Webtrees\Place;
34use Fisharebest\Webtrees\Registry;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\SharedNote;
37use Fisharebest\Webtrees\Soundex;
38use Fisharebest\Webtrees\Source;
39use Fisharebest\Webtrees\Submission;
40use Fisharebest\Webtrees\Submitter;
41use Fisharebest\Webtrees\Tree;
42use Illuminate\Database\Capsule\Manager as DB;
43use Illuminate\Database\Query\Builder;
44use Illuminate\Database\Query\Expression;
45use Illuminate\Database\Query\JoinClause;
46use Illuminate\Support\Collection;
47
48use function addcslashes;
49use function array_filter;
50use function array_map;
51use function array_unique;
52use function explode;
53use function implode;
54use function mb_stripos;
55use function preg_match;
56use function preg_quote;
57use function preg_replace;
58use function str_ends_with;
59use function str_starts_with;
60
61use const PHP_INT_MAX;
62
63/**
64 * Search trees for genealogy records.
65 */
66class SearchService
67{
68    // Do not attempt to show search results larger than this/
69    protected const MAX_SEARCH_RESULTS = 5000;
70
71    private TreeService $tree_service;
72
73    /**
74     * SearchService constructor.
75     *
76     * @param TreeService $tree_service
77     */
78    public function __construct(
79        TreeService $tree_service
80    ) {
81        $this->tree_service = $tree_service;
82    }
83
84    /**
85     * @param array<Tree>   $trees
86     * @param array<string> $search
87     *
88     * @return Collection<int,Family>
89     */
90    public function searchFamilies(array $trees, array $search): Collection
91    {
92        $query = DB::table('families');
93
94        $this->whereTrees($query, 'f_file', $trees);
95        $this->whereSearch($query, 'f_gedcom', $search);
96
97        return $query
98            ->get()
99            ->each($this->rowLimiter())
100            ->map($this->familyRowMapper())
101            ->filter(GedcomRecord::accessFilter())
102            ->filter($this->rawGedcomFilter($search));
103    }
104
105    /**
106     * Search for families by name.
107     *
108     * @param array<Tree>   $trees
109     * @param array<string> $search
110     * @param int           $offset
111     * @param int           $limit
112     *
113     * @return Collection<int,Family>
114     */
115    public function searchFamilyNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
116    {
117        $query = DB::table('families')
118            ->leftJoin('name AS husb_name', static function (JoinClause $join): void {
119                $join
120                    ->on('husb_name.n_file', '=', 'families.f_file')
121                    ->on('husb_name.n_id', '=', 'families.f_husb')
122                    ->where('husb_name.n_type', '<>', '_MARNM');
123            })
124            ->leftJoin('name AS wife_name', static function (JoinClause $join): void {
125                $join
126                    ->on('wife_name.n_file', '=', 'families.f_file')
127                    ->on('wife_name.n_id', '=', 'families.f_wife')
128                    ->where('wife_name.n_type', '<>', '_MARNM');
129            });
130
131        $prefix = DB::connection()->getTablePrefix();
132        $field  = new Expression('COALESCE(' . $prefix . "husb_name.n_full, '') || COALESCE(" . $prefix . "wife_name.n_full, '')");
133
134        $this->whereTrees($query, 'f_file', $trees);
135        $this->whereSearch($query, $field, $search);
136
137        $query
138            ->orderBy('husb_name.n_sort')
139            ->orderBy('wife_name.n_sort')
140            ->select(['families.*', 'husb_name.n_sort', 'wife_name.n_sort']);
141
142        return $this->paginateQuery($query, $this->familyRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
143    }
144
145    /**
146     * @param Place $place
147     *
148     * @return Collection<int,Family>
149     */
150    public function searchFamiliesInPlace(Place $place): Collection
151    {
152        return DB::table('families')
153            ->join('placelinks', static function (JoinClause $query) {
154                $query
155                    ->on('families.f_file', '=', 'placelinks.pl_file')
156                    ->on('families.f_id', '=', 'placelinks.pl_gid');
157            })
158            ->where('f_file', '=', $place->tree()->id())
159            ->where('pl_p_id', '=', $place->id())
160            ->select(['families.*'])
161            ->get()
162            ->each($this->rowLimiter())
163            ->map($this->familyRowMapper())
164            ->filter(GedcomRecord::accessFilter());
165    }
166
167    /**
168     * @param array<Tree>   $trees
169     * @param array<string> $search
170     *
171     * @return Collection<int,Individual>
172     */
173    public function searchIndividuals(array $trees, array $search): Collection
174    {
175        $query = DB::table('individuals');
176
177        $this->whereTrees($query, 'i_file', $trees);
178        $this->whereSearch($query, 'i_gedcom', $search);
179
180        return $query
181            ->get()
182            ->each($this->rowLimiter())
183            ->map($this->individualRowMapper())
184            ->filter(GedcomRecord::accessFilter())
185            ->filter($this->rawGedcomFilter($search));
186    }
187
188    /**
189     * Search for individuals by name.
190     *
191     * @param array<Tree>   $trees
192     * @param array<string> $search
193     * @param int           $offset
194     * @param int           $limit
195     *
196     * @return Collection<int,Individual>
197     */
198    public function searchIndividualNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
199    {
200        $query = DB::table('individuals')
201            ->join('name', static function (JoinClause $join): void {
202                $join
203                    ->on('name.n_file', '=', 'individuals.i_file')
204                    ->on('name.n_id', '=', 'individuals.i_id');
205            })
206            ->orderBy('n_sort')
207            ->select(['individuals.*', 'n_sort']);
208
209        $this->whereTrees($query, 'i_file', $trees);
210        $this->whereSearch($query, 'n_full', $search);
211
212        return $this->paginateQuery($query, $this->individualRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
213    }
214
215    /**
216     * @param Place $place
217     *
218     * @return Collection<int,Individual>
219     */
220    public function searchIndividualsInPlace(Place $place): Collection
221    {
222        return DB::table('individuals')
223            ->join('placelinks', static function (JoinClause $join) {
224                $join
225                    ->on('i_file', '=', 'pl_file')
226                    ->on('i_id', '=', 'pl_gid');
227            })
228            ->where('i_file', '=', $place->tree()->id())
229            ->where('pl_p_id', '=', $place->id())
230            ->select(['individuals.*'])
231            ->get()
232            ->each($this->rowLimiter())
233            ->map($this->individualRowMapper())
234            ->filter(GedcomRecord::accessFilter());
235    }
236
237    /**
238     * Search for submissions.
239     *
240     * @param array<Tree>   $trees
241     * @param array<string> $search
242     * @param int           $offset
243     * @param int           $limit
244     *
245     * @return Collection<int,Location>
246     */
247    public function searchLocations(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
248    {
249        $query = DB::table('other')
250            ->where('o_type', '=', Location::RECORD_TYPE);
251
252        $this->whereTrees($query, 'o_file', $trees);
253        $this->whereSearch($query, 'o_gedcom', $search);
254
255        return $this->paginateQuery($query, $this->locationRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
256    }
257
258    /**
259     * Search for media objects.
260     *
261     * @param array<Tree>   $trees
262     * @param array<string> $search
263     * @param int           $offset
264     * @param int           $limit
265     *
266     * @return Collection<int,Media>
267     */
268    public function searchMedia(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
269    {
270        $query = DB::table('media');
271
272        $this->whereTrees($query, 'media.m_file', $trees);
273        $this->whereSearch($query, 'm_gedcom', $search);
274
275        return $this->paginateQuery($query, $this->mediaRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
276    }
277
278    /**
279     * Search for notes.
280     *
281     * @param array<Tree>   $trees
282     * @param array<string> $search
283     * @param int           $offset
284     * @param int           $limit
285     *
286     * @return Collection<int,Note>
287     */
288    public function searchNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
289    {
290        $query = DB::table('other')
291            ->where('o_type', '=', Note::RECORD_TYPE);
292
293        $this->whereTrees($query, 'o_file', $trees);
294        $this->whereSearch($query, 'o_gedcom', $search);
295
296        return $this->paginateQuery($query, $this->noteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
297    }
298
299    /**
300     * Search for notes.
301     *
302     * @param array<Tree>   $trees
303     * @param array<string> $search
304     * @param int           $offset
305     * @param int           $limit
306     *
307     * @return Collection<int,SharedNote>
308     */
309    public function searchSharedNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
310    {
311        $query = DB::table('other')
312            ->where('o_type', '=', SharedNote::RECORD_TYPE);
313
314        $this->whereTrees($query, 'o_file', $trees);
315        $this->whereSearch($query, 'o_gedcom', $search);
316
317        return $this->paginateQuery($query, $this->sharedNoteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
318    }
319
320    /**
321     * Search for repositories.
322     *
323     * @param array<Tree>   $trees
324     * @param array<string> $search
325     * @param int           $offset
326     * @param int           $limit
327     *
328     * @return Collection<int,Repository>
329     */
330    public function searchRepositories(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
331    {
332        $query = DB::table('other')
333            ->where('o_type', '=', Repository::RECORD_TYPE);
334
335        $this->whereTrees($query, 'o_file', $trees);
336        $this->whereSearch($query, 'o_gedcom', $search);
337
338        return $this->paginateQuery($query, $this->repositoryRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
339    }
340
341    /**
342     * Search for sources.
343     *
344     * @param array<Tree>   $trees
345     * @param array<string> $search
346     * @param int      $offset
347     * @param int      $limit
348     *
349     * @return Collection<int,Source>
350     */
351    public function searchSources(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
352    {
353        $query = DB::table('sources');
354
355        $this->whereTrees($query, 's_file', $trees);
356        $this->whereSearch($query, 's_gedcom', $search);
357
358        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
359    }
360
361    /**
362     * Search for sources by name.
363     *
364     * @param array<Tree>   $trees
365     * @param array<string> $search
366     * @param int           $offset
367     * @param int           $limit
368     *
369     * @return Collection<int,Source>
370     */
371    public function searchSourcesByName(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
372    {
373        $query = DB::table('sources')
374            ->orderBy('s_name');
375
376        $this->whereTrees($query, 's_file', $trees);
377        $this->whereSearch($query, 's_name', $search);
378
379        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
380    }
381
382    /**
383     * Search for sources.
384     *
385     * @param array<Tree>   $trees
386     * @param array<string> $search
387     * @param int           $offset
388     * @param int           $limit
389     *
390     * @return Collection<int,string>
391     */
392    public function searchSurnames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
393    {
394        $query = DB::table('name');
395
396        $this->whereTrees($query, 'n_file', $trees);
397        $this->whereSearch($query, 'n_surname', $search);
398
399        return $query
400            ->groupBy(['n_surname'])
401            ->orderBy('n_surname')
402            ->skip($offset)
403            ->take($limit)
404            ->pluck('n_surname');
405    }
406
407    /**
408     * Search for submissions.
409     *
410     * @param array<Tree>   $trees
411     * @param array<string> $search
412     * @param int           $offset
413     * @param int           $limit
414     *
415     * @return Collection<int,Submission>
416     */
417    public function searchSubmissions(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
418    {
419        $query = DB::table('other')
420            ->where('o_type', '=', Submission::RECORD_TYPE);
421
422        $this->whereTrees($query, 'o_file', $trees);
423        $this->whereSearch($query, 'o_gedcom', $search);
424
425        return $this->paginateQuery($query, $this->submissionRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
426    }
427
428    /**
429     * Search for submitters.
430     *
431     * @param array<Tree>   $trees
432     * @param array<string> $search
433     * @param int           $offset
434     * @param int           $limit
435     *
436     * @return Collection<int,Submitter>
437     */
438    public function searchSubmitters(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
439    {
440        $query = DB::table('other')
441            ->where('o_type', '=', Submitter::RECORD_TYPE);
442
443        $this->whereTrees($query, 'o_file', $trees);
444        $this->whereSearch($query, 'o_gedcom', $search);
445
446        return $this->paginateQuery($query, $this->submitterRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
447    }
448
449    /**
450     * Search for places.
451     *
452     * @param Tree   $tree
453     * @param string $search
454     * @param int    $offset
455     * @param int    $limit
456     *
457     * @return Collection<int,Place>
458     */
459    public function searchPlaces(Tree $tree, string $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
460    {
461        $query = DB::table('places AS p0')
462            ->where('p0.p_file', '=', $tree->id())
463            ->leftJoin('places AS p1', 'p1.p_id', '=', 'p0.p_parent_id')
464            ->leftJoin('places AS p2', 'p2.p_id', '=', 'p1.p_parent_id')
465            ->leftJoin('places AS p3', 'p3.p_id', '=', 'p2.p_parent_id')
466            ->leftJoin('places AS p4', 'p4.p_id', '=', 'p3.p_parent_id')
467            ->leftJoin('places AS p5', 'p5.p_id', '=', 'p4.p_parent_id')
468            ->leftJoin('places AS p6', 'p6.p_id', '=', 'p5.p_parent_id')
469            ->leftJoin('places AS p7', 'p7.p_id', '=', 'p6.p_parent_id')
470            ->leftJoin('places AS p8', 'p8.p_id', '=', 'p7.p_parent_id')
471            ->orderBy('p0.p_place')
472            ->orderBy('p1.p_place')
473            ->orderBy('p2.p_place')
474            ->orderBy('p3.p_place')
475            ->orderBy('p4.p_place')
476            ->orderBy('p5.p_place')
477            ->orderBy('p6.p_place')
478            ->orderBy('p7.p_place')
479            ->orderBy('p8.p_place')
480            ->select([
481                'p0.p_place AS place0',
482                'p1.p_place AS place1',
483                'p2.p_place AS place2',
484                'p3.p_place AS place3',
485                'p4.p_place AS place4',
486                'p5.p_place AS place5',
487                'p6.p_place AS place6',
488                'p7.p_place AS place7',
489                'p8.p_place AS place8',
490            ]);
491
492        // Filter each level of the hierarchy.
493        foreach (explode(',', $search, 9) as $level => $string) {
494            $query->where('p' . $level . '.p_place', 'LIKE', '%' . addcslashes($string, '\\%_') . '%');
495        }
496
497        $row_mapper = static function (object $row) use ($tree): Place {
498            $place = implode(', ', array_filter((array) $row));
499
500            return new Place($place, $tree);
501        };
502
503        $filter = static function (): bool {
504            return true;
505        };
506
507        return $this->paginateQuery($query, $row_mapper, $filter, $offset, $limit);
508    }
509
510    /**
511     * @param array<Tree>          $trees
512     * @param array<string,string> $fields
513     * @param array<string,string> $modifiers
514     *
515     * @return Collection<int,Individual>
516     */
517    public function searchIndividualsAdvanced(array $trees, array $fields, array $modifiers): Collection
518    {
519        $fields = array_filter($fields, static fn (string $x): bool => $x !== '');
520
521        $query = DB::table('individuals')
522            ->select(['individuals.*'])
523            ->distinct();
524
525        $this->whereTrees($query, 'i_file', $trees);
526
527        // Join the following tables
528        $father_name   = false;
529        $mother_name   = false;
530        $spouse_family = false;
531        $indi_name     = false;
532        $indi_dates    = [];
533        $fam_dates     = [];
534        $indi_plac     = false;
535        $fam_plac      = false;
536
537        foreach ($fields as $field_name => $field_value) {
538            if (str_starts_with($field_name, 'FATHER:NAME')) {
539                $father_name = true;
540            } elseif (str_starts_with($field_name, 'MOTHER:NAME')) {
541                $mother_name = true;
542            } elseif (str_starts_with($field_name, 'INDI:NAME:GIVN')) {
543                $indi_name = true;
544            } elseif (str_starts_with($field_name, 'INDI:NAME:SURN')) {
545                $indi_name = true;
546            } elseif (str_starts_with($field_name, 'FAM:')) {
547                $spouse_family = true;
548                if (str_ends_with($field_name, ':DATE')) {
549                    $fam_dates[] = explode(':', $field_name)[1];
550                } elseif (str_ends_with($field_name, ':PLAC')) {
551                    $fam_plac = true;
552                }
553            } elseif (str_starts_with($field_name, 'INDI:')) {
554                if (str_ends_with($field_name, ':DATE')) {
555                    $indi_dates[] = explode(':', $field_name)[1];
556                } elseif (str_ends_with($field_name, ':PLAC')) {
557                    $indi_plac = true;
558                }
559            }
560        }
561
562        if ($father_name || $mother_name) {
563            $query->join('link AS l1', static function (JoinClause $join): void {
564                $join
565                    ->on('l1.l_file', '=', 'individuals.i_file')
566                    ->on('l1.l_from', '=', 'individuals.i_id')
567                    ->where('l1.l_type', '=', 'FAMC');
568            });
569
570            if ($father_name) {
571                $query->join('link AS l2', static function (JoinClause $join): void {
572                    $join
573                        ->on('l2.l_file', '=', 'l1.l_file')
574                        ->on('l2.l_from', '=', 'l1.l_to')
575                        ->where('l2.l_type', '=', 'HUSB');
576                });
577                $query->join('name AS father_name', static function (JoinClause $join): void {
578                    $join
579                        ->on('father_name.n_file', '=', 'l2.l_file')
580                        ->on('father_name.n_id', '=', 'l2.l_to');
581                });
582            }
583
584            if ($mother_name) {
585                $query->join('link AS l3', static function (JoinClause $join): void {
586                    $join
587                        ->on('l3.l_file', '=', 'l1.l_file')
588                        ->on('l3.l_from', '=', 'l1.l_to')
589                        ->where('l3.l_type', '=', 'WIFE');
590                });
591                $query->join('name AS mother_name', static function (JoinClause $join): void {
592                    $join
593                        ->on('mother_name.n_file', '=', 'l3.l_file')
594                        ->on('mother_name.n_id', '=', 'l3.l_to');
595                });
596            }
597        }
598
599        if ($spouse_family) {
600            $query->join('link AS l4', static function (JoinClause $join): void {
601                $join
602                    ->on('l4.l_file', '=', 'individuals.i_file')
603                    ->on('l4.l_from', '=', 'individuals.i_id')
604                    ->where('l4.l_type', '=', 'FAMS');
605            });
606            $query->join('families AS spouse_families', static function (JoinClause $join): void {
607                $join
608                    ->on('spouse_families.f_file', '=', 'l4.l_file')
609                    ->on('spouse_families.f_id', '=', 'l4.l_to');
610            });
611        }
612
613        if ($indi_name) {
614            $query->join('name AS individual_name', static function (JoinClause $join): void {
615                $join
616                    ->on('individual_name.n_file', '=', 'individuals.i_file')
617                    ->on('individual_name.n_id', '=', 'individuals.i_id');
618            });
619        }
620
621        foreach (array_unique($indi_dates) as $indi_date) {
622            $query->join('dates AS date_' . $indi_date, static function (JoinClause $join) use ($indi_date): void {
623                $join
624                    ->on('date_' . $indi_date . '.d_file', '=', 'individuals.i_file')
625                    ->on('date_' . $indi_date . '.d_gid', '=', 'individuals.i_id');
626            });
627        }
628
629        foreach (array_unique($fam_dates) as $fam_date) {
630            $query->join('dates AS date_' . $fam_date, static function (JoinClause $join) use ($fam_date): void {
631                $join
632                    ->on('date_' . $fam_date . '.d_file', '=', 'spouse_families.f_file')
633                    ->on('date_' . $fam_date . '.d_gid', '=', 'spouse_families.f_id');
634            });
635        }
636
637        if ($indi_plac) {
638            $query->join('placelinks AS individual_placelinks', static function (JoinClause $join): void {
639                $join
640                    ->on('individual_placelinks.pl_file', '=', 'individuals.i_file')
641                    ->on('individual_placelinks.pl_gid', '=', 'individuals.i_id');
642            });
643            $query->join('places AS individual_places', static function (JoinClause $join): void {
644                $join
645                    ->on('individual_places.p_file', '=', 'individual_placelinks.pl_file')
646                    ->on('individual_places.p_id', '=', 'individual_placelinks.pl_p_id');
647            });
648        }
649
650        if ($fam_plac) {
651            $query->join('placelinks AS familyl_placelinks', static function (JoinClause $join): void {
652                $join
653                    ->on('familyl_placelinks.pl_file', '=', 'individuals.i_file')
654                    ->on('familyl_placelinks.pl_gid', '=', 'individuals.i_id');
655            });
656            $query->join('places AS family_places', static function (JoinClause $join): void {
657                $join
658                    ->on('family_places.p_file', '=', 'familyl_placelinks.pl_file')
659                    ->on('family_places.p_id', '=', 'familyl_placelinks.pl_p_id');
660            });
661        }
662
663        foreach ($fields as $field_name => $field_value) {
664            $parts = explode(':', $field_name . ':::');
665            if (str_starts_with($field_name, 'INDI:NAME:')) {
666                switch ($field_name) {
667                    case 'INDI:NAME:GIVN':
668                        switch ($modifiers[$field_name]) {
669                            case 'EXACT':
670                                $query->where('individual_name.n_givn', '=', $field_value);
671                                break;
672                            case 'BEGINS':
673                                $query->where('individual_name.n_givn', 'LIKE', $field_value . '%');
674                                break;
675                            case 'CONTAINS':
676                                $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
677                                break;
678                            case 'SDX_STD':
679                                $sdx = Soundex::russell($field_value);
680                                if ($sdx !== '') {
681                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_std', $sdx);
682                                } else {
683                                    // No phonetic content? Use a substring match
684                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
685                                }
686                                break;
687                            case 'SDX': // SDX uses DM by default.
688                            case 'SDX_DM':
689                                $sdx = Soundex::daitchMokotoff($field_value);
690                                if ($sdx !== '') {
691                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_dm', $sdx);
692                                } else {
693                                    // No phonetic content? Use a substring match
694                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
695                                }
696                                break;
697                        }
698                        unset($fields[$field_name]);
699                        break;
700                    case 'INDI:NAME:SURN':
701                        switch ($modifiers[$field_name]) {
702                            case 'EXACT':
703                                $query->where(function (Builder $query) use ($field_value): void {
704                                    $query
705                                        ->where('individual_name.n_surn', '=', $field_value)
706                                        ->orWhere('individual_name.n_surname', '=', $field_value);
707                                });
708                                break;
709                            case 'BEGINS':
710                                $query->where(function (Builder $query) use ($field_value): void {
711                                    $query
712                                        ->where('individual_name.n_surn', 'LIKE', $field_value . '%')
713                                        ->orWhere('individual_name.n_surname', 'LIKE', $field_value . '%');
714                                });
715                                break;
716                            case 'CONTAINS':
717                                $query->where(function (Builder $query) use ($field_value): void {
718                                    $query
719                                        ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
720                                        ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
721                                });
722                                break;
723                            case 'SDX_STD':
724                                $sdx = Soundex::russell($field_value);
725                                if ($sdx !== '') {
726                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_std', $sdx);
727                                } else {
728                                    // No phonetic content? Use a substring match
729                                    $query->where(function (Builder $query) use ($field_value): void {
730                                        $query
731                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
732                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
733                                    });
734                                }
735                                break;
736                            case 'SDX': // SDX uses DM by default.
737                            case 'SDX_DM':
738                                $sdx = Soundex::daitchMokotoff($field_value);
739                                if ($sdx !== '') {
740                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_dm', $sdx);
741                                } else {
742                                    // No phonetic content? Use a substring match
743                                    $query->where(function (Builder $query) use ($field_value): void {
744                                        $query
745                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
746                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
747                                    });
748                                }
749                                break;
750                        }
751                        unset($fields[$field_name]);
752                        break;
753                    case 'INDI:NAME:NICK':
754                    case 'INDI:NAME:_MARNM':
755                    case 'INDI:NAME:_HEB':
756                    case 'INDI:NAME:_AKA':
757                        $like = "%\n1 NAME%\n2 " . $parts[2] . ' %' . preg_quote($field_value, '/') . '%';
758                        $query->where('individuals.i_gedcom', 'LIKE', $like);
759                        break;
760                }
761            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':DATE')) {
762                $date = new Date($field_value);
763                if ($date->isOK()) {
764                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
765                    $query
766                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
767                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
768                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
769                }
770                unset($fields[$field_name]);
771            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':DATE')) {
772                $date = new Date($field_value);
773                if ($date->isOK()) {
774                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
775                    $query
776                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
777                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
778                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
779                }
780                unset($fields[$field_name]);
781            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
782                // SQL can only link a place to a person/family, not to an event.
783                $query->where('individual_places.p_place', 'LIKE', '%' . $field_value . '%');
784            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
785                // SQL can only link a place to a person/family, not to an event.
786                $query->where('family_places.p_place', 'LIKE', '%' . $field_value . '%');
787            } elseif (str_starts_with($field_name, 'MOTHER:NAME:') || str_starts_with($field_name, 'FATHER:NAME:')) {
788                $table = str_starts_with($field_name, 'FATHER:NAME:') ? 'father_name' : 'mother_name';
789                switch ($parts[2]) {
790                    case 'GIVN':
791                        switch ($modifiers[$field_name]) {
792                            case 'EXACT':
793                                $query->where($table . '.n_givn', '=', $field_value);
794                                break;
795                            case 'BEGINS':
796                                $query->where($table . '.n_givn', 'LIKE', $field_value . '%');
797                                break;
798                            case 'CONTAINS':
799                                $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
800                                break;
801                            case 'SDX_STD':
802                                $sdx = Soundex::russell($field_value);
803                                if ($sdx !== '') {
804                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_std', $sdx);
805                                } else {
806                                    // No phonetic content? Use a substring match
807                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
808                                }
809                                break;
810                            case 'SDX': // SDX uses DM by default.
811                            case 'SDX_DM':
812                                $sdx = Soundex::daitchMokotoff($field_value);
813                                if ($sdx !== '') {
814                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_dm', $sdx);
815                                } else {
816                                    // No phonetic content? Use a substring match
817                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
818                                }
819                                break;
820                        }
821                        break;
822                    case 'SURN':
823                        switch ($modifiers[$field_name]) {
824                            case 'EXACT':
825                                $query->where($table . '.n_surn', '=', $field_value);
826                                break;
827                            case 'BEGINS':
828                                $query->where($table . '.n_surn', 'LIKE', $field_value . '%');
829                                break;
830                            case 'CONTAINS':
831                                $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
832                                break;
833                            case 'SDX_STD':
834                                $sdx = Soundex::russell($field_value);
835                                if ($sdx !== '') {
836                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_std', $sdx);
837                                } else {
838                                    // No phonetic content? Use a substring match
839                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
840                                }
841                                break;
842                            case 'SDX': // SDX uses DM by default.
843                            case 'SDX_DM':
844                                $sdx = Soundex::daitchMokotoff($field_value);
845                                if ($sdx !== '') {
846                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_dm', $sdx);
847                                } else {
848                                    // No phonetic content? Use a substring match
849                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
850                                }
851                                break;
852                        }
853                        break;
854                }
855                unset($fields[$field_name]);
856            } elseif (str_starts_with($field_name, 'FAM:')) {
857                // e.g. searches for occupation, religion, note, etc.
858                // Initial matching only.  Need PHP to apply filter.
859                $query->where('spouse_families.f_gedcom', 'LIKE', "%\n1 " . $parts[1] . ' %' . $field_value . '%');
860            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':TYPE')) {
861                // Initial matching only.  Need PHP to apply filter.
862                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[1] . "%\n2 TYPE %" . $field_value . '%');
863            } elseif (str_starts_with($field_name, 'INDI:')) {
864                // e.g. searches for occupation, religion, note, etc.
865                // Initial matching only.  Need PHP to apply filter.
866                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[1] . '%' . $parts[2] . '%' . $field_value . '%');
867            }
868        }
869
870        return $query
871            ->get()
872            ->each($this->rowLimiter())
873            ->map($this->individualRowMapper())
874            ->filter(GedcomRecord::accessFilter())
875            ->filter(static function (Individual $individual) use ($fields): bool {
876                // Check for searches which were only partially matched by SQL
877                foreach ($fields as $field_name => $field_value) {
878                    $parts = explode(':', $field_name . '::::');
879
880                    if (str_starts_with($field_name, 'INDI:NAME:') && $field_name !== 'INDI:NAME:GIVN' && $field_name !== 'INDI:NAME:SURN') {
881                        $regex = '/\n1 NAME.*(?:\n2.*)*\n2 ' . $parts[2] . ' .*' . preg_quote($field_value, '/') . '/i';
882
883                        if (preg_match($regex, $individual->gedcom()) === 1) {
884                            continue;
885                        }
886
887                        return false;
888                    }
889
890                    $regex = '/' . preg_quote($field_value, '/') . '/i';
891
892                    if (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
893                        foreach ($individual->facts([$parts[1]]) as $fact) {
894                            if (preg_match($regex, $fact->place()->gedcomName()) === 1) {
895                                continue 2;
896                            }
897                        }
898                        return false;
899                    }
900
901                    if (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
902                        foreach ($individual->spouseFamilies() as $family) {
903                            foreach ($family->facts([$parts[1]]) as $fact) {
904                                if (preg_match($regex, $fact->place()->gedcomName()) === 1) {
905                                    continue 3;
906                                }
907                            }
908                        }
909                        return false;
910                    }
911
912                    if ($field_name === 'INDI:FACT:TYPE' || $field_name === 'INDI:EVEN:TYPE' || $field_name === 'INDI:CHAN:_WT_USER') {
913                        foreach ($individual->facts([$parts[1]]) as $fact) {
914                            if (preg_match($regex, $fact->attribute($parts[2])) === 1) {
915                                continue 2;
916                            }
917                        }
918
919                        return false;
920                    }
921
922                    if (str_starts_with($field_name, 'INDI:')) {
923                        foreach ($individual->facts([$parts[1]]) as $fact) {
924                            if (preg_match($regex, $fact->value()) === 1) {
925                                continue 2;
926                            }
927                        }
928
929                        return false;
930                    }
931
932                    if (str_starts_with($field_name, 'FAM:')) {
933                        foreach ($individual->spouseFamilies() as $family) {
934                            foreach ($family->facts([$parts[1]]) as $fact) {
935                                if (preg_match($regex, $fact->value()) === 1) {
936                                    continue 3;
937                                }
938                            }
939                        }
940                        return false;
941                    }
942                }
943
944                return true;
945            });
946    }
947
948    /**
949     * @param string      $soundex
950     * @param string      $lastname
951     * @param string      $firstname
952     * @param string      $place
953     * @param array<Tree> $search_trees
954     *
955     * @return Collection<int,Individual>
956     */
957    public function searchIndividualsPhonetic(string $soundex, string $lastname, string $firstname, string $place, array $search_trees): Collection
958    {
959        switch ($soundex) {
960            default:
961            case 'Russell':
962                $givn_sdx   = Soundex::russell($firstname);
963                $surn_sdx   = Soundex::russell($lastname);
964                $plac_sdx   = Soundex::russell($place);
965                $givn_field = 'n_soundex_givn_std';
966                $surn_field = 'n_soundex_surn_std';
967                $plac_field = 'p_std_soundex';
968                break;
969            case 'DaitchM':
970                $givn_sdx   = Soundex::daitchMokotoff($firstname);
971                $surn_sdx   = Soundex::daitchMokotoff($lastname);
972                $plac_sdx   = Soundex::daitchMokotoff($place);
973                $givn_field = 'n_soundex_givn_dm';
974                $surn_field = 'n_soundex_surn_dm';
975                $plac_field = 'p_dm_soundex';
976                break;
977        }
978
979        // Nothing to search for? Return nothing.
980        if ($givn_sdx === '' && $surn_sdx === '' && $plac_sdx === '') {
981            return new Collection();
982        }
983
984        $query = DB::table('individuals')
985            ->select(['individuals.*'])
986            ->distinct();
987
988        $this->whereTrees($query, 'i_file', $search_trees);
989
990        if ($plac_sdx !== '') {
991            $query->join('placelinks', static function (JoinClause $join): void {
992                $join
993                    ->on('placelinks.pl_file', '=', 'individuals.i_file')
994                    ->on('placelinks.pl_gid', '=', 'individuals.i_id');
995            });
996            $query->join('places', static function (JoinClause $join): void {
997                $join
998                    ->on('places.p_file', '=', 'placelinks.pl_file')
999                    ->on('places.p_id', '=', 'placelinks.pl_p_id');
1000            });
1001
1002            $this->wherePhonetic($query, $plac_field, $plac_sdx);
1003        }
1004
1005        if ($givn_sdx !== '' || $surn_sdx !== '') {
1006            $query->join('name', static function (JoinClause $join): void {
1007                $join
1008                    ->on('name.n_file', '=', 'individuals.i_file')
1009                    ->on('name.n_id', '=', 'individuals.i_id');
1010            });
1011
1012            $this->wherePhonetic($query, $givn_field, $givn_sdx);
1013            $this->wherePhonetic($query, $surn_field, $surn_sdx);
1014        }
1015
1016        return $query
1017            ->get()
1018            ->each($this->rowLimiter())
1019            ->map($this->individualRowMapper())
1020            ->filter(GedcomRecord::accessFilter());
1021    }
1022
1023    /**
1024     * Paginate a search query.
1025     *
1026     * @param Builder $query      Searches the database for the desired records.
1027     * @param Closure $row_mapper Converts a row from the query into a record.
1028     * @param Closure $row_filter
1029     * @param int     $offset     Skip this many rows.
1030     * @param int     $limit      Take this many rows.
1031     *
1032     * @return Collection<int,mixed>
1033     */
1034    private function paginateQuery(Builder $query, Closure $row_mapper, Closure $row_filter, int $offset, int $limit): Collection
1035    {
1036        $collection = new Collection();
1037
1038        foreach ($query->cursor() as $row) {
1039            $record = $row_mapper($row);
1040            // searchIndividualNames() and searchFamilyNames() can return duplicate rows,
1041            // where individuals have multiple names - and we need to sort results by name.
1042            if ($collection->containsStrict($record)) {
1043                continue;
1044            }
1045            // If the object has a method "canShow()", then use it to filter for privacy.
1046            if ($row_filter($record)) {
1047                if ($offset > 0) {
1048                    $offset--;
1049                } else {
1050                    if ($limit > 0) {
1051                        $collection->push($record);
1052                    }
1053
1054                    $limit--;
1055
1056                    if ($limit === 0) {
1057                        break;
1058                    }
1059                }
1060            }
1061        }
1062
1063
1064        return $collection;
1065    }
1066
1067    /**
1068     * Apply search filters to a SQL query column.  Apply collation rules to MySQL.
1069     *
1070     * @param Builder           $query
1071     * @param Expression|string $field
1072     * @param array<string>     $search_terms
1073     */
1074    private function whereSearch(Builder $query, $field, array $search_terms): void
1075    {
1076        if ($field instanceof Expression) {
1077            $field = $field->getValue();
1078        }
1079
1080        foreach ($search_terms as $search_term) {
1081            $query->where(new Expression($field), 'LIKE', '%' . addcslashes($search_term, '\\%_') . '%');
1082        }
1083    }
1084
1085    /**
1086     * Apply soundex search filters to a SQL query column.
1087     *
1088     * @param Builder           $query
1089     * @param Expression|string $field
1090     * @param string            $soundex
1091     */
1092    private function wherePhonetic(Builder $query, $field, string $soundex): void
1093    {
1094        if ($soundex !== '') {
1095            $query->where(static function (Builder $query) use ($soundex, $field): void {
1096                foreach (explode(':', $soundex) as $sdx) {
1097                    $query->orWhere($field, 'LIKE', '%' . $sdx . '%');
1098                }
1099            });
1100        }
1101    }
1102
1103    /**
1104     * @param Builder     $query
1105     * @param string      $tree_id_field
1106     * @param array<Tree> $trees
1107     */
1108    private function whereTrees(Builder $query, string $tree_id_field, array $trees): void
1109    {
1110        $tree_ids = array_map(static function (Tree $tree): int {
1111            return $tree->id();
1112        }, $trees);
1113
1114        $query->whereIn($tree_id_field, $tree_ids);
1115    }
1116
1117    /**
1118     * Find the media object that uses a particular media file.
1119     *
1120     * @param string $file
1121     *
1122     * @return array<Media>
1123     */
1124    public function findMediaObjectsForMediaFile(string $file): array
1125    {
1126        return DB::table('media')
1127            ->join('media_file', static function (JoinClause $join): void {
1128                $join
1129                    ->on('media_file.m_file', '=', 'media.m_file')
1130                    ->on('media_file.m_id', '=', 'media.m_id');
1131            })
1132            ->join('gedcom_setting', 'media.m_file', '=', 'gedcom_setting.gedcom_id')
1133            ->where(new Expression('setting_value || multimedia_file_refn'), '=', $file)
1134            ->select(['media.*'])
1135            ->distinct()
1136            ->get()
1137            ->map($this->mediaRowMapper())
1138            ->all();
1139    }
1140
1141    /**
1142     * A closure to filter records by privacy-filtered GEDCOM data.
1143     *
1144     * @param array<string> $search_terms
1145     *
1146     * @return Closure(GedcomRecord):bool
1147     */
1148    private function rawGedcomFilter(array $search_terms): Closure
1149    {
1150        return static function (GedcomRecord $record) use ($search_terms): bool {
1151            // Ignore non-genealogy fields
1152            $gedcom = preg_replace('/\n\d (?:_UID|_WT_USER) .*/', '', $record->gedcom());
1153
1154            // Ignore matches in links
1155            $gedcom = preg_replace('/\n\d ' . Gedcom::REGEX_TAG . '( @' . Gedcom::REGEX_XREF . '@)?/', '', $gedcom);
1156
1157            // Re-apply the filtering
1158            foreach ($search_terms as $search_term) {
1159                if (mb_stripos($gedcom, $search_term) === false) {
1160                    return false;
1161                }
1162            }
1163
1164            return true;
1165        };
1166    }
1167
1168    /**
1169     * Searching for short or common text can give more results than the system can process.
1170     *
1171     * @param int $limit
1172     *
1173     * @return Closure():void
1174     */
1175    private function rowLimiter(int $limit = self::MAX_SEARCH_RESULTS): Closure
1176    {
1177        return static function () use ($limit): void {
1178            static $n = 0;
1179
1180            if (++$n > $limit) {
1181                $message = I18N::translate('The search returned too many results.');
1182
1183                throw new HttpServiceUnavailableException($message);
1184            }
1185        };
1186    }
1187
1188    /**
1189     * Convert a row from any tree in the families table into a family object.
1190     *
1191     * @return Closure(object):Family
1192     */
1193    private function familyRowMapper(): Closure
1194    {
1195        return function (object $row): Family {
1196            $tree = $this->tree_service->find((int) $row->f_file);
1197
1198            return Registry::familyFactory()->mapper($tree)($row);
1199        };
1200    }
1201
1202    /**
1203     * Convert a row from any tree in the individuals table into an individual object.
1204     *
1205     * @return Closure(object):Individual
1206     */
1207    private function individualRowMapper(): Closure
1208    {
1209        return function (object $row): Individual {
1210            $tree = $this->tree_service->find((int) $row->i_file);
1211
1212            return Registry::individualFactory()->mapper($tree)($row);
1213        };
1214    }
1215
1216    /**
1217     * Convert a row from any tree in the media table into a location object.
1218     *
1219     * @return Closure(object):Location
1220     */
1221    private function locationRowMapper(): Closure
1222    {
1223        return function (object $row): Location {
1224            $tree = $this->tree_service->find((int) $row->o_file);
1225
1226            return Registry::locationFactory()->mapper($tree)($row);
1227        };
1228    }
1229
1230    /**
1231     * Convert a row from any tree in the media table into an media object.
1232     *
1233     * @return Closure(object):Media
1234     */
1235    private function mediaRowMapper(): Closure
1236    {
1237        return function (object $row): Media {
1238            $tree = $this->tree_service->find((int) $row->m_file);
1239
1240            return Registry::mediaFactory()->mapper($tree)($row);
1241        };
1242    }
1243
1244    /**
1245     * Convert a row from any tree in the other table into a note object.
1246     *
1247     * @return Closure:Note
1248     */
1249    private function noteRowMapper(): Closure
1250    {
1251        return function (object $row): Note {
1252            $tree = $this->tree_service->find((int) $row->o_file);
1253
1254            return Registry::noteFactory()->mapper($tree)($row);
1255        };
1256    }
1257
1258    /**
1259     * Convert a row from any tree in the other table into a repository object.
1260     *
1261     * @return Closure:Repository
1262     */
1263    private function repositoryRowMapper(): Closure
1264    {
1265        return function (object $row): Repository {
1266            $tree = $this->tree_service->find((int) $row->o_file);
1267
1268            return Registry::repositoryFactory()->mapper($tree)($row);
1269        };
1270    }
1271
1272    /**
1273     * Convert a row from any tree in the other table into a note object.
1274     *
1275     * @return Closure(object):SharedNote
1276     */
1277    private function sharedNoteRowMapper(): Closure
1278    {
1279        return function (object $row): Note {
1280            $tree = $this->tree_service->find((int) $row->o_file);
1281
1282            return Registry::sharedNoteFactory()->mapper($tree)($row);
1283        };
1284    }
1285
1286    /**
1287     * Convert a row from any tree in the sources table into a source object.
1288     *
1289     * @return Closure:Source
1290     */
1291    private function sourceRowMapper(): Closure
1292    {
1293        return function (object $row): Source {
1294            $tree = $this->tree_service->find((int) $row->s_file);
1295
1296            return Registry::sourceFactory()->mapper($tree)($row);
1297        };
1298    }
1299
1300    /**
1301     * Convert a row from any tree in the other table into a submission object.
1302     *
1303     * @return Closure(object):Submission
1304     */
1305    private function submissionRowMapper(): Closure
1306    {
1307        return function (object $row): Submission {
1308            $tree = $this->tree_service->find((int) $row->o_file);
1309
1310            return Registry::submissionFactory()->mapper($tree)($row);
1311        };
1312    }
1313
1314    /**
1315     * Convert a row from any tree in the other table into a submitter object.
1316     *
1317     * @return Closure(object):Submitter
1318     */
1319    private function submitterRowMapper(): Closure
1320    {
1321        return function (object $row): Submitter {
1322            $tree = $this->tree_service->find((int) $row->o_file);
1323
1324            return Registry::submitterFactory()->mapper($tree)($row);
1325        };
1326    }
1327}
1328