xref: /webtrees/app/Services/SearchService.php (revision 31c839a4f79226f43f06cc39413dfb2e413ab9a8)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Closure;
23use Fisharebest\Webtrees\Date;
24use Fisharebest\Webtrees\Family;
25use Fisharebest\Webtrees\Gedcom;
26use Fisharebest\Webtrees\GedcomRecord;
27use Fisharebest\Webtrees\Http\Exceptions\HttpServiceUnavailableException;
28use Fisharebest\Webtrees\I18N;
29use Fisharebest\Webtrees\Individual;
30use Fisharebest\Webtrees\Location;
31use Fisharebest\Webtrees\Media;
32use Fisharebest\Webtrees\Note;
33use Fisharebest\Webtrees\Place;
34use Fisharebest\Webtrees\Registry;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\SharedNote;
37use Fisharebest\Webtrees\Soundex;
38use Fisharebest\Webtrees\Source;
39use Fisharebest\Webtrees\Submission;
40use Fisharebest\Webtrees\Submitter;
41use Fisharebest\Webtrees\Tree;
42use Illuminate\Database\Capsule\Manager as DB;
43use Illuminate\Database\Query\Builder;
44use Illuminate\Database\Query\Expression;
45use Illuminate\Database\Query\JoinClause;
46use Illuminate\Support\Collection;
47
48use function addcslashes;
49use function array_filter;
50use function array_map;
51use function array_unique;
52use function explode;
53use function implode;
54use function is_string;
55use function mb_stripos;
56use function preg_match;
57use function preg_quote;
58use function preg_replace;
59use function str_ends_with;
60use function str_starts_with;
61
62use const PHP_INT_MAX;
63
64/**
65 * Search trees for genealogy records.
66 */
67class SearchService
68{
69    // Do not attempt to show search results larger than this/
70    protected const MAX_SEARCH_RESULTS = 5000;
71
72    private TreeService $tree_service;
73    private $like_operator = 'LIKE';
74
75    /**
76     * @param TreeService $tree_service
77     */
78    public function __construct(
79        TreeService $tree_service
80    ) {
81        $this->tree_service = $tree_service;
82
83        // Allows to make insensitive searches in PostgreSQL
84        if (DB::connection()->getDriverName() === 'pgsql') {
85            $this->like_operator = 'ILIKE';
86        }
87    }
88
89    /**
90     * @param array<Tree>   $trees
91     * @param array<string> $search
92     *
93     * @return Collection<int,Family>
94     */
95    public function searchFamilies(array $trees, array $search): Collection
96    {
97        $query = DB::table('families');
98
99        $this->whereTrees($query, 'f_file', $trees);
100        $this->whereSearch($query, 'f_gedcom', $search);
101
102        return $query
103            ->get()
104            ->each($this->rowLimiter())
105            ->map($this->familyRowMapper())
106            ->filter(GedcomRecord::accessFilter())
107            ->filter($this->rawGedcomFilter($search));
108    }
109
110    /**
111     * Search for families by name.
112     *
113     * @param array<Tree>   $trees
114     * @param array<string> $search
115     * @param int           $offset
116     * @param int           $limit
117     *
118     * @return Collection<int,Family>
119     */
120    public function searchFamilyNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
121    {
122        $query = DB::table('families')
123            ->leftJoin('name AS husb_name', static function (JoinClause $join): void {
124                $join
125                    ->on('husb_name.n_file', '=', 'families.f_file')
126                    ->on('husb_name.n_id', '=', 'families.f_husb')
127                    ->where('husb_name.n_type', '<>', '_MARNM');
128            })
129            ->leftJoin('name AS wife_name', static function (JoinClause $join): void {
130                $join
131                    ->on('wife_name.n_file', '=', 'families.f_file')
132                    ->on('wife_name.n_id', '=', 'families.f_wife')
133                    ->where('wife_name.n_type', '<>', '_MARNM');
134            });
135
136        $prefix = DB::connection()->getTablePrefix();
137        $field  = new Expression('COALESCE(' . $prefix . "husb_name.n_full, '') || COALESCE(" . $prefix . "wife_name.n_full, '')");
138
139        $this->whereTrees($query, 'f_file', $trees);
140        $this->whereSearch($query, $field, $search);
141
142        $query
143            ->orderBy('husb_name.n_sort')
144            ->orderBy('wife_name.n_sort')
145            ->select(['families.*', 'husb_name.n_sort', 'wife_name.n_sort']);
146
147        return $this->paginateQuery($query, $this->familyRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
148    }
149
150    /**
151     * @param Place $place
152     *
153     * @return Collection<int,Family>
154     */
155    public function searchFamiliesInPlace(Place $place): Collection
156    {
157        return DB::table('families')
158            ->join('placelinks', static function (JoinClause $query) {
159                $query
160                    ->on('families.f_file', '=', 'placelinks.pl_file')
161                    ->on('families.f_id', '=', 'placelinks.pl_gid');
162            })
163            ->where('f_file', '=', $place->tree()->id())
164            ->where('pl_p_id', '=', $place->id())
165            ->select(['families.*'])
166            ->get()
167            ->each($this->rowLimiter())
168            ->map($this->familyRowMapper())
169            ->filter(GedcomRecord::accessFilter());
170    }
171
172    /**
173     * @param array<Tree>   $trees
174     * @param array<string> $search
175     *
176     * @return Collection<int,Individual>
177     */
178    public function searchIndividuals(array $trees, array $search): Collection
179    {
180        $query = DB::table('individuals');
181
182        $this->whereTrees($query, 'i_file', $trees);
183        $this->whereSearch($query, 'i_gedcom', $search);
184
185        return $query
186            ->get()
187            ->each($this->rowLimiter())
188            ->map($this->individualRowMapper())
189            ->filter(GedcomRecord::accessFilter())
190            ->filter($this->rawGedcomFilter($search));
191    }
192
193    /**
194     * Search for individuals by name.
195     *
196     * @param array<Tree>   $trees
197     * @param array<string> $search
198     * @param int           $offset
199     * @param int           $limit
200     *
201     * @return Collection<int,Individual>
202     */
203    public function searchIndividualNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
204    {
205        $query = DB::table('individuals')
206            ->join('name', static function (JoinClause $join): void {
207                $join
208                    ->on('name.n_file', '=', 'individuals.i_file')
209                    ->on('name.n_id', '=', 'individuals.i_id');
210            })
211            ->orderBy('n_sort')
212            ->select(['individuals.*', 'n_sort']);
213
214        $this->whereTrees($query, 'i_file', $trees);
215        $this->whereSearch($query, 'n_full', $search);
216
217        return $this->paginateQuery($query, $this->individualRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
218    }
219
220    /**
221     * @param Place $place
222     *
223     * @return Collection<int,Individual>
224     */
225    public function searchIndividualsInPlace(Place $place): Collection
226    {
227        return DB::table('individuals')
228            ->join('placelinks', static function (JoinClause $join) {
229                $join
230                    ->on('i_file', '=', 'pl_file')
231                    ->on('i_id', '=', 'pl_gid');
232            })
233            ->where('i_file', '=', $place->tree()->id())
234            ->where('pl_p_id', '=', $place->id())
235            ->select(['individuals.*'])
236            ->get()
237            ->each($this->rowLimiter())
238            ->map($this->individualRowMapper())
239            ->filter(GedcomRecord::accessFilter());
240    }
241
242    /**
243     * Search for submissions.
244     *
245     * @param array<Tree>   $trees
246     * @param array<string> $search
247     * @param int           $offset
248     * @param int           $limit
249     *
250     * @return Collection<int,Location>
251     */
252    public function searchLocations(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
253    {
254        $query = DB::table('other')
255            ->where('o_type', '=', Location::RECORD_TYPE);
256
257        $this->whereTrees($query, 'o_file', $trees);
258        $this->whereSearch($query, 'o_gedcom', $search);
259
260        return $this->paginateQuery($query, $this->locationRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
261    }
262
263    /**
264     * Search for media objects.
265     *
266     * @param array<Tree>   $trees
267     * @param array<string> $search
268     * @param int           $offset
269     * @param int           $limit
270     *
271     * @return Collection<int,Media>
272     */
273    public function searchMedia(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
274    {
275        $query = DB::table('media');
276
277        $this->whereTrees($query, 'media.m_file', $trees);
278        $this->whereSearch($query, 'm_gedcom', $search);
279
280        return $this->paginateQuery($query, $this->mediaRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
281    }
282
283    /**
284     * Search for notes.
285     *
286     * @param array<Tree>   $trees
287     * @param array<string> $search
288     * @param int           $offset
289     * @param int           $limit
290     *
291     * @return Collection<int,Note>
292     */
293    public function searchNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
294    {
295        $query = DB::table('other')
296            ->where('o_type', '=', Note::RECORD_TYPE);
297
298        $this->whereTrees($query, 'o_file', $trees);
299        $this->whereSearch($query, 'o_gedcom', $search);
300
301        return $this->paginateQuery($query, $this->noteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
302    }
303
304    /**
305     * Search for notes.
306     *
307     * @param array<Tree>   $trees
308     * @param array<string> $search
309     * @param int           $offset
310     * @param int           $limit
311     *
312     * @return Collection<int,SharedNote>
313     */
314    public function searchSharedNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
315    {
316        $query = DB::table('other')
317            ->where('o_type', '=', SharedNote::RECORD_TYPE);
318
319        $this->whereTrees($query, 'o_file', $trees);
320        $this->whereSearch($query, 'o_gedcom', $search);
321
322        return $this->paginateQuery($query, $this->sharedNoteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
323    }
324
325    /**
326     * Search for repositories.
327     *
328     * @param array<Tree>   $trees
329     * @param array<string> $search
330     * @param int           $offset
331     * @param int           $limit
332     *
333     * @return Collection<int,Repository>
334     */
335    public function searchRepositories(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
336    {
337        $query = DB::table('other')
338            ->where('o_type', '=', Repository::RECORD_TYPE);
339
340        $this->whereTrees($query, 'o_file', $trees);
341        $this->whereSearch($query, 'o_gedcom', $search);
342
343        return $this->paginateQuery($query, $this->repositoryRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
344    }
345
346    /**
347     * Search for sources.
348     *
349     * @param array<Tree>   $trees
350     * @param array<string> $search
351     * @param int      $offset
352     * @param int      $limit
353     *
354     * @return Collection<int,Source>
355     */
356    public function searchSources(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
357    {
358        $query = DB::table('sources');
359
360        $this->whereTrees($query, 's_file', $trees);
361        $this->whereSearch($query, 's_gedcom', $search);
362
363        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
364    }
365
366    /**
367     * Search for sources by name.
368     *
369     * @param array<Tree>   $trees
370     * @param array<string> $search
371     * @param int           $offset
372     * @param int           $limit
373     *
374     * @return Collection<int,Source>
375     */
376    public function searchSourcesByName(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
377    {
378        $query = DB::table('sources')
379            ->orderBy('s_name');
380
381        $this->whereTrees($query, 's_file', $trees);
382        $this->whereSearch($query, 's_name', $search);
383
384        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
385    }
386
387    /**
388     * Search for sources.
389     *
390     * @param array<Tree>   $trees
391     * @param array<string> $search
392     * @param int           $offset
393     * @param int           $limit
394     *
395     * @return Collection<int,string>
396     */
397    public function searchSurnames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
398    {
399        $query = DB::table('name');
400
401        $this->whereTrees($query, 'n_file', $trees);
402        $this->whereSearch($query, 'n_surname', $search);
403
404        return $query
405            ->groupBy(['n_surname'])
406            ->orderBy('n_surname')
407            ->skip($offset)
408            ->take($limit)
409            ->pluck('n_surname');
410    }
411
412    /**
413     * Search for submissions.
414     *
415     * @param array<Tree>   $trees
416     * @param array<string> $search
417     * @param int           $offset
418     * @param int           $limit
419     *
420     * @return Collection<int,Submission>
421     */
422    public function searchSubmissions(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
423    {
424        $query = DB::table('other')
425            ->where('o_type', '=', Submission::RECORD_TYPE);
426
427        $this->whereTrees($query, 'o_file', $trees);
428        $this->whereSearch($query, 'o_gedcom', $search);
429
430        return $this->paginateQuery($query, $this->submissionRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
431    }
432
433    /**
434     * Search for submitters.
435     *
436     * @param array<Tree>   $trees
437     * @param array<string> $search
438     * @param int           $offset
439     * @param int           $limit
440     *
441     * @return Collection<int,Submitter>
442     */
443    public function searchSubmitters(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
444    {
445        $query = DB::table('other')
446            ->where('o_type', '=', Submitter::RECORD_TYPE);
447
448        $this->whereTrees($query, 'o_file', $trees);
449        $this->whereSearch($query, 'o_gedcom', $search);
450
451        return $this->paginateQuery($query, $this->submitterRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
452    }
453
454    /**
455     * Search for places.
456     *
457     * @param Tree   $tree
458     * @param string $search
459     * @param int    $offset
460     * @param int    $limit
461     *
462     * @return Collection<int,Place>
463     */
464    public function searchPlaces(Tree $tree, string $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
465    {
466        $query = DB::table('places AS p0')
467            ->where('p0.p_file', '=', $tree->id())
468            ->leftJoin('places AS p1', 'p1.p_id', '=', 'p0.p_parent_id')
469            ->leftJoin('places AS p2', 'p2.p_id', '=', 'p1.p_parent_id')
470            ->leftJoin('places AS p3', 'p3.p_id', '=', 'p2.p_parent_id')
471            ->leftJoin('places AS p4', 'p4.p_id', '=', 'p3.p_parent_id')
472            ->leftJoin('places AS p5', 'p5.p_id', '=', 'p4.p_parent_id')
473            ->leftJoin('places AS p6', 'p6.p_id', '=', 'p5.p_parent_id')
474            ->leftJoin('places AS p7', 'p7.p_id', '=', 'p6.p_parent_id')
475            ->leftJoin('places AS p8', 'p8.p_id', '=', 'p7.p_parent_id')
476            ->orderBy('p0.p_place')
477            ->orderBy('p1.p_place')
478            ->orderBy('p2.p_place')
479            ->orderBy('p3.p_place')
480            ->orderBy('p4.p_place')
481            ->orderBy('p5.p_place')
482            ->orderBy('p6.p_place')
483            ->orderBy('p7.p_place')
484            ->orderBy('p8.p_place')
485            ->select([
486                'p0.p_place AS place0',
487                'p1.p_place AS place1',
488                'p2.p_place AS place2',
489                'p3.p_place AS place3',
490                'p4.p_place AS place4',
491                'p5.p_place AS place5',
492                'p6.p_place AS place6',
493                'p7.p_place AS place7',
494                'p8.p_place AS place8',
495            ]);
496
497        // Filter each level of the hierarchy.
498        foreach (explode(',', $search, 9) as $level => $string) {
499            $query->where('p' . $level . '.p_place', $this->like_operator, '%' . addcslashes($string, '\\%_') . '%');
500        }
501
502        $row_mapper = static function (object $row) use ($tree): Place {
503            $place = implode(', ', array_filter((array) $row));
504
505            return new Place($place, $tree);
506        };
507
508        $filter = static function (): bool {
509            return true;
510        };
511
512        return $this->paginateQuery($query, $row_mapper, $filter, $offset, $limit);
513    }
514
515    /**
516     * @param array<Tree>          $trees
517     * @param array<string,string> $fields
518     * @param array<string,string> $modifiers
519     *
520     * @return Collection<int,Individual>
521     */
522    public function searchIndividualsAdvanced(array $trees, array $fields, array $modifiers): Collection
523    {
524        $fields = array_filter($fields, static fn (string $x): bool => $x !== '');
525
526        $query = DB::table('individuals')
527            ->select(['individuals.*'])
528            ->distinct();
529
530        $this->whereTrees($query, 'i_file', $trees);
531
532        // Join the following tables
533        $father_name   = false;
534        $mother_name   = false;
535        $spouse_family = false;
536        $indi_name     = false;
537        $indi_dates    = [];
538        $fam_dates     = [];
539        $indi_plac     = false;
540        $fam_plac      = false;
541
542        foreach ($fields as $field_name => $field_value) {
543            if (str_starts_with($field_name, 'FATHER:NAME')) {
544                $father_name = true;
545            } elseif (str_starts_with($field_name, 'MOTHER:NAME')) {
546                $mother_name = true;
547            } elseif (str_starts_with($field_name, 'INDI:NAME:GIVN')) {
548                $indi_name = true;
549            } elseif (str_starts_with($field_name, 'INDI:NAME:SURN')) {
550                $indi_name = true;
551            } elseif (str_starts_with($field_name, 'FAM:')) {
552                $spouse_family = true;
553                if (str_ends_with($field_name, ':DATE')) {
554                    $fam_dates[] = explode(':', $field_name)[1];
555                } elseif (str_ends_with($field_name, ':PLAC')) {
556                    $fam_plac = true;
557                }
558            } elseif (str_starts_with($field_name, 'INDI:')) {
559                if (str_ends_with($field_name, ':DATE')) {
560                    $indi_dates[] = explode(':', $field_name)[1];
561                } elseif (str_ends_with($field_name, ':PLAC')) {
562                    $indi_plac = true;
563                }
564            }
565        }
566
567        if ($father_name || $mother_name) {
568            $query->join('link AS l1', static function (JoinClause $join): void {
569                $join
570                    ->on('l1.l_file', '=', 'individuals.i_file')
571                    ->on('l1.l_from', '=', 'individuals.i_id')
572                    ->where('l1.l_type', '=', 'FAMC');
573            });
574
575            if ($father_name) {
576                $query->join('link AS l2', static function (JoinClause $join): void {
577                    $join
578                        ->on('l2.l_file', '=', 'l1.l_file')
579                        ->on('l2.l_from', '=', 'l1.l_to')
580                        ->where('l2.l_type', '=', 'HUSB');
581                });
582                $query->join('name AS father_name', static function (JoinClause $join): void {
583                    $join
584                        ->on('father_name.n_file', '=', 'l2.l_file')
585                        ->on('father_name.n_id', '=', 'l2.l_to');
586                });
587            }
588
589            if ($mother_name) {
590                $query->join('link AS l3', static function (JoinClause $join): void {
591                    $join
592                        ->on('l3.l_file', '=', 'l1.l_file')
593                        ->on('l3.l_from', '=', 'l1.l_to')
594                        ->where('l3.l_type', '=', 'WIFE');
595                });
596                $query->join('name AS mother_name', static function (JoinClause $join): void {
597                    $join
598                        ->on('mother_name.n_file', '=', 'l3.l_file')
599                        ->on('mother_name.n_id', '=', 'l3.l_to');
600                });
601            }
602        }
603
604        if ($spouse_family) {
605            $query->join('link AS l4', static function (JoinClause $join): void {
606                $join
607                    ->on('l4.l_file', '=', 'individuals.i_file')
608                    ->on('l4.l_from', '=', 'individuals.i_id')
609                    ->where('l4.l_type', '=', 'FAMS');
610            });
611            $query->join('families AS spouse_families', static function (JoinClause $join): void {
612                $join
613                    ->on('spouse_families.f_file', '=', 'l4.l_file')
614                    ->on('spouse_families.f_id', '=', 'l4.l_to');
615            });
616        }
617
618        if ($indi_name) {
619            $query->join('name AS individual_name', static function (JoinClause $join): void {
620                $join
621                    ->on('individual_name.n_file', '=', 'individuals.i_file')
622                    ->on('individual_name.n_id', '=', 'individuals.i_id');
623            });
624        }
625
626        foreach (array_unique($indi_dates) as $indi_date) {
627            $query->join('dates AS date_' . $indi_date, static function (JoinClause $join) use ($indi_date): void {
628                $join
629                    ->on('date_' . $indi_date . '.d_file', '=', 'individuals.i_file')
630                    ->on('date_' . $indi_date . '.d_gid', '=', 'individuals.i_id');
631            });
632        }
633
634        foreach (array_unique($fam_dates) as $fam_date) {
635            $query->join('dates AS date_' . $fam_date, static function (JoinClause $join) use ($fam_date): void {
636                $join
637                    ->on('date_' . $fam_date . '.d_file', '=', 'spouse_families.f_file')
638                    ->on('date_' . $fam_date . '.d_gid', '=', 'spouse_families.f_id');
639            });
640        }
641
642        if ($indi_plac) {
643            $query->join('placelinks AS individual_placelinks', static function (JoinClause $join): void {
644                $join
645                    ->on('individual_placelinks.pl_file', '=', 'individuals.i_file')
646                    ->on('individual_placelinks.pl_gid', '=', 'individuals.i_id');
647            });
648            $query->join('places AS individual_places', static function (JoinClause $join): void {
649                $join
650                    ->on('individual_places.p_file', '=', 'individual_placelinks.pl_file')
651                    ->on('individual_places.p_id', '=', 'individual_placelinks.pl_p_id');
652            });
653        }
654
655        if ($fam_plac) {
656            $query->join('placelinks AS familyl_placelinks', static function (JoinClause $join): void {
657                $join
658                    ->on('familyl_placelinks.pl_file', '=', 'individuals.i_file')
659                    ->on('familyl_placelinks.pl_gid', '=', 'individuals.i_id');
660            });
661            $query->join('places AS family_places', static function (JoinClause $join): void {
662                $join
663                    ->on('family_places.p_file', '=', 'familyl_placelinks.pl_file')
664                    ->on('family_places.p_id', '=', 'familyl_placelinks.pl_p_id');
665            });
666        }
667
668        foreach ($fields as $field_name => $field_value) {
669            $parts = explode(':', $field_name . ':::');
670            if (str_starts_with($field_name, 'INDI:NAME:')) {
671                switch ($field_name) {
672                    case 'INDI:NAME:GIVN':
673                        switch ($modifiers[$field_name]) {
674                            case 'EXACT':
675                                $query->where('individual_name.n_givn', '=', $field_value);
676                                break;
677                            case 'BEGINS':
678                                $query->where('individual_name.n_givn', $this->like_operator, $field_value . '%');
679                                break;
680                            case 'CONTAINS':
681                                $query->where('individual_name.n_givn', $this->like_operator, '%' . $field_value . '%');
682                                break;
683                            case 'SDX_STD':
684                                $sdx = Soundex::russell($field_value);
685                                if ($sdx !== '') {
686                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_std', $sdx);
687                                } else {
688                                    // No phonetic content? Use a substring match
689                                    $query->where('individual_name.n_givn', $this->like_operator, '%' . $field_value . '%');
690                                }
691                                break;
692                            case 'SDX': // SDX uses DM by default.
693                            case 'SDX_DM':
694                                $sdx = Soundex::daitchMokotoff($field_value);
695                                if ($sdx !== '') {
696                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_dm', $sdx);
697                                } else {
698                                    // No phonetic content? Use a substring match
699                                    $query->where('individual_name.n_givn', $this->like_operator, '%' . $field_value . '%');
700                                }
701                                break;
702                        }
703                        unset($fields[$field_name]);
704                        break;
705                    case 'INDI:NAME:SURN':
706                        switch ($modifiers[$field_name]) {
707                            case 'EXACT':
708                                $query->where(function (Builder $query) use ($field_value): void {
709                                    $query
710                                        ->where('individual_name.n_surn', '=', $field_value)
711                                        ->orWhere('individual_name.n_surname', '=', $field_value);
712                                });
713                                break;
714                            case 'BEGINS':
715                                $query->where(function (Builder $query) use ($field_value): void {
716                                    $query
717                                        ->where('individual_name.n_surn', $this->like_operator, $field_value . '%')
718                                        ->orWhere('individual_name.n_surname', $this->like_operator, $field_value . '%');
719                                });
720                                break;
721                            case 'CONTAINS':
722                                $query->where(function (Builder $query) use ($field_value): void {
723                                    $query
724                                        ->where('individual_name.n_surn', $this->like_operator, '%' . $field_value . '%')
725                                        ->orWhere('individual_name.n_surname', $this->like_operator, '%' . $field_value . '%');
726                                });
727                                break;
728                            case 'SDX_STD':
729                                $sdx = Soundex::russell($field_value);
730                                if ($sdx !== '') {
731                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_std', $sdx);
732                                } else {
733                                    // No phonetic content? Use a substring match
734                                    $query->where(function (Builder $query) use ($field_value): void {
735                                        $query
736                                            ->where('individual_name.n_surn', $this->like_operator, '%' . $field_value . '%')
737                                            ->orWhere('individual_name.n_surname', $this->like_operator, '%' . $field_value . '%');
738                                    });
739                                }
740                                break;
741                            case 'SDX': // SDX uses DM by default.
742                            case 'SDX_DM':
743                                $sdx = Soundex::daitchMokotoff($field_value);
744                                if ($sdx !== '') {
745                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_dm', $sdx);
746                                } else {
747                                    // No phonetic content? Use a substring match
748                                    $query->where(function (Builder $query) use ($field_value): void {
749                                        $query
750                                            ->where('individual_name.n_surn', $this->like_operator, '%' . $field_value . '%')
751                                            ->orWhere('individual_name.n_surname', $this->like_operator, '%' . $field_value . '%');
752                                    });
753                                }
754                                break;
755                        }
756                        unset($fields[$field_name]);
757                        break;
758                    case 'INDI:NAME:NICK':
759                    case 'INDI:NAME:_MARNM':
760                    case 'INDI:NAME:_HEB':
761                    case 'INDI:NAME:_AKA':
762                        $like = "%\n1 NAME%\n2 " . $parts[2] . ' %' . preg_quote($field_value, '/') . '%';
763                        $query->where('individuals.i_gedcom', $this->like_operator, $like);
764                        break;
765                }
766            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':DATE')) {
767                $date = new Date($field_value);
768                if ($date->isOK()) {
769                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
770                    $query
771                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
772                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
773                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
774                }
775                unset($fields[$field_name]);
776            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':DATE')) {
777                $date = new Date($field_value);
778                if ($date->isOK()) {
779                    $delta = 365 * (int) ($modifiers[$field_name] ?? 0);
780                    $query
781                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
782                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
783                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
784                }
785                unset($fields[$field_name]);
786            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
787                // SQL can only link a place to a person/family, not to an event.
788                $query->where('individual_places.p_place', $this->like_operator, '%' . $field_value . '%');
789            } elseif (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
790                // SQL can only link a place to a person/family, not to an event.
791                $query->where('family_places.p_place', $this->like_operator, '%' . $field_value . '%');
792            } elseif (str_starts_with($field_name, 'MOTHER:NAME:') || str_starts_with($field_name, 'FATHER:NAME:')) {
793                $table = str_starts_with($field_name, 'FATHER:NAME:') ? 'father_name' : 'mother_name';
794                switch ($parts[2]) {
795                    case 'GIVN':
796                        switch ($modifiers[$field_name]) {
797                            case 'EXACT':
798                                $query->where($table . '.n_givn', '=', $field_value);
799                                break;
800                            case 'BEGINS':
801                                $query->where($table . '.n_givn', $this->like_operator, $field_value . '%');
802                                break;
803                            case 'CONTAINS':
804                                $query->where($table . '.n_givn', $this->like_operator, '%' . $field_value . '%');
805                                break;
806                            case 'SDX_STD':
807                                $sdx = Soundex::russell($field_value);
808                                if ($sdx !== '') {
809                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_std', $sdx);
810                                } else {
811                                    // No phonetic content? Use a substring match
812                                    $query->where($table . '.n_givn', $this->like_operator, '%' . $field_value . '%');
813                                }
814                                break;
815                            case 'SDX': // SDX uses DM by default.
816                            case 'SDX_DM':
817                                $sdx = Soundex::daitchMokotoff($field_value);
818                                if ($sdx !== '') {
819                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_dm', $sdx);
820                                } else {
821                                    // No phonetic content? Use a substring match
822                                    $query->where($table . '.n_givn', $this->like_operator, '%' . $field_value . '%');
823                                }
824                                break;
825                        }
826                        break;
827                    case 'SURN':
828                        switch ($modifiers[$field_name]) {
829                            case 'EXACT':
830                                $query->where($table . '.n_surn', '=', $field_value);
831                                break;
832                            case 'BEGINS':
833                                $query->where($table . '.n_surn', $this->like_operator, $field_value . '%');
834                                break;
835                            case 'CONTAINS':
836                                $query->where($table . '.n_surn', $this->like_operator, '%' . $field_value . '%');
837                                break;
838                            case 'SDX_STD':
839                                $sdx = Soundex::russell($field_value);
840                                if ($sdx !== '') {
841                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_std', $sdx);
842                                } else {
843                                    // No phonetic content? Use a substring match
844                                    $query->where($table . '.n_surn', $this->like_operator, '%' . $field_value . '%');
845                                }
846                                break;
847                            case 'SDX': // SDX uses DM by default.
848                            case 'SDX_DM':
849                                $sdx = Soundex::daitchMokotoff($field_value);
850                                if ($sdx !== '') {
851                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_dm', $sdx);
852                                } else {
853                                    // No phonetic content? Use a substring match
854                                    $query->where($table . '.n_surn', $this->like_operator, '%' . $field_value . '%');
855                                }
856                                break;
857                        }
858                        break;
859                }
860                unset($fields[$field_name]);
861            } elseif (str_starts_with($field_name, 'FAM:')) {
862                // e.g. searches for occupation, religion, note, etc.
863                // Initial matching only.  Need PHP to apply filter.
864                $query->where('spouse_families.f_gedcom', $this->like_operator, "%\n1 " . $parts[1] . ' %' . $field_value . '%');
865            } elseif (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':TYPE')) {
866                // Initial matching only.  Need PHP to apply filter.
867                $query->where('individuals.i_gedcom', $this->like_operator, "%\n1 " . $parts[1] . "%\n2 TYPE %" . $field_value . '%');
868            } elseif (str_starts_with($field_name, 'INDI:')) {
869                // e.g. searches for occupation, religion, note, etc.
870                // Initial matching only.  Need PHP to apply filter.
871                $query->where('individuals.i_gedcom', $this->like_operator, "%\n1 " . $parts[1] . '%' . $parts[2] . '%' . $field_value . '%');
872            }
873        }
874
875        return $query
876            ->get()
877            ->each($this->rowLimiter())
878            ->map($this->individualRowMapper())
879            ->filter(GedcomRecord::accessFilter())
880            ->filter(static function (Individual $individual) use ($fields): bool {
881                // Check for searches which were only partially matched by SQL
882                foreach ($fields as $field_name => $field_value) {
883                    $parts = explode(':', $field_name . '::::');
884
885                    if (str_starts_with($field_name, 'INDI:NAME:') && $field_name !== 'INDI:NAME:GIVN' && $field_name !== 'INDI:NAME:SURN') {
886                        $regex = '/\n1 NAME.*(?:\n2.*)*\n2 ' . $parts[2] . ' .*' . preg_quote($field_value, '/') . '/i';
887
888                        if (preg_match($regex, $individual->gedcom()) === 1) {
889                            continue;
890                        }
891
892                        return false;
893                    }
894
895                    $regex = '/' . preg_quote($field_value, '/') . '/i';
896
897                    if (str_starts_with($field_name, 'INDI:') && str_ends_with($field_name, ':PLAC')) {
898                        foreach ($individual->facts([$parts[1]]) as $fact) {
899                            if (preg_match($regex, $fact->place()->gedcomName()) === 1) {
900                                continue 2;
901                            }
902                        }
903                        return false;
904                    }
905
906                    if (str_starts_with($field_name, 'FAM:') && str_ends_with($field_name, ':PLAC')) {
907                        foreach ($individual->spouseFamilies() as $family) {
908                            foreach ($family->facts([$parts[1]]) as $fact) {
909                                if (preg_match($regex, $fact->place()->gedcomName()) === 1) {
910                                    continue 3;
911                                }
912                            }
913                        }
914                        return false;
915                    }
916
917                    if ($field_name === 'INDI:FACT:TYPE' || $field_name === 'INDI:EVEN:TYPE' || $field_name === 'INDI:CHAN:_WT_USER') {
918                        foreach ($individual->facts([$parts[1]]) as $fact) {
919                            if (preg_match($regex, $fact->attribute($parts[2])) === 1) {
920                                continue 2;
921                            }
922                        }
923
924                        return false;
925                    }
926
927                    if (str_starts_with($field_name, 'INDI:')) {
928                        foreach ($individual->facts([$parts[1]]) as $fact) {
929                            if (preg_match($regex, $fact->value()) === 1) {
930                                continue 2;
931                            }
932                        }
933
934                        return false;
935                    }
936
937                    if (str_starts_with($field_name, 'FAM:')) {
938                        foreach ($individual->spouseFamilies() as $family) {
939                            foreach ($family->facts([$parts[1]]) as $fact) {
940                                if (preg_match($regex, $fact->value()) === 1) {
941                                    continue 3;
942                                }
943                            }
944                        }
945                        return false;
946                    }
947                }
948
949                return true;
950            });
951    }
952
953    /**
954     * @param string      $soundex
955     * @param string      $lastname
956     * @param string      $firstname
957     * @param string      $place
958     * @param array<Tree> $search_trees
959     *
960     * @return Collection<int,Individual>
961     */
962    public function searchIndividualsPhonetic(string $soundex, string $lastname, string $firstname, string $place, array $search_trees): Collection
963    {
964        switch ($soundex) {
965            default:
966            case 'Russell':
967                $givn_sdx   = Soundex::russell($firstname);
968                $surn_sdx   = Soundex::russell($lastname);
969                $plac_sdx   = Soundex::russell($place);
970                $givn_field = 'n_soundex_givn_std';
971                $surn_field = 'n_soundex_surn_std';
972                $plac_field = 'p_std_soundex';
973                break;
974            case 'DaitchM':
975                $givn_sdx   = Soundex::daitchMokotoff($firstname);
976                $surn_sdx   = Soundex::daitchMokotoff($lastname);
977                $plac_sdx   = Soundex::daitchMokotoff($place);
978                $givn_field = 'n_soundex_givn_dm';
979                $surn_field = 'n_soundex_surn_dm';
980                $plac_field = 'p_dm_soundex';
981                break;
982        }
983
984        // Nothing to search for? Return nothing.
985        if ($givn_sdx === '' && $surn_sdx === '' && $plac_sdx === '') {
986            return new Collection();
987        }
988
989        $query = DB::table('individuals')
990            ->select(['individuals.*'])
991            ->distinct();
992
993        $this->whereTrees($query, 'i_file', $search_trees);
994
995        if ($plac_sdx !== '') {
996            $query->join('placelinks', static function (JoinClause $join): void {
997                $join
998                    ->on('placelinks.pl_file', '=', 'individuals.i_file')
999                    ->on('placelinks.pl_gid', '=', 'individuals.i_id');
1000            });
1001            $query->join('places', static function (JoinClause $join): void {
1002                $join
1003                    ->on('places.p_file', '=', 'placelinks.pl_file')
1004                    ->on('places.p_id', '=', 'placelinks.pl_p_id');
1005            });
1006
1007            $this->wherePhonetic($query, $plac_field, $plac_sdx);
1008        }
1009
1010        if ($givn_sdx !== '' || $surn_sdx !== '') {
1011            $query->join('name', static function (JoinClause $join): void {
1012                $join
1013                    ->on('name.n_file', '=', 'individuals.i_file')
1014                    ->on('name.n_id', '=', 'individuals.i_id');
1015            });
1016
1017            $this->wherePhonetic($query, $givn_field, $givn_sdx);
1018            $this->wherePhonetic($query, $surn_field, $surn_sdx);
1019        }
1020
1021        return $query
1022            ->get()
1023            ->each($this->rowLimiter())
1024            ->map($this->individualRowMapper())
1025            ->filter(GedcomRecord::accessFilter());
1026    }
1027
1028    /**
1029     * Paginate a search query.
1030     *
1031     * @param Builder $query      Searches the database for the desired records.
1032     * @param Closure $row_mapper Converts a row from the query into a record.
1033     * @param Closure $row_filter
1034     * @param int     $offset     Skip this many rows.
1035     * @param int     $limit      Take this many rows.
1036     *
1037     * @return Collection<int,mixed>
1038     */
1039    private function paginateQuery(Builder $query, Closure $row_mapper, Closure $row_filter, int $offset, int $limit): Collection
1040    {
1041        $collection = new Collection();
1042
1043        foreach ($query->cursor() as $row) {
1044            $record = $row_mapper($row);
1045            // searchIndividualNames() and searchFamilyNames() can return duplicate rows,
1046            // where individuals have multiple names - and we need to sort results by name.
1047            if ($collection->containsStrict($record)) {
1048                continue;
1049            }
1050            // If the object has a method "canShow()", then use it to filter for privacy.
1051            if ($row_filter($record)) {
1052                if ($offset > 0) {
1053                    $offset--;
1054                } else {
1055                    if ($limit > 0) {
1056                        $collection->push($record);
1057                    }
1058
1059                    $limit--;
1060
1061                    if ($limit === 0) {
1062                        break;
1063                    }
1064                }
1065            }
1066        }
1067
1068
1069        return $collection;
1070    }
1071
1072    /**
1073     * Apply search filters to a SQL query column.  Apply collation rules to MySQL.
1074     *
1075     * @param Builder           $query
1076     * @param Expression|string $column
1077     * @param array<string>     $search_terms
1078     */
1079    private function whereSearch(Builder $query, Expression|string $column, array $search_terms): void
1080    {
1081        foreach ($search_terms as $search_term) {
1082            $query->where($column, $this->like_operator, '%' . addcslashes($search_term, '\\%_') . '%');
1083        }
1084    }
1085
1086    /**
1087     * Apply soundex search filters to a SQL query column.
1088     *
1089     * @param Builder           $query
1090     * @param Expression|string $field
1091     * @param string            $soundex
1092     */
1093    private function wherePhonetic(Builder $query, $field, string $soundex): void
1094    {
1095        if ($soundex !== '') {
1096            $query->where(static function (Builder $query) use ($soundex, $field): void {
1097                foreach (explode(':', $soundex) as $sdx) {
1098                    $query->orWhere($field, $this->like_operator, '%' . $sdx . '%');
1099                }
1100            });
1101        }
1102    }
1103
1104    /**
1105     * @param Builder     $query
1106     * @param string      $tree_id_field
1107     * @param array<Tree> $trees
1108     */
1109    private function whereTrees(Builder $query, string $tree_id_field, array $trees): void
1110    {
1111        $tree_ids = array_map(static function (Tree $tree): int {
1112            return $tree->id();
1113        }, $trees);
1114
1115        $query->whereIn($tree_id_field, $tree_ids);
1116    }
1117
1118    /**
1119     * Find the media object that uses a particular media file.
1120     *
1121     * @param string $file
1122     *
1123     * @return array<Media>
1124     */
1125    public function findMediaObjectsForMediaFile(string $file): array
1126    {
1127        return DB::table('media')
1128            ->join('media_file', static function (JoinClause $join): void {
1129                $join
1130                    ->on('media_file.m_file', '=', 'media.m_file')
1131                    ->on('media_file.m_id', '=', 'media.m_id');
1132            })
1133            ->join('gedcom_setting', 'media.m_file', '=', 'gedcom_setting.gedcom_id')
1134            ->where(new Expression('setting_value || multimedia_file_refn'), '=', $file)
1135            ->select(['media.*'])
1136            ->distinct()
1137            ->get()
1138            ->map($this->mediaRowMapper())
1139            ->all();
1140    }
1141
1142    /**
1143     * A closure to filter records by privacy-filtered GEDCOM data.
1144     *
1145     * @param array<string> $search_terms
1146     *
1147     * @return Closure(GedcomRecord):bool
1148     */
1149    private function rawGedcomFilter(array $search_terms): Closure
1150    {
1151        return static function (GedcomRecord $record) use ($search_terms): bool {
1152            // Ignore non-genealogy fields
1153            $gedcom = preg_replace('/\n\d (?:_UID|_WT_USER) .*/', '', $record->gedcom());
1154
1155            // Ignore matches in links
1156            $gedcom = preg_replace('/\n\d ' . Gedcom::REGEX_TAG . '( @' . Gedcom::REGEX_XREF . '@)?/', '', $gedcom);
1157
1158            // Re-apply the filtering
1159            foreach ($search_terms as $search_term) {
1160                if (mb_stripos($gedcom, $search_term) === false) {
1161                    return false;
1162                }
1163            }
1164
1165            return true;
1166        };
1167    }
1168
1169    /**
1170     * Searching for short or common text can give more results than the system can process.
1171     *
1172     * @param int $limit
1173     *
1174     * @return Closure():void
1175     */
1176    private function rowLimiter(int $limit = self::MAX_SEARCH_RESULTS): Closure
1177    {
1178        return static function () use ($limit): void {
1179            static $n = 0;
1180
1181            if (++$n > $limit) {
1182                $message = I18N::translate('The search returned too many results.');
1183
1184                throw new HttpServiceUnavailableException($message);
1185            }
1186        };
1187    }
1188
1189    /**
1190     * Convert a row from any tree in the families table into a family object.
1191     *
1192     * @return Closure(object):Family
1193     */
1194    private function familyRowMapper(): Closure
1195    {
1196        return function (object $row): Family {
1197            $tree = $this->tree_service->find((int) $row->f_file);
1198
1199            return Registry::familyFactory()->mapper($tree)($row);
1200        };
1201    }
1202
1203    /**
1204     * Convert a row from any tree in the individuals table into an individual object.
1205     *
1206     * @return Closure(object):Individual
1207     */
1208    private function individualRowMapper(): Closure
1209    {
1210        return function (object $row): Individual {
1211            $tree = $this->tree_service->find((int) $row->i_file);
1212
1213            return Registry::individualFactory()->mapper($tree)($row);
1214        };
1215    }
1216
1217    /**
1218     * Convert a row from any tree in the media table into a location object.
1219     *
1220     * @return Closure(object):Location
1221     */
1222    private function locationRowMapper(): Closure
1223    {
1224        return function (object $row): Location {
1225            $tree = $this->tree_service->find((int) $row->o_file);
1226
1227            return Registry::locationFactory()->mapper($tree)($row);
1228        };
1229    }
1230
1231    /**
1232     * Convert a row from any tree in the media table into an media object.
1233     *
1234     * @return Closure(object):Media
1235     */
1236    private function mediaRowMapper(): Closure
1237    {
1238        return function (object $row): Media {
1239            $tree = $this->tree_service->find((int) $row->m_file);
1240
1241            return Registry::mediaFactory()->mapper($tree)($row);
1242        };
1243    }
1244
1245    /**
1246     * Convert a row from any tree in the other table into a note object.
1247     *
1248     * @return Closure:Note
1249     */
1250    private function noteRowMapper(): Closure
1251    {
1252        return function (object $row): Note {
1253            $tree = $this->tree_service->find((int) $row->o_file);
1254
1255            return Registry::noteFactory()->mapper($tree)($row);
1256        };
1257    }
1258
1259    /**
1260     * Convert a row from any tree in the other table into a repository object.
1261     *
1262     * @return Closure:Repository
1263     */
1264    private function repositoryRowMapper(): Closure
1265    {
1266        return function (object $row): Repository {
1267            $tree = $this->tree_service->find((int) $row->o_file);
1268
1269            return Registry::repositoryFactory()->mapper($tree)($row);
1270        };
1271    }
1272
1273    /**
1274     * Convert a row from any tree in the other table into a note object.
1275     *
1276     * @return Closure(object):SharedNote
1277     */
1278    private function sharedNoteRowMapper(): Closure
1279    {
1280        return function (object $row): Note {
1281            $tree = $this->tree_service->find((int) $row->o_file);
1282
1283            return Registry::sharedNoteFactory()->mapper($tree)($row);
1284        };
1285    }
1286
1287    /**
1288     * Convert a row from any tree in the sources table into a source object.
1289     *
1290     * @return Closure:Source
1291     */
1292    private function sourceRowMapper(): Closure
1293    {
1294        return function (object $row): Source {
1295            $tree = $this->tree_service->find((int) $row->s_file);
1296
1297            return Registry::sourceFactory()->mapper($tree)($row);
1298        };
1299    }
1300
1301    /**
1302     * Convert a row from any tree in the other table into a submission object.
1303     *
1304     * @return Closure(object):Submission
1305     */
1306    private function submissionRowMapper(): Closure
1307    {
1308        return function (object $row): Submission {
1309            $tree = $this->tree_service->find((int) $row->o_file);
1310
1311            return Registry::submissionFactory()->mapper($tree)($row);
1312        };
1313    }
1314
1315    /**
1316     * Convert a row from any tree in the other table into a submitter object.
1317     *
1318     * @return Closure(object):Submitter
1319     */
1320    private function submitterRowMapper(): Closure
1321    {
1322        return function (object $row): Submitter {
1323            $tree = $this->tree_service->find((int) $row->o_file);
1324
1325            return Registry::submitterFactory()->mapper($tree)($row);
1326        };
1327    }
1328}
1329