xref: /webtrees/app/Services/SearchService.php (revision 8155639ada601d420a9fdb23f244fc0be2786250)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Closure;
23use Fisharebest\Webtrees\Date;
24use Fisharebest\Webtrees\Exceptions\HttpServiceUnavailableException;
25use Fisharebest\Webtrees\Location;
26use Fisharebest\Webtrees\Registry;
27use Fisharebest\Webtrees\Family;
28use Fisharebest\Webtrees\Gedcom;
29use Fisharebest\Webtrees\GedcomRecord;
30use Fisharebest\Webtrees\I18N;
31use Fisharebest\Webtrees\Individual;
32use Fisharebest\Webtrees\Media;
33use Fisharebest\Webtrees\Note;
34use Fisharebest\Webtrees\Place;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\Soundex;
37use Fisharebest\Webtrees\Source;
38use Fisharebest\Webtrees\Submission;
39use Fisharebest\Webtrees\Submitter;
40use Fisharebest\Webtrees\Tree;
41use Illuminate\Database\Capsule\Manager as DB;
42use Illuminate\Database\Query\Builder;
43use Illuminate\Database\Query\Expression;
44use Illuminate\Database\Query\JoinClause;
45use Illuminate\Support\Collection;
46use stdClass;
47
48use function addcslashes;
49use function array_filter;
50use function array_map;
51use function array_unique;
52use function explode;
53use function implode;
54use function mb_stripos;
55use function preg_match;
56use function preg_quote;
57use function preg_replace;
58
59use const PHP_INT_MAX;
60
61/**
62 * Search trees for genealogy records.
63 */
64class SearchService
65{
66    // Do not attempt to show search results larger than this/
67    protected const MAX_SEARCH_RESULTS = 5000;
68
69    /** @var TreeService */
70    private $tree_service;
71
72    /**
73     * SearchService constructor.
74     *
75     * @param TreeService $tree_service
76     */
77    public function __construct(
78        TreeService $tree_service
79    ) {
80        $this->tree_service = $tree_service;
81    }
82
83    /**
84     * @param Tree[]   $trees
85     * @param string[] $search
86     *
87     * @return Collection<Family>
88     */
89    public function searchFamilies(array $trees, array $search): Collection
90    {
91        $query = DB::table('families');
92
93        $this->whereTrees($query, 'f_file', $trees);
94        $this->whereSearch($query, 'f_gedcom', $search);
95
96        return $query
97            ->get()
98            ->each($this->rowLimiter())
99            ->map($this->familyRowMapper())
100            ->filter(GedcomRecord::accessFilter())
101            ->filter($this->rawGedcomFilter($search));
102    }
103
104    /**
105     * Search for families by name.
106     *
107     * @param Tree[]   $trees
108     * @param string[] $search
109     * @param int      $offset
110     * @param int      $limit
111     *
112     * @return Collection<Family>
113     */
114    public function searchFamilyNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
115    {
116        $query = DB::table('families')
117            ->leftJoin('name AS husb_name', static function (JoinClause $join): void {
118                $join
119                    ->on('husb_name.n_file', '=', 'families.f_file')
120                    ->on('husb_name.n_id', '=', 'families.f_husb')
121                    ->where('husb_name.n_type', '<>', '_MARNM');
122            })
123            ->leftJoin('name AS wife_name', static function (JoinClause $join): void {
124                $join
125                    ->on('wife_name.n_file', '=', 'families.f_file')
126                    ->on('wife_name.n_id', '=', 'families.f_wife')
127                    ->where('wife_name.n_type', '<>', '_MARNM');
128            });
129
130        $prefix = DB::connection()->getTablePrefix();
131        $field  = new Expression('COALESCE(' . $prefix . "husb_name.n_full, '') || COALESCE(" . $prefix . "wife_name.n_full, '')");
132
133        $this->whereTrees($query, 'f_file', $trees);
134        $this->whereSearch($query, $field, $search);
135
136        $query
137            ->orderBy('husb_name.n_sort')
138            ->orderBy('wife_name.n_sort')
139            ->select(['families.*', 'husb_name.n_sort', 'wife_name.n_sort']);
140
141        return $this->paginateQuery($query, $this->familyRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
142    }
143
144    /**
145     * @param Place $place
146     *
147     * @return Collection<Family>
148     */
149    public function searchFamiliesInPlace(Place $place): Collection
150    {
151        return DB::table('families')
152            ->join('placelinks', static function (JoinClause $query) {
153                $query
154                    ->on('families.f_file', '=', 'placelinks.pl_file')
155                    ->on('families.f_id', '=', 'placelinks.pl_gid');
156            })
157            ->where('f_file', '=', $place->tree()->id())
158            ->where('pl_p_id', '=', $place->id())
159            ->select(['families.*'])
160            ->get()
161            ->each($this->rowLimiter())
162            ->map($this->familyRowMapper())
163            ->filter(GedcomRecord::accessFilter());
164    }
165
166    /**
167     * @param Tree[]   $trees
168     * @param string[] $search
169     *
170     * @return Collection<Individual>
171     */
172    public function searchIndividuals(array $trees, array $search): Collection
173    {
174        $query = DB::table('individuals');
175
176        $this->whereTrees($query, 'i_file', $trees);
177        $this->whereSearch($query, 'i_gedcom', $search);
178
179        return $query
180            ->get()
181            ->each($this->rowLimiter())
182            ->map($this->individualRowMapper())
183            ->filter(GedcomRecord::accessFilter())
184            ->filter($this->rawGedcomFilter($search));
185    }
186
187    /**
188     * Search for individuals by name.
189     *
190     * @param Tree[]   $trees
191     * @param string[] $search
192     * @param int      $offset
193     * @param int      $limit
194     *
195     * @return Collection<Individual>
196     */
197    public function searchIndividualNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
198    {
199        $query = DB::table('individuals')
200            ->join('name', static function (JoinClause $join): void {
201                $join
202                    ->on('name.n_file', '=', 'individuals.i_file')
203                    ->on('name.n_id', '=', 'individuals.i_id');
204            })
205            ->orderBy('n_sort')
206            ->select(['individuals.*', 'n_sort']);
207
208        $this->whereTrees($query, 'i_file', $trees);
209        $this->whereSearch($query, 'n_full', $search);
210
211        return $this->paginateQuery($query, $this->individualRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
212    }
213
214    /**
215     * @param Place $place
216     *
217     * @return Collection<Individual>
218     */
219    public function searchIndividualsInPlace(Place $place): Collection
220    {
221        return DB::table('individuals')
222            ->join('placelinks', static function (JoinClause $join) {
223                $join
224                    ->on('i_file', '=', 'pl_file')
225                    ->on('i_id', '=', 'pl_gid');
226            })
227            ->where('i_file', '=', $place->tree()->id())
228            ->where('pl_p_id', '=', $place->id())
229            ->select(['individuals.*'])
230            ->get()
231            ->each($this->rowLimiter())
232            ->map($this->individualRowMapper())
233            ->filter(GedcomRecord::accessFilter());
234    }
235
236    /**
237     * Search for submissions.
238     *
239     * @param Tree[]   $trees
240     * @param string[] $search
241     * @param int      $offset
242     * @param int      $limit
243     *
244     * @return Collection<Location>
245     */
246    public function searchLocations(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
247    {
248        $query = DB::table('other')
249            ->where('o_type', '=', '_LOC');
250
251        $this->whereTrees($query, 'o_file', $trees);
252        $this->whereSearch($query, 'o_gedcom', $search);
253
254        return $this->paginateQuery($query, $this->locationRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
255    }
256
257    /**
258     * Search for media objects.
259     *
260     * @param Tree[]   $trees
261     * @param string[] $search
262     * @param int      $offset
263     * @param int      $limit
264     *
265     * @return Collection<Media>
266     */
267    public function searchMedia(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
268    {
269        $query = DB::table('media');
270
271        $this->whereTrees($query, 'media.m_file', $trees);
272        $this->whereSearch($query, 'm_gedcom', $search);
273
274        return $this->paginateQuery($query, $this->mediaRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
275    }
276
277    /**
278     * Search for notes.
279     *
280     * @param Tree[]   $trees
281     * @param string[] $search
282     * @param int      $offset
283     * @param int      $limit
284     *
285     * @return Collection<Note>
286     */
287    public function searchNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
288    {
289        $query = DB::table('other')
290            ->where('o_type', '=', 'NOTE');
291
292        $this->whereTrees($query, 'o_file', $trees);
293        $this->whereSearch($query, 'o_gedcom', $search);
294
295        return $this->paginateQuery($query, $this->noteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
296    }
297
298    /**
299     * Search for repositories.
300     *
301     * @param Tree[]   $trees
302     * @param string[] $search
303     * @param int      $offset
304     * @param int      $limit
305     *
306     * @return Collection<Repository>
307     */
308    public function searchRepositories(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
309    {
310        $query = DB::table('other')
311            ->where('o_type', '=', 'REPO');
312
313        $this->whereTrees($query, 'o_file', $trees);
314        $this->whereSearch($query, 'o_gedcom', $search);
315
316        return $this->paginateQuery($query, $this->repositoryRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
317    }
318
319    /**
320     * Search for sources.
321     *
322     * @param Tree[]   $trees
323     * @param string[] $search
324     * @param int      $offset
325     * @param int      $limit
326     *
327     * @return Collection<Source>
328     */
329    public function searchSources(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
330    {
331        $query = DB::table('sources');
332
333        $this->whereTrees($query, 's_file', $trees);
334        $this->whereSearch($query, 's_gedcom', $search);
335
336        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
337    }
338
339    /**
340     * Search for sources by name.
341     *
342     * @param Tree[]   $trees
343     * @param string[] $search
344     * @param int      $offset
345     * @param int      $limit
346     *
347     * @return Collection<Source>
348     */
349    public function searchSourcesByName(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
350    {
351        $query = DB::table('sources')
352            ->orderBy('s_name');
353
354        $this->whereTrees($query, 's_file', $trees);
355        $this->whereSearch($query, 's_name', $search);
356
357        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
358    }
359
360    /**
361     * Search for sources.
362     *
363     * @param Tree[]   $trees
364     * @param string[] $search
365     * @param int      $offset
366     * @param int      $limit
367     *
368     * @return Collection<string>
369     */
370    public function searchSurnames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
371    {
372        $query = DB::table('name');
373
374        $this->whereTrees($query, 'n_file', $trees);
375        $this->whereSearch($query, 'n_surname', $search);
376
377        return $query
378            ->groupBy(['n_surname'])
379            ->orderBy('n_surname')
380            ->skip($offset)
381            ->take($limit)
382            ->pluck('n_surname');
383    }
384
385    /**
386     * Search for submissions.
387     *
388     * @param Tree[]   $trees
389     * @param string[] $search
390     * @param int      $offset
391     * @param int      $limit
392     *
393     * @return Collection<Submission>
394     */
395    public function searchSubmissions(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
396    {
397        $query = DB::table('other')
398            ->where('o_type', '=', 'SUBN');
399
400        $this->whereTrees($query, 'o_file', $trees);
401        $this->whereSearch($query, 'o_gedcom', $search);
402
403        return $this->paginateQuery($query, $this->submissionRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
404    }
405
406    /**
407     * Search for submitters.
408     *
409     * @param Tree[]   $trees
410     * @param string[] $search
411     * @param int      $offset
412     * @param int      $limit
413     *
414     * @return Collection<Submitter>
415     */
416    public function searchSubmitters(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
417    {
418        $query = DB::table('other')
419            ->where('o_type', '=', 'SUBM');
420
421        $this->whereTrees($query, 'o_file', $trees);
422        $this->whereSearch($query, 'o_gedcom', $search);
423
424        return $this->paginateQuery($query, $this->submitterRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
425    }
426
427    /**
428     * Search for places.
429     *
430     * @param Tree   $tree
431     * @param string $search
432     * @param int    $offset
433     * @param int    $limit
434     *
435     * @return Collection<Place>
436     */
437    public function searchPlaces(Tree $tree, string $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
438    {
439        $query = DB::table('places AS p0')
440            ->where('p0.p_file', '=', $tree->id())
441            ->leftJoin('places AS p1', 'p1.p_id', '=', 'p0.p_parent_id')
442            ->leftJoin('places AS p2', 'p2.p_id', '=', 'p1.p_parent_id')
443            ->leftJoin('places AS p3', 'p3.p_id', '=', 'p2.p_parent_id')
444            ->leftJoin('places AS p4', 'p4.p_id', '=', 'p3.p_parent_id')
445            ->leftJoin('places AS p5', 'p5.p_id', '=', 'p4.p_parent_id')
446            ->leftJoin('places AS p6', 'p6.p_id', '=', 'p5.p_parent_id')
447            ->leftJoin('places AS p7', 'p7.p_id', '=', 'p6.p_parent_id')
448            ->leftJoin('places AS p8', 'p8.p_id', '=', 'p7.p_parent_id')
449            ->orderBy('p0.p_place')
450            ->orderBy('p1.p_place')
451            ->orderBy('p2.p_place')
452            ->orderBy('p3.p_place')
453            ->orderBy('p4.p_place')
454            ->orderBy('p5.p_place')
455            ->orderBy('p6.p_place')
456            ->orderBy('p7.p_place')
457            ->orderBy('p8.p_place')
458            ->select([
459                'p0.p_place AS place0',
460                'p1.p_place AS place1',
461                'p2.p_place AS place2',
462                'p3.p_place AS place3',
463                'p4.p_place AS place4',
464                'p5.p_place AS place5',
465                'p6.p_place AS place6',
466                'p7.p_place AS place7',
467                'p8.p_place AS place8',
468            ]);
469
470        // Filter each level of the hierarchy.
471        foreach (explode(',', $search, 9) as $level => $string) {
472            $query->where('p' . $level . '.p_place', 'LIKE', '%' . addcslashes($string, '\\%_') . '%');
473        }
474
475        $row_mapper = static function (stdClass $row) use ($tree): Place {
476            $place = implode(', ', array_filter((array) $row));
477
478            return new Place($place, $tree);
479        };
480
481        $filter = static function (): bool {
482            return true;
483        };
484
485        return $this->paginateQuery($query, $row_mapper, $filter, $offset, $limit);
486    }
487
488    /**
489     * @param Tree[]   $trees
490     * @param string[] $fields
491     * @param string[] $modifiers
492     *
493     * @return Collection<Individual>
494     */
495    public function searchIndividualsAdvanced(array $trees, array $fields, array $modifiers): Collection
496    {
497        $fields = array_filter($fields);
498
499        $query = DB::table('individuals')
500            ->select(['individuals.*'])
501            ->distinct();
502
503        $this->whereTrees($query, 'i_file', $trees);
504
505        // Join the following tables
506        $father_name   = false;
507        $mother_name   = false;
508        $spouse_family = false;
509        $indi_name     = false;
510        $indi_dates    = [];
511        $fam_dates     = [];
512        $indi_plac     = false;
513        $fam_plac      = false;
514
515        foreach ($fields as $field_name => $field_value) {
516            if ($field_value !== '') {
517                // Fields can have up to 4 parts, but we only need the first 3 to identify
518                // which tables to select
519                $field_parts = explode(':', $field_name . '::');
520
521                if ($field_parts[0] === 'FAMC') {
522                    // Parent name - FAMC:[HUSB|WIFE]:NAME:[GIVN|SURN]
523                    if ($field_parts[1] === 'HUSB') {
524                        $father_name = true;
525                    } else {
526                        $mother_name = true;
527                    }
528                } elseif ($field_parts[0] === 'NAME') {
529                    // Individual name - NAME:[GIVN|SURN]
530                    $indi_name = true;
531                } elseif ($field_parts[0] === 'FAMS') {
532                    // Family facts - FAMS:NOTE or FAMS:[FACT]:[DATE|PLAC]
533                    $spouse_family = true;
534                    if ($field_parts[2] === 'DATE') {
535                        $fam_dates[] = $field_parts[1];
536                    } elseif ($field_parts[2] === 'PLAC') {
537                        $fam_plac = true;
538                    }
539                } else {
540                    // Individual facts - [FACT] or [FACT]:[DATE|PLAC]
541                    if ($field_parts[1] === 'DATE') {
542                        $indi_dates[] = $field_parts[0];
543                    } elseif ($field_parts[1] === 'PLAC') {
544                        $indi_plac = true;
545                    }
546                }
547            }
548        }
549
550        if ($father_name || $mother_name) {
551            $query->join('link AS l1', static function (JoinClause $join): void {
552                $join
553                    ->on('l1.l_file', '=', 'individuals.i_file')
554                    ->on('l1.l_from', '=', 'individuals.i_id')
555                    ->where('l1.l_type', '=', 'FAMC');
556            });
557
558            if ($father_name) {
559                $query->join('link AS l2', static function (JoinClause $join): void {
560                    $join
561                        ->on('l2.l_file', '=', 'l1.l_file')
562                        ->on('l2.l_from', '=', 'l1.l_to')
563                        ->where('l2.l_type', '=', 'HUSB');
564                });
565                $query->join('name AS father_name', static function (JoinClause $join): void {
566                    $join
567                        ->on('father_name.n_file', '=', 'l2.l_file')
568                        ->on('father_name.n_id', '=', 'l2.l_to');
569                });
570            }
571
572            if ($mother_name) {
573                $query->join('link AS l3', static function (JoinClause $join): void {
574                    $join
575                        ->on('l3.l_file', '=', 'l1.l_file')
576                        ->on('l3.l_from', '=', 'l1.l_to')
577                        ->where('l3.l_type', '=', 'WIFE');
578                });
579                $query->join('name AS mother_name', static function (JoinClause $join): void {
580                    $join
581                        ->on('mother_name.n_file', '=', 'l3.l_file')
582                        ->on('mother_name.n_id', '=', 'l3.l_to');
583                });
584            }
585        }
586
587        if ($spouse_family) {
588            $query->join('link AS l4', static function (JoinClause $join): void {
589                $join
590                    ->on('l4.l_file', '=', 'individuals.i_file')
591                    ->on('l4.l_from', '=', 'individuals.i_id')
592                    ->where('l4.l_type', '=', 'FAMS');
593            });
594            $query->join('families AS spouse_families', static function (JoinClause $join): void {
595                $join
596                    ->on('spouse_families.f_file', '=', 'l4.l_file')
597                    ->on('spouse_families.f_id', '=', 'l4.l_to');
598            });
599        }
600
601        if ($indi_name) {
602            $query->join('name AS individual_name', static function (JoinClause $join): void {
603                $join
604                    ->on('individual_name.n_file', '=', 'individuals.i_file')
605                    ->on('individual_name.n_id', '=', 'individuals.i_id');
606            });
607        }
608
609        foreach (array_unique($indi_dates) as $indi_date) {
610            $query->join('dates AS date_' . $indi_date, static function (JoinClause $join) use ($indi_date): void {
611                $join
612                    ->on('date_' . $indi_date . '.d_file', '=', 'individuals.i_file')
613                    ->on('date_' . $indi_date . '.d_gid', '=', 'individuals.i_id');
614            });
615        }
616
617        foreach (array_unique($fam_dates) as $fam_date) {
618            $query->join('dates AS date_' . $fam_date, static function (JoinClause $join) use ($fam_date): void {
619                $join
620                    ->on('date_' . $fam_date . '.d_file', '=', 'spouse_families.f_file')
621                    ->on('date_' . $fam_date . '.d_gid', '=', 'spouse_families.f_id');
622            });
623        }
624
625        if ($indi_plac) {
626            $query->join('placelinks AS individual_placelinks', static function (JoinClause $join): void {
627                $join
628                    ->on('individual_placelinks.pl_file', '=', 'individuals.i_file')
629                    ->on('individual_placelinks.pl_gid', '=', 'individuals.i_id');
630            });
631            $query->join('places AS individual_places', static function (JoinClause $join): void {
632                $join
633                    ->on('individual_places.p_file', '=', 'individual_placelinks.pl_file')
634                    ->on('individual_places.p_id', '=', 'individual_placelinks.pl_p_id');
635            });
636        }
637
638        if ($fam_plac) {
639            $query->join('placelinks AS familyl_placelinks', static function (JoinClause $join): void {
640                $join
641                    ->on('familyl_placelinks.pl_file', '=', 'individuals.i_file')
642                    ->on('familyl_placelinks.pl_gid', '=', 'individuals.i_id');
643            });
644            $query->join('places AS family_places', static function (JoinClause $join): void {
645                $join
646                    ->on('family_places.p_file', '=', 'familyl_placelinks.pl_file')
647                    ->on('family_places.p_id', '=', 'familyl_placelinks.pl_p_id');
648            });
649        }
650
651        foreach ($fields as $field_name => $field_value) {
652            $parts = explode(':', $field_name . ':::');
653            if ($parts[0] === 'NAME') {
654                // NAME:*
655                switch ($parts[1]) {
656                    case 'GIVN':
657                        switch ($modifiers[$field_name]) {
658                            case 'EXACT':
659                                $query->where('individual_name.n_givn', '=', $field_value);
660                                break;
661                            case 'BEGINS':
662                                $query->where('individual_name.n_givn', 'LIKE', $field_value . '%');
663                                break;
664                            case 'CONTAINS':
665                                $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
666                                break;
667                            case 'SDX_STD':
668                                $sdx = Soundex::russell($field_value);
669                                if ($sdx !== '') {
670                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_std', $sdx);
671                                } else {
672                                    // No phonetic content? Use a substring match
673                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
674                                }
675                                break;
676                            case 'SDX': // SDX uses DM by default.
677                            case 'SDX_DM':
678                                $sdx = Soundex::daitchMokotoff($field_value);
679                                if ($sdx !== '') {
680                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_dm', $sdx);
681                                } else {
682                                    // No phonetic content? Use a substring match
683                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
684                                }
685                                break;
686                        }
687                        unset($fields[$field_name]);
688                        break;
689                    case 'SURN':
690                        switch ($modifiers[$field_name]) {
691                            case 'EXACT':
692                                $query->where(function (Builder $query) use ($field_value): void {
693                                    $query
694                                        ->where('individual_name.n_surn', '=', $field_value)
695                                        ->orWhere('individual_name.n_surname', '=', $field_value);
696                                });
697                                break;
698                            case 'BEGINS':
699                                $query->where(function (Builder $query) use ($field_value): void {
700                                    $query
701                                        ->where('individual_name.n_surn', 'LIKE', $field_value . '%')
702                                        ->orWhere('individual_name.n_surname', 'LIKE', $field_value . '%');
703                                });
704                                break;
705                            case 'CONTAINS':
706                                $query->where(function (Builder $query) use ($field_value): void {
707                                    $query
708                                        ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
709                                        ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
710                                });
711                                break;
712                            case 'SDX_STD':
713                                $sdx = Soundex::russell($field_value);
714                                if ($sdx !== '') {
715                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_std', $sdx);
716                                } else {
717                                    // No phonetic content? Use a substring match
718                                    $query->where(function (Builder $query) use ($field_value): void {
719                                        $query
720                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
721                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
722                                    });
723                                }
724                                break;
725                            case 'SDX': // SDX uses DM by default.
726                            case 'SDX_DM':
727                                $sdx = Soundex::daitchMokotoff($field_value);
728                                if ($sdx !== '') {
729                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_dm', $sdx);
730                                } else {
731                                    // No phonetic content? Use a substring match
732                                    $query->where(function (Builder $query) use ($field_value): void {
733                                        $query
734                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
735                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
736                                    });
737                                }
738                                break;
739                        }
740                        unset($fields[$field_name]);
741                        break;
742                    case 'NICK':
743                    case '_MARNM':
744                    case '_HEB':
745                    case '_AKA':
746                        $like = "%\n1 " . $parts[0] . "%\n2 " . $parts[1] . ' %' . preg_quote($field_value, '/') . '%';
747                        $query->where('individuals.i_gedcom', 'LIKE', $like);
748                        break;
749                }
750            } elseif ($parts[1] === 'DATE') {
751                // *:DATE
752                $date = new Date($field_value);
753                if ($date->isOK()) {
754                    $delta = 365 * ($modifiers[$field_name] ?? 0);
755                    $query
756                        ->where('date_' . $parts[0] . '.d_fact', '=', $parts[0])
757                        ->where('date_' . $parts[0] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
758                        ->where('date_' . $parts[0] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
759                }
760                unset($fields[$field_name]);
761            } elseif ($parts[0] === 'FAMS' && $parts[2] === 'DATE') {
762                // FAMS:*:DATE
763                $date = new Date($field_value);
764                if ($date->isOK()) {
765                    $delta = 365 * $modifiers[$field_name];
766                    $query
767                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
768                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
769                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
770                }
771                unset($fields[$field_name]);
772            } elseif ($parts[1] === 'PLAC') {
773                // *:PLAC
774                // SQL can only link a place to a person/family, not to an event.
775                $query->where('individual_places.p_place', 'LIKE', '%' . $field_value . '%');
776            } elseif ($parts[0] === 'FAMS' && $parts[2] === 'PLAC') {
777                // FAMS:*:PLAC
778                // SQL can only link a place to a person/family, not to an event.
779                $query->where('family_places.p_place', 'LIKE', '%' . $field_value . '%');
780            } elseif ($parts[0] === 'FAMC' && $parts[2] === 'NAME') {
781                $table = $parts[1] === 'HUSB' ? 'father_name' : 'mother_name';
782                // NAME:*
783                switch ($parts[3]) {
784                    case 'GIVN':
785                        switch ($modifiers[$field_name]) {
786                            case 'EXACT':
787                                $query->where($table . '.n_givn', '=', $field_value);
788                                break;
789                            case 'BEGINS':
790                                $query->where($table . '.n_givn', 'LIKE', $field_value . '%');
791                                break;
792                            case 'CONTAINS':
793                                $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
794                                break;
795                            case 'SDX_STD':
796                                $sdx = Soundex::russell($field_value);
797                                if ($sdx !== '') {
798                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_std', $sdx);
799                                } else {
800                                    // No phonetic content? Use a substring match
801                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
802                                }
803                                break;
804                            case 'SDX': // SDX uses DM by default.
805                            case 'SDX_DM':
806                                $sdx = Soundex::daitchMokotoff($field_value);
807                                if ($sdx !== '') {
808                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_dm', $sdx);
809                                } else {
810                                    // No phonetic content? Use a substring match
811                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
812                                }
813                                break;
814                        }
815                        break;
816                    case 'SURN':
817                        switch ($modifiers[$field_name]) {
818                            case 'EXACT':
819                                $query->where($table . '.n_surn', '=', $field_value);
820                                break;
821                            case 'BEGINS':
822                                $query->where($table . '.n_surn', 'LIKE', $field_value . '%');
823                                break;
824                            case 'CONTAINS':
825                                $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
826                                break;
827                            case 'SDX_STD':
828                                $sdx = Soundex::russell($field_value);
829                                if ($sdx !== '') {
830                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_std', $sdx);
831                                } else {
832                                    // No phonetic content? Use a substring match
833                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
834                                }
835                                break;
836                            case 'SDX': // SDX uses DM by default.
837                            case 'SDX_DM':
838                                $sdx = Soundex::daitchMokotoff($field_value);
839                                if ($sdx !== '') {
840                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_dm', $sdx);
841                                } else {
842                                    // No phonetic content? Use a substring match
843                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
844                                }
845                                break;
846                        }
847                        break;
848                }
849                unset($fields[$field_name]);
850            } elseif ($parts[0] === 'FAMS') {
851                // e.g. searches for occupation, religion, note, etc.
852                // Initial matching only.  Need PHP to apply filter.
853                $query->where('spouse_families.f_gedcom', 'LIKE', "%\n1 " . $parts[1] . ' %' . $field_value . '%');
854            } elseif ($parts[1] === 'TYPE') {
855                // e.g. FACT:TYPE or EVEN:TYPE
856                // Initial matching only.  Need PHP to apply filter.
857                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[0] . "%\n2 TYPE %" . $field_value . '%');
858            } else {
859                // e.g. searches for occupation, religion, note, etc.
860                // Initial matching only.  Need PHP to apply filter.
861                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[0] . '%' . $parts[1] . '%' . $field_value . '%');
862            }
863        }
864
865        return $query
866            ->get()
867            ->each($this->rowLimiter())
868            ->map($this->individualRowMapper())
869            ->filter(GedcomRecord::accessFilter())
870            ->filter(static function (Individual $individual) use ($fields): bool {
871                // Check for searches which were only partially matched by SQL
872                foreach ($fields as $field_name => $field_value) {
873                    $parts = explode(':', $field_name . '::::');
874
875                    // NAME:*
876                    if ($parts[0] === 'NAME') {
877                        $regex = '/\n1 NAME.*(?:\n2.*)*\n2 ' . $parts[1] . ' .*' . preg_quote($field_value, '/') . '/i';
878
879                        if (preg_match($regex, $individual->gedcom())) {
880                            continue;
881                        }
882
883                        return false;
884                    }
885
886                    $regex = '/' . preg_quote($field_value, '/') . '/i';
887
888                    // *:PLAC
889                    if ($parts[1] === 'PLAC') {
890                        foreach ($individual->facts([$parts[0]]) as $fact) {
891                            if (preg_match($regex, $fact->place()->gedcomName())) {
892                                continue 2;
893                            }
894                        }
895                        return false;
896                    }
897
898                    // FAMS:*:PLAC
899                    if ($parts[0] === 'FAMS' && $parts[2] === 'PLAC') {
900                        foreach ($individual->spouseFamilies() as $family) {
901                            foreach ($family->facts([$parts[1]]) as $fact) {
902                                if (preg_match($regex, $fact->place()->gedcomName())) {
903                                    continue 3;
904                                }
905                            }
906                        }
907                        return false;
908                    }
909
910                    // e.g. searches for occupation, religion, note, etc.
911                    if ($parts[0] === 'FAMS') {
912                        foreach ($individual->spouseFamilies() as $family) {
913                            foreach ($family->facts([$parts[1]]) as $fact) {
914                                if (preg_match($regex, $fact->value())) {
915                                    continue 3;
916                                }
917                            }
918                        }
919                        return false;
920                    }
921
922                    // e.g. FACT:TYPE or EVEN:TYPE
923                    if ($parts[1] === 'TYPE' || $parts[1] === '_WT_USER') {
924                        foreach ($individual->facts([$parts[0]]) as $fact) {
925                            if (preg_match($regex, $fact->attribute($parts[1]))) {
926                                continue 2;
927                            }
928                        }
929
930                        return false;
931                    }
932                }
933
934                return true;
935            });
936    }
937
938    /**
939     * @param string $soundex
940     * @param string $lastname
941     * @param string $firstname
942     * @param string $place
943     * @param Tree[] $search_trees
944     *
945     * @return Collection<Individual>
946     */
947    public function searchIndividualsPhonetic(string $soundex, string $lastname, string $firstname, string $place, array $search_trees): Collection
948    {
949        switch ($soundex) {
950            default:
951            case 'Russell':
952                $givn_sdx   = Soundex::russell($firstname);
953                $surn_sdx   = Soundex::russell($lastname);
954                $plac_sdx   = Soundex::russell($place);
955                $givn_field = 'n_soundex_givn_std';
956                $surn_field = 'n_soundex_surn_std';
957                $plac_field = 'p_std_soundex';
958                break;
959            case 'DaitchM':
960                $givn_sdx   = Soundex::daitchMokotoff($firstname);
961                $surn_sdx   = Soundex::daitchMokotoff($lastname);
962                $plac_sdx   = Soundex::daitchMokotoff($place);
963                $givn_field = 'n_soundex_givn_dm';
964                $surn_field = 'n_soundex_surn_dm';
965                $plac_field = 'p_dm_soundex';
966                break;
967        }
968
969        // Nothing to search for? Return nothing.
970        if ($givn_sdx === '' && $surn_sdx === '' && $plac_sdx === '') {
971            return new Collection();
972        }
973
974        $query = DB::table('individuals')
975            ->select(['individuals.*'])
976            ->distinct();
977
978        $this->whereTrees($query, 'i_file', $search_trees);
979
980        if ($plac_sdx !== '') {
981            $query->join('placelinks', static function (JoinClause $join): void {
982                $join
983                    ->on('placelinks.pl_file', '=', 'individuals.i_file')
984                    ->on('placelinks.pl_gid', '=', 'individuals.i_id');
985            });
986            $query->join('places', static function (JoinClause $join): void {
987                $join
988                    ->on('places.p_file', '=', 'placelinks.pl_file')
989                    ->on('places.p_id', '=', 'placelinks.pl_p_id');
990            });
991
992            $this->wherePhonetic($query, $plac_field, $plac_sdx);
993        }
994
995        if ($givn_sdx !== '' || $surn_sdx !== '') {
996            $query->join('name', static function (JoinClause $join): void {
997                $join
998                    ->on('name.n_file', '=', 'individuals.i_file')
999                    ->on('name.n_id', '=', 'individuals.i_id');
1000            });
1001
1002            $this->wherePhonetic($query, $givn_field, $givn_sdx);
1003            $this->wherePhonetic($query, $surn_field, $surn_sdx);
1004        }
1005
1006        return $query
1007            ->get()
1008            ->each($this->rowLimiter())
1009            ->map($this->individualRowMapper())
1010            ->filter(GedcomRecord::accessFilter());
1011    }
1012
1013    /**
1014     * Paginate a search query.
1015     *
1016     * @param Builder $query      Searches the database for the desired records.
1017     * @param Closure $row_mapper Converts a row from the query into a record.
1018     * @param Closure $row_filter
1019     * @param int     $offset     Skip this many rows.
1020     * @param int     $limit      Take this many rows.
1021     *
1022     * @return Collection<mixed>
1023     */
1024    private function paginateQuery(Builder $query, Closure $row_mapper, Closure $row_filter, int $offset, int $limit): Collection
1025    {
1026        $collection = new Collection();
1027
1028        foreach ($query->cursor() as $row) {
1029            $record = $row_mapper($row);
1030            // searchIndividualNames() and searchFamilyNames() can return duplicate rows,
1031            // where individuals have multiple names - and we need to sort results by name.
1032            if ($collection->containsStrict($record)) {
1033                continue;
1034            }
1035            // If the object has a method "canShow()", then use it to filter for privacy.
1036            if ($row_filter($record)) {
1037                if ($offset > 0) {
1038                    $offset--;
1039                } else {
1040                    if ($limit > 0) {
1041                        $collection->push($record);
1042                    }
1043
1044                    $limit--;
1045
1046                    if ($limit === 0) {
1047                        break;
1048                    }
1049                }
1050            }
1051        }
1052
1053
1054        return $collection;
1055    }
1056
1057    /**
1058     * Apply search filters to a SQL query column.  Apply collation rules to MySQL.
1059     *
1060     * @param Builder           $query
1061     * @param Expression|string $field
1062     * @param string[]          $search_terms
1063     */
1064    private function whereSearch(Builder $query, $field, array $search_terms): void
1065    {
1066        if ($field instanceof Expression) {
1067            $field = $field->getValue();
1068        }
1069
1070        foreach ($search_terms as $search_term) {
1071            $query->where(new Expression($field), 'LIKE', '%' . addcslashes($search_term, '\\%_') . '%');
1072        }
1073    }
1074
1075    /**
1076     * Apply soundex search filters to a SQL query column.
1077     *
1078     * @param Builder           $query
1079     * @param Expression|string $field
1080     * @param string            $soundex
1081     */
1082    private function wherePhonetic(Builder $query, $field, string $soundex): void
1083    {
1084        if ($soundex !== '') {
1085            $query->where(static function (Builder $query) use ($soundex, $field): void {
1086                foreach (explode(':', $soundex) as $sdx) {
1087                    $query->orWhere($field, 'LIKE', '%' . $sdx . '%');
1088                }
1089            });
1090        }
1091    }
1092
1093    /**
1094     * @param Builder $query
1095     * @param string  $tree_id_field
1096     * @param Tree[]  $trees
1097     */
1098    private function whereTrees(Builder $query, string $tree_id_field, array $trees): void
1099    {
1100        $tree_ids = array_map(static function (Tree $tree): int {
1101            return $tree->id();
1102        }, $trees);
1103
1104        $query->whereIn($tree_id_field, $tree_ids);
1105    }
1106
1107    /**
1108     * Find the media object that uses a particular media file.
1109     *
1110     * @param string $file
1111     *
1112     * @return Media[]
1113     */
1114    public function findMediaObjectsForMediaFile(string $file): array
1115    {
1116        return DB::table('media')
1117            ->join('media_file', static function (JoinClause $join): void {
1118                $join
1119                    ->on('media_file.m_file', '=', 'media.m_file')
1120                    ->on('media_file.m_id', '=', 'media.m_id');
1121            })
1122            ->join('gedcom_setting', 'media.m_file', '=', 'gedcom_setting.gedcom_id')
1123            ->where(new Expression('setting_value || multimedia_file_refn'), '=', $file)
1124            ->select(['media.*'])
1125            ->distinct()
1126            ->get()
1127            ->map($this->mediaRowMapper())
1128            ->all();
1129    }
1130
1131    /**
1132     * A closure to filter records by privacy-filtered GEDCOM data.
1133     *
1134     * @param array<string> $search_terms
1135     *
1136     * @return Closure
1137     */
1138    private function rawGedcomFilter(array $search_terms): Closure
1139    {
1140        return static function (GedcomRecord $record) use ($search_terms): bool {
1141            // Ignore non-genealogy fields
1142            $gedcom = preg_replace('/\n\d (?:_UID|_WT_USER) .*/', '', $record->gedcom());
1143
1144            // Ignore matches in links
1145            $gedcom = preg_replace('/\n\d ' . Gedcom::REGEX_TAG . '( @' . Gedcom::REGEX_XREF . '@)?/', '', $gedcom);
1146
1147            // Re-apply the filtering
1148            foreach ($search_terms as $search_term) {
1149                if (mb_stripos($gedcom, $search_term) === false) {
1150                    return false;
1151                }
1152            }
1153
1154            return true;
1155        };
1156    }
1157
1158    /**
1159     * Searching for short or common text can give more results than the system can process.
1160     *
1161     * @param int $limit
1162     *
1163     * @return Closure
1164     */
1165    private function rowLimiter(int $limit = self::MAX_SEARCH_RESULTS): Closure
1166    {
1167        return static function () use ($limit): void {
1168            static $n = 0;
1169
1170            if (++$n > $limit) {
1171                $message = I18N::translate('The search returned too many results.');
1172
1173                throw new HttpServiceUnavailableException($message);
1174            }
1175        };
1176    }
1177
1178    /**
1179     * Convert a row from any tree in the families table into a family object.
1180     *
1181     * @return Closure
1182     */
1183    private function familyRowMapper(): Closure
1184    {
1185        return function (stdClass $row): Family {
1186            $tree = $this->tree_service->find((int) $row->f_file);
1187
1188            return Registry::familyFactory()->mapper($tree)($row);
1189        };
1190    }
1191
1192    /**
1193     * Convert a row from any tree in the individuals table into an individual object.
1194     *
1195     * @return Closure
1196     */
1197    private function individualRowMapper(): Closure
1198    {
1199        return function (stdClass $row): Individual {
1200            $tree = $this->tree_service->find((int) $row->i_file);
1201
1202            return Registry::individualFactory()->mapper($tree)($row);
1203        };
1204    }
1205
1206    /**
1207     * Convert a row from any tree in the media table into a location object.
1208     *
1209     * @return Closure
1210     */
1211    private function locationRowMapper(): Closure
1212    {
1213        return function (stdClass $row): Media {
1214            $tree = $this->tree_service->find((int) $row->m_file);
1215
1216            return Registry::locationFactory()->mapper($tree)($row);
1217        };
1218    }
1219
1220    /**
1221     * Convert a row from any tree in the media table into an media object.
1222     *
1223     * @return Closure
1224     */
1225    private function mediaRowMapper(): Closure
1226    {
1227        return function (stdClass $row): Media {
1228            $tree = $this->tree_service->find((int) $row->m_file);
1229
1230            return Registry::mediaFactory()->mapper($tree)($row);
1231        };
1232    }
1233
1234    /**
1235     * Convert a row from any tree in the other table into a note object.
1236     *
1237     * @return Closure
1238     */
1239    private function noteRowMapper(): Closure
1240    {
1241        return function (stdClass $row): Note {
1242            $tree = $this->tree_service->find((int) $row->o_file);
1243
1244            return Registry::noteFactory()->mapper($tree)($row);
1245        };
1246    }
1247
1248    /**
1249     * Convert a row from any tree in the other table into a repository object.
1250     *
1251     * @return Closure
1252     */
1253    private function repositoryRowMapper(): Closure
1254    {
1255        return function (stdClass $row): Repository {
1256            $tree = $this->tree_service->find((int) $row->o_file);
1257
1258            return Registry::repositoryFactory()->mapper($tree)($row);
1259        };
1260    }
1261
1262    /**
1263     * Convert a row from any tree in the sources table into a source object.
1264     *
1265     * @return Closure
1266     */
1267    private function sourceRowMapper(): Closure
1268    {
1269        return function (stdClass $row): Source {
1270            $tree = $this->tree_service->find((int) $row->s_file);
1271
1272            return Registry::sourceFactory()->mapper($tree)($row);
1273        };
1274    }
1275
1276    /**
1277     * Convert a row from any tree in the other table into a submission object.
1278     *
1279     * @return Closure
1280     */
1281    private function submissionRowMapper(): Closure
1282    {
1283        return function (stdClass $row): Submission {
1284            $tree = $this->tree_service->find((int) $row->o_file);
1285
1286            return Registry::submissionFactory()->mapper($tree)($row);
1287        };
1288    }
1289
1290    /**
1291     * Convert a row from any tree in the other table into a submitter object.
1292     *
1293     * @return Closure
1294     */
1295    private function submitterRowMapper(): Closure
1296    {
1297        return function (stdClass $row): Submitter {
1298            $tree = $this->tree_service->find((int) $row->o_file);
1299
1300            return Registry::submitterFactory()->mapper($tree)($row);
1301        };
1302    }
1303}
1304