xref: /webtrees/app/Services/SearchService.php (revision fceda430129b1f9eadbe3ee42fc1f28a5c37c72b)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2020 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Closure;
23use Fisharebest\Webtrees\Date;
24use Fisharebest\Webtrees\Exceptions\HttpServiceUnavailableException;
25use Fisharebest\Webtrees\Registry;
26use Fisharebest\Webtrees\Family;
27use Fisharebest\Webtrees\Gedcom;
28use Fisharebest\Webtrees\GedcomRecord;
29use Fisharebest\Webtrees\I18N;
30use Fisharebest\Webtrees\Individual;
31use Fisharebest\Webtrees\Media;
32use Fisharebest\Webtrees\Note;
33use Fisharebest\Webtrees\Place;
34use Fisharebest\Webtrees\Repository;
35use Fisharebest\Webtrees\Soundex;
36use Fisharebest\Webtrees\Source;
37use Fisharebest\Webtrees\Submitter;
38use Fisharebest\Webtrees\Tree;
39use Illuminate\Database\Capsule\Manager as DB;
40use Illuminate\Database\Query\Builder;
41use Illuminate\Database\Query\Expression;
42use Illuminate\Database\Query\JoinClause;
43use Illuminate\Support\Collection;
44use stdClass;
45
46use function addcslashes;
47use function array_filter;
48use function array_map;
49use function array_unique;
50use function explode;
51use function implode;
52use function mb_stripos;
53use function preg_match;
54use function preg_quote;
55use function preg_replace;
56
57use const PHP_INT_MAX;
58
59/**
60 * Search trees for genealogy records.
61 */
62class SearchService
63{
64    // Do not attempt to show search results larger than this/
65    protected const MAX_SEARCH_RESULTS = 5000;
66
67    /** @var TreeService */
68    private $tree_service;
69
70    /**
71     * SearchService constructor.
72     *
73     * @param TreeService $tree_service
74     */
75    public function __construct(
76        TreeService $tree_service
77    ) {
78        $this->tree_service = $tree_service;
79    }
80
81    /**
82     * @param Tree[]   $trees
83     * @param string[] $search
84     *
85     * @return Collection<Family>
86     */
87    public function searchFamilies(array $trees, array $search): Collection
88    {
89        $query = DB::table('families');
90
91        $this->whereTrees($query, 'f_file', $trees);
92        $this->whereSearch($query, 'f_gedcom', $search);
93
94        return $query
95            ->get()
96            ->each($this->rowLimiter())
97            ->map($this->familyRowMapper())
98            ->filter(GedcomRecord::accessFilter())
99            ->filter($this->rawGedcomFilter($search));
100    }
101
102    /**
103     * Search for families by name.
104     *
105     * @param Tree[]   $trees
106     * @param string[] $search
107     * @param int      $offset
108     * @param int      $limit
109     *
110     * @return Collection<Family>
111     */
112    public function searchFamilyNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
113    {
114        $query = DB::table('families')
115            ->leftJoin('name AS husb_name', static function (JoinClause $join): void {
116                $join
117                    ->on('husb_name.n_file', '=', 'families.f_file')
118                    ->on('husb_name.n_id', '=', 'families.f_husb')
119                    ->where('husb_name.n_type', '<>', '_MARNM');
120            })
121            ->leftJoin('name AS wife_name', static function (JoinClause $join): void {
122                $join
123                    ->on('wife_name.n_file', '=', 'families.f_file')
124                    ->on('wife_name.n_id', '=', 'families.f_wife')
125                    ->where('wife_name.n_type', '<>', '_MARNM');
126            });
127
128        $prefix = DB::connection()->getTablePrefix();
129        $field  = new Expression('COALESCE(' . $prefix . "husb_name.n_full, '') || COALESCE(" . $prefix . "wife_name.n_full, '')");
130
131        $this->whereTrees($query, 'f_file', $trees);
132        $this->whereSearch($query, $field, $search);
133
134        $query
135            ->orderBy('husb_name.n_sort')
136            ->orderBy('wife_name.n_sort')
137            ->select(['families.*', 'husb_name.n_sort', 'wife_name.n_sort']);
138
139        return $this->paginateQuery($query, $this->familyRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
140    }
141
142    /**
143     * @param Place $place
144     *
145     * @return Collection<Family>
146     */
147    public function searchFamiliesInPlace(Place $place): Collection
148    {
149        return DB::table('families')
150            ->join('placelinks', static function (JoinClause $query) {
151                $query
152                    ->on('families.f_file', '=', 'placelinks.pl_file')
153                    ->on('families.f_id', '=', 'placelinks.pl_gid');
154            })
155            ->where('f_file', '=', $place->tree()->id())
156            ->where('pl_p_id', '=', $place->id())
157            ->select(['families.*'])
158            ->get()
159            ->each($this->rowLimiter())
160            ->map($this->familyRowMapper())
161            ->filter(GedcomRecord::accessFilter());
162    }
163
164    /**
165     * @param Tree[]   $trees
166     * @param string[] $search
167     *
168     * @return Collection<Individual>
169     */
170    public function searchIndividuals(array $trees, array $search): Collection
171    {
172        $query = DB::table('individuals');
173
174        $this->whereTrees($query, 'i_file', $trees);
175        $this->whereSearch($query, 'i_gedcom', $search);
176
177        return $query
178            ->get()
179            ->each($this->rowLimiter())
180            ->map($this->individualRowMapper())
181            ->filter(GedcomRecord::accessFilter())
182            ->filter($this->rawGedcomFilter($search));
183    }
184
185    /**
186     * Search for individuals by name.
187     *
188     * @param Tree[]   $trees
189     * @param string[] $search
190     * @param int      $offset
191     * @param int      $limit
192     *
193     * @return Collection<Individual>
194     */
195    public function searchIndividualNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
196    {
197        $query = DB::table('individuals')
198            ->join('name', static function (JoinClause $join): void {
199                $join
200                    ->on('name.n_file', '=', 'individuals.i_file')
201                    ->on('name.n_id', '=', 'individuals.i_id');
202            })
203            ->orderBy('n_sort')
204            ->select(['individuals.*', 'n_sort']);
205
206        $this->whereTrees($query, 'i_file', $trees);
207        $this->whereSearch($query, 'n_full', $search);
208
209        return $this->paginateQuery($query, $this->individualRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
210    }
211
212    /**
213     * @param Place $place
214     *
215     * @return Collection<Individual>
216     */
217    public function searchIndividualsInPlace(Place $place): Collection
218    {
219        return DB::table('individuals')
220            ->join('placelinks', static function (JoinClause $join) {
221                $join
222                    ->on('i_file', '=', 'pl_file')
223                    ->on('i_id', '=', 'pl_gid');
224            })
225            ->where('i_file', '=', $place->tree()->id())
226            ->where('pl_p_id', '=', $place->id())
227            ->select(['individuals.*'])
228            ->get()
229            ->each($this->rowLimiter())
230            ->map($this->individualRowMapper())
231            ->filter(GedcomRecord::accessFilter());
232    }
233
234    /**
235     * Search for media objects.
236     *
237     * @param Tree[]   $trees
238     * @param string[] $search
239     * @param int      $offset
240     * @param int      $limit
241     *
242     * @return Collection<Media>
243     */
244    public function searchMedia(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
245    {
246        $query = DB::table('media');
247
248        $this->whereTrees($query, 'media.m_file', $trees);
249        $this->whereSearch($query, 'm_gedcom', $search);
250
251        return $this->paginateQuery($query, $this->mediaRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
252    }
253
254    /**
255     * Search for notes.
256     *
257     * @param Tree[]   $trees
258     * @param string[] $search
259     * @param int      $offset
260     * @param int      $limit
261     *
262     * @return Collection<Note>
263     */
264    public function searchNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
265    {
266        $query = DB::table('other')
267            ->where('o_type', '=', 'NOTE');
268
269        $this->whereTrees($query, 'o_file', $trees);
270        $this->whereSearch($query, 'o_gedcom', $search);
271
272        return $this->paginateQuery($query, $this->noteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
273    }
274
275    /**
276     * Search for repositories.
277     *
278     * @param Tree[]   $trees
279     * @param string[] $search
280     * @param int      $offset
281     * @param int      $limit
282     *
283     * @return Collection<Repository>
284     */
285    public function searchRepositories(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
286    {
287        $query = DB::table('other')
288            ->where('o_type', '=', 'REPO');
289
290        $this->whereTrees($query, 'o_file', $trees);
291        $this->whereSearch($query, 'o_gedcom', $search);
292
293        return $this->paginateQuery($query, $this->repositoryRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
294    }
295
296    /**
297     * Search for sources.
298     *
299     * @param Tree[]   $trees
300     * @param string[] $search
301     * @param int      $offset
302     * @param int      $limit
303     *
304     * @return Collection<Source>
305     */
306    public function searchSources(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
307    {
308        $query = DB::table('sources');
309
310        $this->whereTrees($query, 's_file', $trees);
311        $this->whereSearch($query, 's_gedcom', $search);
312
313        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
314    }
315
316    /**
317     * Search for sources by name.
318     *
319     * @param Tree[]   $trees
320     * @param string[] $search
321     * @param int      $offset
322     * @param int      $limit
323     *
324     * @return Collection<Source>
325     */
326    public function searchSourcesByName(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
327    {
328        $query = DB::table('sources')
329            ->orderBy('s_name');
330
331        $this->whereTrees($query, 's_file', $trees);
332        $this->whereSearch($query, 's_name', $search);
333
334        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
335    }
336
337    /**
338     * Search for sources.
339     *
340     * @param Tree[]   $trees
341     * @param string[] $search
342     * @param int      $offset
343     * @param int      $limit
344     *
345     * @return Collection<string>
346     */
347    public function searchSurnames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
348    {
349        $query = DB::table('name');
350
351        $this->whereTrees($query, 'n_file', $trees);
352        $this->whereSearch($query, 'n_surname', $search);
353
354        return $query
355            ->groupBy(['n_surname'])
356            ->orderBy('n_surname')
357            ->skip($offset)
358            ->take($limit)
359            ->pluck('n_surname');
360    }
361
362    /**
363     * Search for submitters.
364     *
365     * @param Tree[]   $trees
366     * @param string[] $search
367     * @param int      $offset
368     * @param int      $limit
369     *
370     * @return Collection<Submitter>
371     */
372    public function searchSubmitters(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
373    {
374        $query = DB::table('other')
375            ->where('o_type', '=', 'SUBM');
376
377        $this->whereTrees($query, 'o_file', $trees);
378        $this->whereSearch($query, 'o_gedcom', $search);
379
380        return $this->paginateQuery($query, $this->submitterRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
381    }
382
383    /**
384     * Search for places.
385     *
386     * @param Tree   $tree
387     * @param string $search
388     * @param int    $offset
389     * @param int    $limit
390     *
391     * @return Collection<Place>
392     */
393    public function searchPlaces(Tree $tree, string $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
394    {
395        $query = DB::table('places AS p0')
396            ->where('p0.p_file', '=', $tree->id())
397            ->leftJoin('places AS p1', 'p1.p_id', '=', 'p0.p_parent_id')
398            ->leftJoin('places AS p2', 'p2.p_id', '=', 'p1.p_parent_id')
399            ->leftJoin('places AS p3', 'p3.p_id', '=', 'p2.p_parent_id')
400            ->leftJoin('places AS p4', 'p4.p_id', '=', 'p3.p_parent_id')
401            ->leftJoin('places AS p5', 'p5.p_id', '=', 'p4.p_parent_id')
402            ->leftJoin('places AS p6', 'p6.p_id', '=', 'p5.p_parent_id')
403            ->leftJoin('places AS p7', 'p7.p_id', '=', 'p6.p_parent_id')
404            ->leftJoin('places AS p8', 'p8.p_id', '=', 'p7.p_parent_id')
405            ->orderBy('p0.p_place')
406            ->orderBy('p1.p_place')
407            ->orderBy('p2.p_place')
408            ->orderBy('p3.p_place')
409            ->orderBy('p4.p_place')
410            ->orderBy('p5.p_place')
411            ->orderBy('p6.p_place')
412            ->orderBy('p7.p_place')
413            ->orderBy('p8.p_place')
414            ->select([
415                'p0.p_place AS place0',
416                'p1.p_place AS place1',
417                'p2.p_place AS place2',
418                'p3.p_place AS place3',
419                'p4.p_place AS place4',
420                'p5.p_place AS place5',
421                'p6.p_place AS place6',
422                'p7.p_place AS place7',
423                'p8.p_place AS place8',
424            ]);
425
426        // Filter each level of the hierarchy.
427        foreach (explode(',', $search, 9) as $level => $string) {
428            $query->where('p' . $level . '.p_place', 'LIKE', '%' . addcslashes($string, '\\%_') . '%');
429        }
430
431        $row_mapper = static function (stdClass $row) use ($tree): Place {
432            $place = implode(', ', array_filter((array) $row));
433
434            return new Place($place, $tree);
435        };
436
437        $filter = static function (): bool {
438            return true;
439        };
440
441        return $this->paginateQuery($query, $row_mapper, $filter, $offset, $limit);
442    }
443
444    /**
445     * @param Tree[]   $trees
446     * @param string[] $fields
447     * @param string[] $modifiers
448     *
449     * @return Collection<Individual>
450     */
451    public function searchIndividualsAdvanced(array $trees, array $fields, array $modifiers): Collection
452    {
453        $fields = array_filter($fields);
454
455        $query = DB::table('individuals')
456            ->select(['individuals.*'])
457            ->distinct();
458
459        $this->whereTrees($query, 'i_file', $trees);
460
461        // Join the following tables
462        $father_name   = false;
463        $mother_name   = false;
464        $spouse_family = false;
465        $indi_name     = false;
466        $indi_dates    = [];
467        $fam_dates     = [];
468        $indi_plac     = false;
469        $fam_plac      = false;
470
471        foreach ($fields as $field_name => $field_value) {
472            if ($field_value !== '') {
473                // Fields can have up to 4 parts, but we only need the first 3 to identify
474                // which tables to select
475                $field_parts = explode(':', $field_name . '::');
476
477                if ($field_parts[0] === 'FAMC') {
478                    // Parent name - FAMC:[HUSB|WIFE]:NAME:[GIVN|SURN]
479                    if ($field_parts[1] === 'HUSB') {
480                        $father_name = true;
481                    } else {
482                        $mother_name = true;
483                    }
484                } elseif ($field_parts[0] === 'NAME') {
485                    // Individual name - NAME:[GIVN|SURN]
486                    $indi_name = true;
487                } elseif ($field_parts[0] === 'FAMS') {
488                    // Family facts - FAMS:NOTE or FAMS:[FACT]:[DATE|PLAC]
489                    $spouse_family = true;
490                    if ($field_parts[2] === 'DATE') {
491                        $fam_dates[] = $field_parts[1];
492                    } elseif ($field_parts[2] === 'PLAC') {
493                        $fam_plac = true;
494                    }
495                } else {
496                    // Individual facts - [FACT] or [FACT]:[DATE|PLAC]
497                    if ($field_parts[1] === 'DATE') {
498                        $indi_dates[] = $field_parts[0];
499                    } elseif ($field_parts[1] === 'PLAC') {
500                        $indi_plac = true;
501                    }
502                }
503            }
504        }
505
506        if ($father_name || $mother_name) {
507            $query->join('link AS l1', static function (JoinClause $join): void {
508                $join
509                    ->on('l1.l_file', '=', 'individuals.i_file')
510                    ->on('l1.l_from', '=', 'individuals.i_id')
511                    ->where('l1.l_type', '=', 'FAMC');
512            });
513
514            if ($father_name) {
515                $query->join('link AS l2', static function (JoinClause $join): void {
516                    $join
517                        ->on('l2.l_file', '=', 'l1.l_file')
518                        ->on('l2.l_from', '=', 'l1.l_to')
519                        ->where('l2.l_type', '=', 'HUSB');
520                });
521                $query->join('name AS father_name', static function (JoinClause $join): void {
522                    $join
523                        ->on('father_name.n_file', '=', 'l2.l_file')
524                        ->on('father_name.n_id', '=', 'l2.l_to');
525                });
526            }
527
528            if ($mother_name) {
529                $query->join('link AS l3', static function (JoinClause $join): void {
530                    $join
531                        ->on('l3.l_file', '=', 'l1.l_file')
532                        ->on('l3.l_from', '=', 'l1.l_to')
533                        ->where('l3.l_type', '=', 'WIFE');
534                });
535                $query->join('name AS mother_name', static function (JoinClause $join): void {
536                    $join
537                        ->on('mother_name.n_file', '=', 'l3.l_file')
538                        ->on('mother_name.n_id', '=', 'l3.l_to');
539                });
540            }
541        }
542
543        if ($spouse_family) {
544            $query->join('link AS l4', static function (JoinClause $join): void {
545                $join
546                    ->on('l4.l_file', '=', 'individuals.i_file')
547                    ->on('l4.l_from', '=', 'individuals.i_id')
548                    ->where('l4.l_type', '=', 'FAMS');
549            });
550            $query->join('families AS spouse_families', static function (JoinClause $join): void {
551                $join
552                    ->on('spouse_families.f_file', '=', 'l4.l_file')
553                    ->on('spouse_families.f_id', '=', 'l4.l_to');
554            });
555        }
556
557        if ($indi_name) {
558            $query->join('name AS individual_name', static function (JoinClause $join): void {
559                $join
560                    ->on('individual_name.n_file', '=', 'individuals.i_file')
561                    ->on('individual_name.n_id', '=', 'individuals.i_id');
562            });
563        }
564
565        foreach (array_unique($indi_dates) as $indi_date) {
566            $query->join('dates AS date_' . $indi_date, static function (JoinClause $join) use ($indi_date): void {
567                $join
568                    ->on('date_' . $indi_date . '.d_file', '=', 'individuals.i_file')
569                    ->on('date_' . $indi_date . '.d_gid', '=', 'individuals.i_id');
570            });
571        }
572
573        foreach (array_unique($fam_dates) as $fam_date) {
574            $query->join('dates AS date_' . $fam_date, static function (JoinClause $join) use ($fam_date): void {
575                $join
576                    ->on('date_' . $fam_date . '.d_file', '=', 'spouse_families.f_file')
577                    ->on('date_' . $fam_date . '.d_gid', '=', 'spouse_families.f_id');
578            });
579        }
580
581        if ($indi_plac) {
582            $query->join('placelinks AS individual_placelinks', static function (JoinClause $join): void {
583                $join
584                    ->on('individual_placelinks.pl_file', '=', 'individuals.i_file')
585                    ->on('individual_placelinks.pl_gid', '=', 'individuals.i_id');
586            });
587            $query->join('places AS individual_places', static function (JoinClause $join): void {
588                $join
589                    ->on('individual_places.p_file', '=', 'individual_placelinks.pl_file')
590                    ->on('individual_places.p_id', '=', 'individual_placelinks.pl_p_id');
591            });
592        }
593
594        if ($fam_plac) {
595            $query->join('placelinks AS familyl_placelinks', static function (JoinClause $join): void {
596                $join
597                    ->on('familyl_placelinks.pl_file', '=', 'individuals.i_file')
598                    ->on('familyl_placelinks.pl_gid', '=', 'individuals.i_id');
599            });
600            $query->join('places AS family_places', static function (JoinClause $join): void {
601                $join
602                    ->on('family_places.p_file', '=', 'familyl_placelinks.pl_file')
603                    ->on('family_places.p_id', '=', 'familyl_placelinks.pl_p_id');
604            });
605        }
606
607        foreach ($fields as $field_name => $field_value) {
608            $parts = explode(':', $field_name . ':::');
609            if ($parts[0] === 'NAME') {
610                // NAME:*
611                switch ($parts[1]) {
612                    case 'GIVN':
613                        switch ($modifiers[$field_name]) {
614                            case 'EXACT':
615                                $query->where('individual_name.n_givn', '=', $field_value);
616                                break;
617                            case 'BEGINS':
618                                $query->where('individual_name.n_givn', 'LIKE', $field_value . '%');
619                                break;
620                            case 'CONTAINS':
621                                $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
622                                break;
623                            case 'SDX_STD':
624                                $sdx = Soundex::russell($field_value);
625                                if ($sdx !== '') {
626                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_std', $sdx);
627                                } else {
628                                    // No phonetic content? Use a substring match
629                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
630                                }
631                                break;
632                            case 'SDX': // SDX uses DM by default.
633                            case 'SDX_DM':
634                                $sdx = Soundex::daitchMokotoff($field_value);
635                                if ($sdx !== '') {
636                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_dm', $sdx);
637                                } else {
638                                    // No phonetic content? Use a substring match
639                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
640                                }
641                                break;
642                        }
643                        unset($fields[$field_name]);
644                        break;
645                    case 'SURN':
646                        switch ($modifiers[$field_name]) {
647                            case 'EXACT':
648                                $query->where(function (Builder $query) use ($field_value): void {
649                                    $query
650                                        ->where('individual_name.n_surn', '=', $field_value)
651                                        ->orWhere('individual_name.n_surname', '=', $field_value);
652                                });
653                                break;
654                            case 'BEGINS':
655                                $query->where(function (Builder $query) use ($field_value): void {
656                                    $query
657                                        ->where('individual_name.n_surn', 'LIKE', $field_value . '%')
658                                        ->orWhere('individual_name.n_surname', 'LIKE', $field_value . '%');
659                                });
660                                break;
661                            case 'CONTAINS':
662                                $query->where(function (Builder $query) use ($field_value): void {
663                                    $query
664                                        ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
665                                        ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
666                                });
667                                break;
668                            case 'SDX_STD':
669                                $sdx = Soundex::russell($field_value);
670                                if ($sdx !== '') {
671                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_std', $sdx);
672                                } else {
673                                    // No phonetic content? Use a substring match
674                                    $query->where(function (Builder $query) use ($field_value): void {
675                                        $query
676                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
677                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
678                                    });
679                                }
680                                break;
681                            case 'SDX': // SDX uses DM by default.
682                            case 'SDX_DM':
683                                $sdx = Soundex::daitchMokotoff($field_value);
684                                if ($sdx !== '') {
685                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_dm', $sdx);
686                                } else {
687                                    // No phonetic content? Use a substring match
688                                    $query->where(function (Builder $query) use ($field_value): void {
689                                        $query
690                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
691                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
692                                    });
693                                }
694                                break;
695                        }
696                        unset($fields[$field_name]);
697                        break;
698                    case 'NICK':
699                    case '_MARNM':
700                    case '_HEB':
701                    case '_AKA':
702                        $like = "%\n1 " . $parts[0] . "%\n2 " . $parts[1] . ' %' . preg_quote($field_value, '/') . '%';
703                        $query->where('individuals.i_gedcom', 'LIKE', $like);
704                        break;
705                }
706            } elseif ($parts[1] === 'DATE') {
707                // *:DATE
708                $date = new Date($field_value);
709                if ($date->isOK()) {
710                    $delta = 365 * ($modifiers[$field_name] ?? 0);
711                    $query
712                        ->where('date_' . $parts[0] . '.d_fact', '=', $parts[0])
713                        ->where('date_' . $parts[0] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
714                        ->where('date_' . $parts[0] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
715                }
716                unset($fields[$field_name]);
717            } elseif ($parts[0] === 'FAMS' && $parts[2] === 'DATE') {
718                // FAMS:*:DATE
719                $date = new Date($field_value);
720                if ($date->isOK()) {
721                    $delta = 365 * $modifiers[$field_name];
722                    $query
723                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
724                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
725                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
726                }
727                unset($fields[$field_name]);
728            } elseif ($parts[1] === 'PLAC') {
729                // *:PLAC
730                // SQL can only link a place to a person/family, not to an event.
731                $query->where('individual_places.p_place', 'LIKE', '%' . $field_value . '%');
732            } elseif ($parts[0] === 'FAMS' && $parts[2] === 'PLAC') {
733                // FAMS:*:PLAC
734                // SQL can only link a place to a person/family, not to an event.
735                $query->where('family_places.p_place', 'LIKE', '%' . $field_value . '%');
736            } elseif ($parts[0] === 'FAMC' && $parts[2] === 'NAME') {
737                $table = $parts[1] === 'HUSB' ? 'father_name' : 'mother_name';
738                // NAME:*
739                switch ($parts[3]) {
740                    case 'GIVN':
741                        switch ($modifiers[$field_name]) {
742                            case 'EXACT':
743                                $query->where($table . '.n_givn', '=', $field_value);
744                                break;
745                            case 'BEGINS':
746                                $query->where($table . '.n_givn', 'LIKE', $field_value . '%');
747                                break;
748                            case 'CONTAINS':
749                                $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
750                                break;
751                            case 'SDX_STD':
752                                $sdx = Soundex::russell($field_value);
753                                if ($sdx !== '') {
754                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_std', $sdx);
755                                } else {
756                                    // No phonetic content? Use a substring match
757                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
758                                }
759                                break;
760                            case 'SDX': // SDX uses DM by default.
761                            case 'SDX_DM':
762                                $sdx = Soundex::daitchMokotoff($field_value);
763                                if ($sdx !== '') {
764                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_dm', $sdx);
765                                } else {
766                                    // No phonetic content? Use a substring match
767                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
768                                }
769                                break;
770                        }
771                        break;
772                    case 'SURN':
773                        switch ($modifiers[$field_name]) {
774                            case 'EXACT':
775                                $query->where($table . '.n_surn', '=', $field_value);
776                                break;
777                            case 'BEGINS':
778                                $query->where($table . '.n_surn', 'LIKE', $field_value . '%');
779                                break;
780                            case 'CONTAINS':
781                                $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
782                                break;
783                            case 'SDX_STD':
784                                $sdx = Soundex::russell($field_value);
785                                if ($sdx !== '') {
786                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_std', $sdx);
787                                } else {
788                                    // No phonetic content? Use a substring match
789                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
790                                }
791                                break;
792                            case 'SDX': // SDX uses DM by default.
793                            case 'SDX_DM':
794                                $sdx = Soundex::daitchMokotoff($field_value);
795                                if ($sdx !== '') {
796                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_dm', $sdx);
797                                } else {
798                                    // No phonetic content? Use a substring match
799                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
800                                }
801                                break;
802                        }
803                        break;
804                }
805                unset($fields[$field_name]);
806            } elseif ($parts[0] === 'FAMS') {
807                // e.g. searches for occupation, religion, note, etc.
808                // Initial matching only.  Need PHP to apply filter.
809                $query->where('spouse_families.f_gedcom', 'LIKE', "%\n1 " . $parts[1] . ' %' . $field_value . '%');
810            } elseif ($parts[1] === 'TYPE') {
811                // e.g. FACT:TYPE or EVEN:TYPE
812                // Initial matching only.  Need PHP to apply filter.
813                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[0] . "%\n2 TYPE %" . $field_value . '%');
814            } else {
815                // e.g. searches for occupation, religion, note, etc.
816                // Initial matching only.  Need PHP to apply filter.
817                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[0] . '%' . $parts[1] . '%' . $field_value . '%');
818            }
819        }
820
821        return $query
822            ->get()
823            ->each($this->rowLimiter())
824            ->map($this->individualRowMapper())
825            ->filter(GedcomRecord::accessFilter())
826            ->filter(static function (Individual $individual) use ($fields): bool {
827                // Check for searches which were only partially matched by SQL
828                foreach ($fields as $field_name => $field_value) {
829                    $parts = explode(':', $field_name . '::::');
830
831                    // NAME:*
832                    if ($parts[0] === 'NAME') {
833                        $regex = '/\n1 NAME.*(?:\n2.*)*\n2 ' . $parts[1] . ' .*' . preg_quote($field_value, '/') . '/i';
834
835                        if (preg_match($regex, $individual->gedcom())) {
836                            continue;
837                        }
838
839                        return false;
840                    }
841
842                    $regex = '/' . preg_quote($field_value, '/') . '/i';
843
844                    // *:PLAC
845                    if ($parts[1] === 'PLAC') {
846                        foreach ($individual->facts([$parts[0]]) as $fact) {
847                            if (preg_match($regex, $fact->place()->gedcomName())) {
848                                continue 2;
849                            }
850                        }
851                        return false;
852                    }
853
854                    // FAMS:*:PLAC
855                    if ($parts[0] === 'FAMS' && $parts[2] === 'PLAC') {
856                        foreach ($individual->spouseFamilies() as $family) {
857                            foreach ($family->facts([$parts[1]]) as $fact) {
858                                if (preg_match($regex, $fact->place()->gedcomName())) {
859                                    continue 2;
860                                }
861                            }
862                        }
863                        return false;
864                    }
865
866                    // e.g. searches for occupation, religion, note, etc.
867                    if ($parts[0] === 'FAMS') {
868                        foreach ($individual->spouseFamilies() as $family) {
869                            foreach ($family->facts([$parts[1]]) as $fact) {
870                                if (preg_match($regex, $fact->value())) {
871                                    continue 3;
872                                }
873                            }
874                        }
875                        return false;
876                    }
877
878                    // e.g. FACT:TYPE or EVEN:TYPE
879                    if ($parts[1] === 'TYPE' || $parts[1] === '_WT_USER') {
880                        foreach ($individual->facts([$parts[0]]) as $fact) {
881                            if (preg_match($regex, $fact->attribute($parts[1]))) {
882                                continue 2;
883                            }
884                        }
885
886                        return false;
887                    }
888                }
889
890                return true;
891            });
892    }
893
894    /**
895     * @param string $soundex
896     * @param string $lastname
897     * @param string $firstname
898     * @param string $place
899     * @param Tree[] $search_trees
900     *
901     * @return Collection<Individual>
902     */
903    public function searchIndividualsPhonetic(string $soundex, string $lastname, string $firstname, string $place, array $search_trees): Collection
904    {
905        switch ($soundex) {
906            default:
907            case 'Russell':
908                $givn_sdx   = Soundex::russell($firstname);
909                $surn_sdx   = Soundex::russell($lastname);
910                $plac_sdx   = Soundex::russell($place);
911                $givn_field = 'n_soundex_givn_std';
912                $surn_field = 'n_soundex_surn_std';
913                $plac_field = 'p_std_soundex';
914                break;
915            case 'DaitchM':
916                $givn_sdx   = Soundex::daitchMokotoff($firstname);
917                $surn_sdx   = Soundex::daitchMokotoff($lastname);
918                $plac_sdx   = Soundex::daitchMokotoff($place);
919                $givn_field = 'n_soundex_givn_dm';
920                $surn_field = 'n_soundex_surn_dm';
921                $plac_field = 'p_dm_soundex';
922                break;
923        }
924
925        // Nothing to search for? Return nothing.
926        if ($givn_sdx === '' && $surn_sdx === '' && $plac_sdx === '') {
927            return new Collection();
928        }
929
930        $query = DB::table('individuals')
931            ->select(['individuals.*'])
932            ->distinct();
933
934        $this->whereTrees($query, 'i_file', $search_trees);
935
936        if ($plac_sdx !== '') {
937            $query->join('placelinks', static function (JoinClause $join): void {
938                $join
939                    ->on('placelinks.pl_file', '=', 'individuals.i_file')
940                    ->on('placelinks.pl_gid', '=', 'individuals.i_id');
941            });
942            $query->join('places', static function (JoinClause $join): void {
943                $join
944                    ->on('places.p_file', '=', 'placelinks.pl_file')
945                    ->on('places.p_id', '=', 'placelinks.pl_p_id');
946            });
947
948            $this->wherePhonetic($query, $plac_field, $plac_sdx);
949        }
950
951        if ($givn_sdx !== '' || $surn_sdx !== '') {
952            $query->join('name', static function (JoinClause $join): void {
953                $join
954                    ->on('name.n_file', '=', 'individuals.i_file')
955                    ->on('name.n_id', '=', 'individuals.i_id');
956            });
957
958            $this->wherePhonetic($query, $givn_field, $givn_sdx);
959            $this->wherePhonetic($query, $surn_field, $surn_sdx);
960        }
961
962        return $query
963            ->get()
964            ->each($this->rowLimiter())
965            ->map($this->individualRowMapper())
966            ->filter(GedcomRecord::accessFilter());
967    }
968
969    /**
970     * Paginate a search query.
971     *
972     * @param Builder $query      Searches the database for the desired records.
973     * @param Closure $row_mapper Converts a row from the query into a record.
974     * @param Closure $row_filter
975     * @param int     $offset     Skip this many rows.
976     * @param int     $limit      Take this many rows.
977     *
978     * @return Collection<mixed>
979     */
980    private function paginateQuery(Builder $query, Closure $row_mapper, Closure $row_filter, int $offset, int $limit): Collection
981    {
982        $collection = new Collection();
983
984        foreach ($query->cursor() as $row) {
985            $record = $row_mapper($row);
986            // searchIndividualNames() and searchFamilyNames() can return duplicate rows,
987            // where individuals have multiple names - and we need to sort results by name.
988            if ($collection->containsStrict($record)) {
989                continue;
990            }
991            // If the object has a method "canShow()", then use it to filter for privacy.
992            if ($row_filter($record)) {
993                if ($offset > 0) {
994                    $offset--;
995                } else {
996                    if ($limit > 0) {
997                        $collection->push($record);
998                    }
999
1000                    $limit--;
1001
1002                    if ($limit === 0) {
1003                        break;
1004                    }
1005                }
1006            }
1007        }
1008
1009
1010        return $collection;
1011    }
1012
1013    /**
1014     * Apply search filters to a SQL query column.  Apply collation rules to MySQL.
1015     *
1016     * @param Builder           $query
1017     * @param Expression|string $field
1018     * @param string[]          $search_terms
1019     */
1020    private function whereSearch(Builder $query, $field, array $search_terms): void
1021    {
1022        if ($field instanceof Expression) {
1023            $field = $field->getValue();
1024        }
1025
1026        foreach ($search_terms as $search_term) {
1027            $query->where(new Expression($field), 'LIKE', '%' . addcslashes($search_term, '\\%_') . '%');
1028        }
1029    }
1030
1031    /**
1032     * Apply soundex search filters to a SQL query column.
1033     *
1034     * @param Builder           $query
1035     * @param Expression|string $field
1036     * @param string            $soundex
1037     */
1038    private function wherePhonetic(Builder $query, $field, string $soundex): void
1039    {
1040        if ($soundex !== '') {
1041            $query->where(static function (Builder $query) use ($soundex, $field): void {
1042                foreach (explode(':', $soundex) as $sdx) {
1043                    $query->orWhere($field, 'LIKE', '%' . $sdx . '%');
1044                }
1045            });
1046        }
1047    }
1048
1049    /**
1050     * @param Builder $query
1051     * @param string  $tree_id_field
1052     * @param Tree[]  $trees
1053     */
1054    private function whereTrees(Builder $query, string $tree_id_field, array $trees): void
1055    {
1056        $tree_ids = array_map(static function (Tree $tree): int {
1057            return $tree->id();
1058        }, $trees);
1059
1060        $query->whereIn($tree_id_field, $tree_ids);
1061    }
1062
1063    /**
1064     * Find the media object that uses a particular media file.
1065     *
1066     * @param string $file
1067     *
1068     * @return Media[]
1069     */
1070    public function findMediaObjectsForMediaFile(string $file): array
1071    {
1072        return DB::table('media')
1073            ->join('media_file', static function (JoinClause $join): void {
1074                $join
1075                    ->on('media_file.m_file', '=', 'media.m_file')
1076                    ->on('media_file.m_id', '=', 'media.m_id');
1077            })
1078            ->join('gedcom_setting', 'media.m_file', '=', 'gedcom_setting.gedcom_id')
1079            ->where(new Expression('setting_value || multimedia_file_refn'), '=', $file)
1080            ->select(['media.*'])
1081            ->distinct()
1082            ->get()
1083            ->map($this->mediaRowMapper())
1084            ->all();
1085    }
1086
1087    /**
1088     * A closure to filter records by privacy-filtered GEDCOM data.
1089     *
1090     * @param array<string> $search_terms
1091     *
1092     * @return Closure
1093     */
1094    private function rawGedcomFilter(array $search_terms): Closure
1095    {
1096        return static function (GedcomRecord $record) use ($search_terms): bool {
1097            // Ignore non-genealogy fields
1098            $gedcom = preg_replace('/\n\d (?:_UID|_WT_USER) .*/', '', $record->gedcom());
1099
1100            // Ignore matches in links
1101            $gedcom = preg_replace('/\n\d ' . Gedcom::REGEX_TAG . '( @' . Gedcom::REGEX_XREF . '@)?/', '', $gedcom);
1102
1103            // Re-apply the filtering
1104            foreach ($search_terms as $search_term) {
1105                if (mb_stripos($gedcom, $search_term) === false) {
1106                    return false;
1107                }
1108            }
1109
1110            return true;
1111        };
1112    }
1113
1114    /**
1115     * Searching for short or common text can give more results than the system can process.
1116     *
1117     * @param int $limit
1118     *
1119     * @return Closure
1120     */
1121    private function rowLimiter(int $limit = self::MAX_SEARCH_RESULTS): Closure
1122    {
1123        return static function () use ($limit): void {
1124            static $n = 0;
1125
1126            if (++$n > $limit) {
1127                $message = I18N::translate('The search returned too many results.');
1128
1129                throw new HttpServiceUnavailableException($message);
1130            }
1131        };
1132    }
1133
1134    /**
1135     * Convert a row from any tree in the families table into a family object.
1136     *
1137     * @return Closure
1138     */
1139    private function familyRowMapper(): Closure
1140    {
1141        return function (stdClass $row): Family {
1142            $tree = $this->tree_service->find((int) $row->f_file);
1143
1144            return Registry::familyFactory()->mapper($tree)($row);
1145        };
1146    }
1147
1148    /**
1149     * Convert a row from any tree in the individuals table into an individual object.
1150     *
1151     * @return Closure
1152     */
1153    private function individualRowMapper(): Closure
1154    {
1155        return function (stdClass $row): Individual {
1156            $tree = $this->tree_service->find((int) $row->i_file);
1157
1158            return Registry::individualFactory()->mapper($tree)($row);
1159        };
1160    }
1161
1162    /**
1163     * Convert a row from any tree in the media table into an media object.
1164     *
1165     * @return Closure
1166     */
1167    private function mediaRowMapper(): Closure
1168    {
1169        return function (stdClass $row): Media {
1170            $tree = $this->tree_service->find((int) $row->m_file);
1171
1172            return Registry::mediaFactory()->mapper($tree)($row);
1173        };
1174    }
1175
1176    /**
1177     * Convert a row from any tree in the other table into a note object.
1178     *
1179     * @return Closure
1180     */
1181    private function noteRowMapper(): Closure
1182    {
1183        return function (stdClass $row): Note {
1184            $tree = $this->tree_service->find((int) $row->o_file);
1185
1186            return Registry::noteFactory()->mapper($tree)($row);
1187        };
1188    }
1189
1190    /**
1191     * Convert a row from any tree in the other table into a repository object.
1192     *
1193     * @return Closure
1194     */
1195    private function repositoryRowMapper(): Closure
1196    {
1197        return function (stdClass $row): Repository {
1198            $tree = $this->tree_service->find((int) $row->o_file);
1199
1200            return Registry::repositoryFactory()->mapper($tree)($row);
1201        };
1202    }
1203
1204    /**
1205     * Convert a row from any tree in the sources table into a source object.
1206     *
1207     * @return Closure
1208     */
1209    private function sourceRowMapper(): Closure
1210    {
1211        return function (stdClass $row): Source {
1212            $tree = $this->tree_service->find((int) $row->s_file);
1213
1214            return Registry::sourceFactory()->mapper($tree)($row);
1215        };
1216    }
1217
1218    /**
1219     * Convert a row from any tree in the other table into a submitter object.
1220     *
1221     * @return Closure
1222     */
1223    private function submitterRowMapper(): Closure
1224    {
1225        return function (stdClass $row): Submitter {
1226            $tree = $this->tree_service->find((int) $row->o_file);
1227
1228            return Registry::submitterFactory()->mapper($tree)($row);
1229        };
1230    }
1231}
1232