xref: /webtrees/app/Services/SearchService.php (revision b45eba7a8f95143507ff14772c1e2a243489ae21)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2020 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Closure;
23use Fisharebest\Webtrees\Date;
24use Fisharebest\Webtrees\Exceptions\HttpServiceUnavailableException;
25use Fisharebest\Webtrees\Factory;
26use Fisharebest\Webtrees\Family;
27use Fisharebest\Webtrees\Gedcom;
28use Fisharebest\Webtrees\GedcomRecord;
29use Fisharebest\Webtrees\I18N;
30use Fisharebest\Webtrees\Individual;
31use Fisharebest\Webtrees\Media;
32use Fisharebest\Webtrees\Note;
33use Fisharebest\Webtrees\Place;
34use Fisharebest\Webtrees\Repository;
35use Fisharebest\Webtrees\Soundex;
36use Fisharebest\Webtrees\Source;
37use Fisharebest\Webtrees\Submitter;
38use Fisharebest\Webtrees\Tree;
39use Illuminate\Database\Capsule\Manager as DB;
40use Illuminate\Database\Query\Builder;
41use Illuminate\Database\Query\Expression;
42use Illuminate\Database\Query\JoinClause;
43use Illuminate\Support\Collection;
44use stdClass;
45
46use function addcslashes;
47use function array_filter;
48use function array_map;
49use function array_unique;
50use function explode;
51use function implode;
52use function mb_stripos;
53use function preg_match;
54use function preg_quote;
55use function preg_replace;
56
57use const PHP_INT_MAX;
58
59/**
60 * Search trees for genealogy records.
61 */
62class SearchService
63{
64    // Do not attempt to show search results larger than this/
65    protected const MAX_SEARCH_RESULTS = 5000;
66
67    /** @var TreeService */
68    private $tree_service;
69
70    /**
71     * SearchService constructor.
72     *
73     * @param TreeService $tree_service
74     */
75    public function __construct(
76        TreeService $tree_service
77    ) {
78        $this->tree_service = $tree_service;
79    }
80
81    /**
82     * @param Tree[]   $trees
83     * @param string[] $search
84     *
85     * @return Collection<Family>
86     */
87    public function searchFamilies(array $trees, array $search): Collection
88    {
89        $query = DB::table('families');
90
91        $this->whereTrees($query, 'f_file', $trees);
92        $this->whereSearch($query, 'f_gedcom', $search);
93
94        return $query
95            ->get()
96            ->each($this->rowLimiter())
97            ->map($this->familyRowMapper())
98            ->filter(GedcomRecord::accessFilter())
99            ->filter($this->rawGedcomFilter($search));
100    }
101
102    /**
103     * Search for families by name.
104     *
105     * @param Tree[]   $trees
106     * @param string[] $search
107     * @param int      $offset
108     * @param int      $limit
109     *
110     * @return Collection<Family>
111     */
112    public function searchFamilyNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
113    {
114        $query = DB::table('families')
115            ->leftJoin('name AS husb_name', static function (JoinClause $join): void {
116                $join
117                    ->on('husb_name.n_file', '=', 'families.f_file')
118                    ->on('husb_name.n_id', '=', 'families.f_husb')
119                    ->where('husb_name.n_type', '<>', '_MARNM');
120            })
121            ->leftJoin('name AS wife_name', static function (JoinClause $join): void {
122                $join
123                    ->on('wife_name.n_file', '=', 'families.f_file')
124                    ->on('wife_name.n_id', '=', 'families.f_wife')
125                    ->where('wife_name.n_type', '<>', '_MARNM');
126            });
127
128        $prefix = DB::connection()->getTablePrefix();
129        $field  = new Expression('COALESCE(' . $prefix . "husb_name.n_full, '') || COALESCE(" . $prefix . "wife_name.n_full, '')");
130
131        $this->whereTrees($query, 'f_file', $trees);
132        $this->whereSearch($query, $field, $search);
133
134        $query
135            ->orderBy('husb_name.n_sort')
136            ->orderBy('wife_name.n_sort')
137            ->select(['families.*', 'husb_name.n_sort', 'wife_name.n_sort']);
138
139        return $this->paginateQuery($query, $this->familyRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
140    }
141
142    /**
143     * @param Place $place
144     *
145     * @return Collection<Family>
146     */
147    public function searchFamiliesInPlace(Place $place): Collection
148    {
149        return DB::table('families')
150            ->join('placelinks', static function (JoinClause $query) {
151                $query
152                    ->on('families.f_file', '=', 'placelinks.pl_file')
153                    ->on('families.f_id', '=', 'placelinks.pl_gid');
154            })
155            ->where('f_file', '=', $place->tree()->id())
156            ->where('pl_p_id', '=', $place->id())
157            ->select(['families.*'])
158            ->get()
159            ->each($this->rowLimiter())
160            ->map($this->familyRowMapper())
161            ->filter(GedcomRecord::accessFilter());
162    }
163
164    /**
165     * @param Tree[]   $trees
166     * @param string[] $search
167     *
168     * @return Collection<Individual>
169     */
170    public function searchIndividuals(array $trees, array $search): Collection
171    {
172        $query = DB::table('individuals');
173
174        $this->whereTrees($query, 'i_file', $trees);
175        $this->whereSearch($query, 'i_gedcom', $search);
176
177        return $query
178            ->get()
179            ->each($this->rowLimiter())
180            ->map($this->individualRowMapper())
181            ->filter(GedcomRecord::accessFilter())
182            ->filter($this->rawGedcomFilter($search));
183    }
184
185    /**
186     * Search for individuals by name.
187     *
188     * @param Tree[]   $trees
189     * @param string[] $search
190     * @param int      $offset
191     * @param int      $limit
192     *
193     * @return Collection<Individual>
194     */
195    public function searchIndividualNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
196    {
197        $query = DB::table('individuals')
198            ->join('name', static function (JoinClause $join): void {
199                $join
200                    ->on('name.n_file', '=', 'individuals.i_file')
201                    ->on('name.n_id', '=', 'individuals.i_id');
202            })
203            ->orderBy('n_sort')
204            ->select(['individuals.*', 'n_sort']);
205
206        $this->whereTrees($query, 'i_file', $trees);
207        $this->whereSearch($query, 'n_full', $search);
208
209        return $this->paginateQuery($query, $this->individualRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
210    }
211
212    /**
213     * @param Place $place
214     *
215     * @return Collection<Individual>
216     */
217    public function searchIndividualsInPlace(Place $place): Collection
218    {
219        return DB::table('individuals')
220            ->join('placelinks', static function (JoinClause $join) {
221                $join
222                    ->on('i_file', '=', 'pl_file')
223                    ->on('i_id', '=', 'pl_gid');
224            })
225            ->where('i_file', '=', $place->tree()->id())
226            ->where('pl_p_id', '=', $place->id())
227            ->select(['individuals.*'])
228            ->get()
229            ->each($this->rowLimiter())
230            ->map($this->individualRowMapper())
231            ->filter(GedcomRecord::accessFilter());
232    }
233
234    /**
235     * Search for media objects.
236     *
237     * @param Tree[]   $trees
238     * @param string[] $search
239     * @param int      $offset
240     * @param int      $limit
241     *
242     * @return Collection<Media>
243     */
244    public function searchMedia(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
245    {
246        $query = DB::table('media');
247
248        $this->whereTrees($query, 'media.m_file', $trees);
249        $this->whereSearch($query, 'm_gedcom', $search);
250
251        return $this->paginateQuery($query, $this->mediaRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
252    }
253
254    /**
255     * Search for notes.
256     *
257     * @param Tree[]   $trees
258     * @param string[] $search
259     * @param int      $offset
260     * @param int      $limit
261     *
262     * @return Collection<Note>
263     */
264    public function searchNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
265    {
266        $query = DB::table('other')
267            ->where('o_type', '=', 'NOTE');
268
269        $this->whereTrees($query, 'o_file', $trees);
270        $this->whereSearch($query, 'o_gedcom', $search);
271
272        return $this->paginateQuery($query, $this->noteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
273    }
274
275    /**
276     * Search for repositories.
277     *
278     * @param Tree[]   $trees
279     * @param string[] $search
280     * @param int      $offset
281     * @param int      $limit
282     *
283     * @return Collection<Repository>
284     */
285    public function searchRepositories(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
286    {
287        $query = DB::table('other')
288            ->where('o_type', '=', 'REPO');
289
290        $this->whereTrees($query, 'o_file', $trees);
291        $this->whereSearch($query, 'o_gedcom', $search);
292
293        return $this->paginateQuery($query, $this->repositoryRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
294    }
295
296    /**
297     * Search for sources.
298     *
299     * @param Tree[]   $trees
300     * @param string[] $search
301     * @param int      $offset
302     * @param int      $limit
303     *
304     * @return Collection<Source>
305     */
306    public function searchSources(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
307    {
308        $query = DB::table('sources');
309
310        $this->whereTrees($query, 's_file', $trees);
311        $this->whereSearch($query, 's_gedcom', $search);
312
313        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
314    }
315
316    /**
317     * Search for sources by name.
318     *
319     * @param Tree[]   $trees
320     * @param string[] $search
321     * @param int      $offset
322     * @param int      $limit
323     *
324     * @return Collection<Source>
325     */
326    public function searchSourcesByName(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
327    {
328        $query = DB::table('sources')
329            ->orderBy('s_name');
330
331        $this->whereTrees($query, 's_file', $trees);
332        $this->whereSearch($query, 's_name', $search);
333
334        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
335    }
336
337    /**
338     * Search for submitters.
339     *
340     * @param Tree[]   $trees
341     * @param string[] $search
342     * @param int      $offset
343     * @param int      $limit
344     *
345     * @return Collection<Submitter>
346     */
347    public function searchSubmitters(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
348    {
349        $query = DB::table('other')
350            ->where('o_type', '=', 'SUBM');
351
352        $this->whereTrees($query, 'o_file', $trees);
353        $this->whereSearch($query, 'o_gedcom', $search);
354
355        return $this->paginateQuery($query, $this->submitterRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
356    }
357
358    /**
359     * Search for places.
360     *
361     * @param Tree   $tree
362     * @param string $search
363     * @param int    $offset
364     * @param int    $limit
365     *
366     * @return Collection<Place>
367     */
368    public function searchPlaces(Tree $tree, string $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
369    {
370        $query = DB::table('places AS p0')
371            ->where('p0.p_file', '=', $tree->id())
372            ->leftJoin('places AS p1', 'p1.p_id', '=', 'p0.p_parent_id')
373            ->leftJoin('places AS p2', 'p2.p_id', '=', 'p1.p_parent_id')
374            ->leftJoin('places AS p3', 'p3.p_id', '=', 'p2.p_parent_id')
375            ->leftJoin('places AS p4', 'p4.p_id', '=', 'p3.p_parent_id')
376            ->leftJoin('places AS p5', 'p5.p_id', '=', 'p4.p_parent_id')
377            ->leftJoin('places AS p6', 'p6.p_id', '=', 'p5.p_parent_id')
378            ->leftJoin('places AS p7', 'p7.p_id', '=', 'p6.p_parent_id')
379            ->leftJoin('places AS p8', 'p8.p_id', '=', 'p7.p_parent_id')
380            ->orderBy('p0.p_place')
381            ->orderBy('p1.p_place')
382            ->orderBy('p2.p_place')
383            ->orderBy('p3.p_place')
384            ->orderBy('p4.p_place')
385            ->orderBy('p5.p_place')
386            ->orderBy('p6.p_place')
387            ->orderBy('p7.p_place')
388            ->orderBy('p8.p_place')
389            ->select([
390                'p0.p_place AS place0',
391                'p1.p_place AS place1',
392                'p2.p_place AS place2',
393                'p3.p_place AS place3',
394                'p4.p_place AS place4',
395                'p5.p_place AS place5',
396                'p6.p_place AS place6',
397                'p7.p_place AS place7',
398                'p8.p_place AS place8',
399            ]);
400
401        // Filter each level of the hierarchy.
402        foreach (explode(',', $search, 9) as $level => $string) {
403            $query->where('p' . $level . '.p_place', 'LIKE', '%' . addcslashes($string, '\\%_') . '%');
404        }
405
406        $row_mapper = static function (stdClass $row) use ($tree): Place {
407            $place = implode(', ', array_filter((array) $row));
408
409            return new Place($place, $tree);
410        };
411
412        $filter = static function (): bool {
413            return true;
414        };
415
416        return $this->paginateQuery($query, $row_mapper, $filter, $offset, $limit);
417    }
418
419    /**
420     * @param Tree[]   $trees
421     * @param string[] $fields
422     * @param string[] $modifiers
423     *
424     * @return Collection<Individual>
425     */
426    public function searchIndividualsAdvanced(array $trees, array $fields, array $modifiers): Collection
427    {
428        $fields = array_filter($fields);
429
430        $query = DB::table('individuals')
431            ->select(['individuals.*'])
432            ->distinct();
433
434        $this->whereTrees($query, 'i_file', $trees);
435
436        // Join the following tables
437        $father_name   = false;
438        $mother_name   = false;
439        $spouse_family = false;
440        $indi_name     = false;
441        $indi_dates    = [];
442        $fam_dates     = [];
443        $indi_plac     = false;
444        $fam_plac      = false;
445
446        foreach ($fields as $field_name => $field_value) {
447            if ($field_value !== '') {
448                // Fields can have up to 4 parts, but we only need the first 3 to identify
449                // which tables to select
450                $field_parts = explode(':', $field_name . '::');
451
452                if ($field_parts[0] === 'FAMC') {
453                    // Parent name - FAMC:[HUSB|WIFE]:NAME:[GIVN|SURN]
454                    if ($field_parts[1] === 'HUSB') {
455                        $father_name = true;
456                    } else {
457                        $mother_name = true;
458                    }
459                } elseif ($field_parts[0] === 'NAME') {
460                    // Individual name - NAME:[GIVN|SURN]
461                    $indi_name = true;
462                } elseif ($field_parts[0] === 'FAMS') {
463                    // Family facts - FAMS:NOTE or FAMS:[FACT]:[DATE|PLAC]
464                    $spouse_family = true;
465                    if ($field_parts[2] === 'DATE') {
466                        $fam_dates[] = $field_parts[1];
467                    } elseif ($field_parts[2] === 'PLAC') {
468                        $fam_plac = true;
469                    }
470                } else {
471                    // Individual facts - [FACT] or [FACT]:[DATE|PLAC]
472                    if ($field_parts[1] === 'DATE') {
473                        $indi_dates[] = $field_parts[0];
474                    } elseif ($field_parts[1] === 'PLAC') {
475                        $indi_plac = true;
476                    }
477                }
478            }
479        }
480
481        if ($father_name || $mother_name) {
482            $query->join('link AS l1', static function (JoinClause $join): void {
483                $join
484                    ->on('l1.l_file', '=', 'individuals.i_file')
485                    ->on('l1.l_from', '=', 'individuals.i_id')
486                    ->where('l1.l_type', '=', 'FAMC');
487            });
488
489            if ($father_name) {
490                $query->join('link AS l2', static function (JoinClause $join): void {
491                    $join
492                        ->on('l2.l_file', '=', 'l1.l_file')
493                        ->on('l2.l_from', '=', 'l1.l_to')
494                        ->where('l2.l_type', '=', 'HUSB');
495                });
496                $query->join('name AS father_name', static function (JoinClause $join): void {
497                    $join
498                        ->on('father_name.n_file', '=', 'l2.l_file')
499                        ->on('father_name.n_id', '=', 'l2.l_to');
500                });
501            }
502
503            if ($mother_name) {
504                $query->join('link AS l3', static function (JoinClause $join): void {
505                    $join
506                        ->on('l3.l_file', '=', 'l1.l_file')
507                        ->on('l3.l_from', '=', 'l1.l_to')
508                        ->where('l3.l_type', '=', 'WIFE');
509                });
510                $query->join('name AS mother_name', static function (JoinClause $join): void {
511                    $join
512                        ->on('mother_name.n_file', '=', 'l3.l_file')
513                        ->on('mother_name.n_id', '=', 'l3.l_to');
514                });
515            }
516        }
517
518        if ($spouse_family) {
519            $query->join('link AS l4', static function (JoinClause $join): void {
520                $join
521                    ->on('l4.l_file', '=', 'individuals.i_file')
522                    ->on('l4.l_from', '=', 'individuals.i_id')
523                    ->where('l4.l_type', '=', 'FAMS');
524            });
525            $query->join('families AS spouse_families', static function (JoinClause $join): void {
526                $join
527                    ->on('spouse_families.f_file', '=', 'l4.l_file')
528                    ->on('spouse_families.f_id', '=', 'l4.l_to');
529            });
530        }
531
532        if ($indi_name) {
533            $query->join('name AS individual_name', static function (JoinClause $join): void {
534                $join
535                    ->on('individual_name.n_file', '=', 'individuals.i_file')
536                    ->on('individual_name.n_id', '=', 'individuals.i_id');
537            });
538        }
539
540        foreach (array_unique($indi_dates) as $indi_date) {
541            $query->join('dates AS date_' . $indi_date, static function (JoinClause $join) use ($indi_date): void {
542                $join
543                    ->on('date_' . $indi_date . '.d_file', '=', 'individuals.i_file')
544                    ->on('date_' . $indi_date . '.d_gid', '=', 'individuals.i_id');
545            });
546        }
547
548        foreach (array_unique($fam_dates) as $fam_date) {
549            $query->join('dates AS date_' . $fam_date, static function (JoinClause $join) use ($fam_date): void {
550                $join
551                    ->on('date_' . $fam_date . '.d_file', '=', 'spouse_families.f_file')
552                    ->on('date_' . $fam_date . '.d_gid', '=', 'spouse_families.f_id');
553            });
554        }
555
556        if ($indi_plac) {
557            $query->join('placelinks AS individual_placelinks', static function (JoinClause $join): void {
558                $join
559                    ->on('individual_placelinks.pl_file', '=', 'individuals.i_file')
560                    ->on('individual_placelinks.pl_gid', '=', 'individuals.i_id');
561            });
562            $query->join('places AS individual_places', static function (JoinClause $join): void {
563                $join
564                    ->on('individual_places.p_file', '=', 'individual_placelinks.pl_file')
565                    ->on('individual_places.p_id', '=', 'individual_placelinks.pl_p_id');
566            });
567        }
568
569        if ($fam_plac) {
570            $query->join('placelinks AS familyl_placelinks', static function (JoinClause $join): void {
571                $join
572                    ->on('familyl_placelinks.pl_file', '=', 'individuals.i_file')
573                    ->on('familyl_placelinks.pl_gid', '=', 'individuals.i_id');
574            });
575            $query->join('places AS family_places', static function (JoinClause $join): void {
576                $join
577                    ->on('family_places.p_file', '=', 'familyl_placelinks.pl_file')
578                    ->on('family_places.p_id', '=', 'familyl_placelinks.pl_p_id');
579            });
580        }
581
582        foreach ($fields as $field_name => $field_value) {
583            $parts = explode(':', $field_name . ':::');
584            if ($parts[0] === 'NAME') {
585                // NAME:*
586                switch ($parts[1]) {
587                    case 'GIVN':
588                        switch ($modifiers[$field_name]) {
589                            case 'EXACT':
590                                $query->where('individual_name.n_givn', '=', $field_value);
591                                break;
592                            case 'BEGINS':
593                                $query->where('individual_name.n_givn', 'LIKE', $field_value . '%');
594                                break;
595                            case 'CONTAINS':
596                                $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
597                                break;
598                            case 'SDX_STD':
599                                $sdx = Soundex::russell($field_value);
600                                if ($sdx !== '') {
601                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_std', $sdx);
602                                } else {
603                                    // No phonetic content? Use a substring match
604                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
605                                }
606                                break;
607                            case 'SDX': // SDX uses DM by default.
608                            case 'SDX_DM':
609                                $sdx = Soundex::daitchMokotoff($field_value);
610                                if ($sdx !== '') {
611                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_dm', $sdx);
612                                } else {
613                                    // No phonetic content? Use a substring match
614                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
615                                }
616                                break;
617                        }
618                        unset($fields[$field_name]);
619                        break;
620                    case 'SURN':
621                        switch ($modifiers[$field_name]) {
622                            case 'EXACT':
623                                $query->where(function (Builder $query) use ($field_value): void {
624                                    $query
625                                        ->where('individual_name.n_surn', '=', $field_value)
626                                        ->orWhere('individual_name.n_surname', '=', $field_value);
627                                });
628                                break;
629                            case 'BEGINS':
630                                $query->where(function (Builder $query) use ($field_value): void {
631                                    $query
632                                        ->where('individual_name.n_surn', 'LIKE', $field_value . '%')
633                                        ->orWhere('individual_name.n_surname', 'LIKE', $field_value . '%');
634                                });
635                                break;
636                            case 'CONTAINS':
637                                $query->where(function (Builder $query) use ($field_value): void {
638                                    $query
639                                        ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
640                                        ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
641                                });
642                                break;
643                            case 'SDX_STD':
644                                $sdx = Soundex::russell($field_value);
645                                if ($sdx !== '') {
646                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_std', $sdx);
647                                } else {
648                                    // No phonetic content? Use a substring match
649                                    $query->where(function (Builder $query) use ($field_value): void {
650                                        $query
651                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
652                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
653                                    });
654                                }
655                                break;
656                            case 'SDX': // SDX uses DM by default.
657                            case 'SDX_DM':
658                                $sdx = Soundex::daitchMokotoff($field_value);
659                                if ($sdx !== '') {
660                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_dm', $sdx);
661                                } else {
662                                    // No phonetic content? Use a substring match
663                                    $query->where(function (Builder $query) use ($field_value): void {
664                                        $query
665                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
666                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
667                                    });
668                                }
669                                break;
670                        }
671                        unset($fields[$field_name]);
672                        break;
673                    case 'NICK':
674                    case '_MARNM':
675                    case '_HEB':
676                    case '_AKA':
677                        $like = "%\n1 " . $parts[0] . "%\n2 " . $parts[1] . ' %' . preg_quote($field_value, '/') . '%';
678                        $query->where('individuals.i_gedcom', 'LIKE', $like);
679                        break;
680                }
681            } elseif ($parts[1] === 'DATE') {
682                // *:DATE
683                $date = new Date($field_value);
684                if ($date->isOK()) {
685                    $delta = 365 * ($modifiers[$field_name] ?? 0);
686                    $query
687                        ->where('date_' . $parts[0] . '.d_fact', '=', $parts[0])
688                        ->where('date_' . $parts[0] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
689                        ->where('date_' . $parts[0] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
690                }
691                unset($fields[$field_name]);
692            } elseif ($parts[0] === 'FAMS' && $parts[2] === 'DATE') {
693                // FAMS:*:DATE
694                $date = new Date($field_value);
695                if ($date->isOK()) {
696                    $delta = 365 * $modifiers[$field_name];
697                    $query
698                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
699                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
700                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
701                }
702                unset($fields[$field_name]);
703            } elseif ($parts[1] === 'PLAC') {
704                // *:PLAC
705                // SQL can only link a place to a person/family, not to an event.
706                $query->where('individual_places.p_place', 'LIKE', '%' . $field_value . '%');
707            } elseif ($parts[0] === 'FAMS' && $parts[2] === 'PLAC') {
708                // FAMS:*:PLAC
709                // SQL can only link a place to a person/family, not to an event.
710                $query->where('family_places.p_place', 'LIKE', '%' . $field_value . '%');
711            } elseif ($parts[0] === 'FAMC' && $parts[2] === 'NAME') {
712                $table = $parts[1] === 'HUSB' ? 'father_name' : 'mother_name';
713                // NAME:*
714                switch ($parts[3]) {
715                    case 'GIVN':
716                        switch ($modifiers[$field_name]) {
717                            case 'EXACT':
718                                $query->where($table . '.n_givn', '=', $field_value);
719                                break;
720                            case 'BEGINS':
721                                $query->where($table . '.n_givn', 'LIKE', $field_value . '%');
722                                break;
723                            case 'CONTAINS':
724                                $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
725                                break;
726                            case 'SDX_STD':
727                                $sdx = Soundex::russell($field_value);
728                                if ($sdx !== '') {
729                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_std', $sdx);
730                                } else {
731                                    // No phonetic content? Use a substring match
732                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
733                                }
734                                break;
735                            case 'SDX': // SDX uses DM by default.
736                            case 'SDX_DM':
737                                $sdx = Soundex::daitchMokotoff($field_value);
738                                if ($sdx !== '') {
739                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_dm', $sdx);
740                                } else {
741                                    // No phonetic content? Use a substring match
742                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
743                                }
744                                break;
745                        }
746                        break;
747                    case 'SURN':
748                        switch ($modifiers[$field_name]) {
749                            case 'EXACT':
750                                $query->where($table . '.n_surn', '=', $field_value);
751                                break;
752                            case 'BEGINS':
753                                $query->where($table . '.n_surn', 'LIKE', $field_value . '%');
754                                break;
755                            case 'CONTAINS':
756                                $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
757                                break;
758                            case 'SDX_STD':
759                                $sdx = Soundex::russell($field_value);
760                                if ($sdx !== '') {
761                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_std', $sdx);
762                                } else {
763                                    // No phonetic content? Use a substring match
764                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
765                                }
766                                break;
767                            case 'SDX': // SDX uses DM by default.
768                            case 'SDX_DM':
769                                $sdx = Soundex::daitchMokotoff($field_value);
770                                if ($sdx !== '') {
771                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_dm', $sdx);
772                                } else {
773                                    // No phonetic content? Use a substring match
774                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
775                                }
776                                break;
777                        }
778                        break;
779                }
780                unset($fields[$field_name]);
781            } elseif ($parts[0] === 'FAMS') {
782                // e.g. searches for occupation, religion, note, etc.
783                // Initial matching only.  Need PHP to apply filter.
784                $query->where('spouse_families.f_gedcom', 'LIKE', "%\n1 " . $parts[1] . ' %' . $field_value . '%');
785            } elseif ($parts[1] === 'TYPE') {
786                // e.g. FACT:TYPE or EVEN:TYPE
787                // Initial matching only.  Need PHP to apply filter.
788                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[0] . "%\n2 TYPE %" . $field_value . '%');
789            } else {
790                // e.g. searches for occupation, religion, note, etc.
791                // Initial matching only.  Need PHP to apply filter.
792                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[0] . '%' . $parts[1] . '%' . $field_value . '%');
793            }
794        }
795
796        return $query
797            ->get()
798            ->each($this->rowLimiter())
799            ->map($this->individualRowMapper())
800            ->filter(GedcomRecord::accessFilter())
801            ->filter(static function (Individual $individual) use ($fields): bool {
802                // Check for searches which were only partially matched by SQL
803                foreach ($fields as $field_name => $field_value) {
804                    $parts = explode(':', $field_name . '::::');
805
806                    // NAME:*
807                    if ($parts[0] === 'NAME') {
808                        $regex = '/\n1 NAME.*(?:\n2.*)*\n2 ' . $parts[1] . ' .*' . preg_quote($field_value, '/') . '/i';
809
810                        if (preg_match($regex, $individual->gedcom())) {
811                            continue;
812                        }
813
814                        return false;
815                    }
816
817                    $regex = '/' . preg_quote($field_value, '/') . '/i';
818
819                    // *:PLAC
820                    if ($parts[1] === 'PLAC') {
821                        foreach ($individual->facts([$parts[0]]) as $fact) {
822                            if (preg_match($regex, $fact->place()->gedcomName())) {
823                                continue 2;
824                            }
825                        }
826                        return false;
827                    }
828
829                    // FAMS:*:PLAC
830                    if ($parts[0] === 'FAMS' && $parts[2] === 'PLAC') {
831                        foreach ($individual->spouseFamilies() as $family) {
832                            foreach ($family->facts([$parts[1]]) as $fact) {
833                                if (preg_match($regex, $fact->place()->gedcomName())) {
834                                    continue 2;
835                                }
836                            }
837                        }
838                        return false;
839                    }
840
841                    // e.g. searches for occupation, religion, note, etc.
842                    if ($parts[0] === 'FAMS') {
843                        foreach ($individual->spouseFamilies() as $family) {
844                            foreach ($family->facts([$parts[1]]) as $fact) {
845                                if (preg_match($regex, $fact->value())) {
846                                    continue 3;
847                                }
848                            }
849                        }
850                        return false;
851                    }
852
853                    // e.g. FACT:TYPE or EVEN:TYPE
854                    if ($parts[1] === 'TYPE' || $parts[1] === '_WT_USER') {
855                        foreach ($individual->facts([$parts[0]]) as $fact) {
856                            if (preg_match($regex, $fact->attribute($parts[1]))) {
857                                continue 2;
858                            }
859                        }
860
861                        return false;
862                    }
863                }
864
865                return true;
866            });
867    }
868
869    /**
870     * @param string $soundex
871     * @param string $lastname
872     * @param string $firstname
873     * @param string $place
874     * @param Tree[] $search_trees
875     *
876     * @return Collection<Individual>
877     */
878    public function searchIndividualsPhonetic(string $soundex, string $lastname, string $firstname, string $place, array $search_trees): Collection
879    {
880        switch ($soundex) {
881            default:
882            case 'Russell':
883                $givn_sdx   = Soundex::russell($firstname);
884                $surn_sdx   = Soundex::russell($lastname);
885                $plac_sdx   = Soundex::russell($place);
886                $givn_field = 'n_soundex_givn_std';
887                $surn_field = 'n_soundex_surn_std';
888                $plac_field = 'p_std_soundex';
889                break;
890            case 'DaitchM':
891                $givn_sdx   = Soundex::daitchMokotoff($firstname);
892                $surn_sdx   = Soundex::daitchMokotoff($lastname);
893                $plac_sdx   = Soundex::daitchMokotoff($place);
894                $givn_field = 'n_soundex_givn_dm';
895                $surn_field = 'n_soundex_surn_dm';
896                $plac_field = 'p_dm_soundex';
897                break;
898        }
899
900        // Nothing to search for? Return nothing.
901        if ($givn_sdx === '' && $surn_sdx === '' && $plac_sdx === '') {
902            return new Collection();
903        }
904
905        $query = DB::table('individuals')
906            ->select(['individuals.*'])
907            ->distinct();
908
909        $this->whereTrees($query, 'i_file', $search_trees);
910
911        if ($plac_sdx !== '') {
912            $query->join('placelinks', static function (JoinClause $join): void {
913                $join
914                    ->on('placelinks.pl_file', '=', 'individuals.i_file')
915                    ->on('placelinks.pl_gid', '=', 'individuals.i_id');
916            });
917            $query->join('places', static function (JoinClause $join): void {
918                $join
919                    ->on('places.p_file', '=', 'placelinks.pl_file')
920                    ->on('places.p_id', '=', 'placelinks.pl_p_id');
921            });
922
923            $this->wherePhonetic($query, $plac_field, $plac_sdx);
924        }
925
926        if ($givn_sdx !== '' || $surn_sdx !== '') {
927            $query->join('name', static function (JoinClause $join): void {
928                $join
929                    ->on('name.n_file', '=', 'individuals.i_file')
930                    ->on('name.n_id', '=', 'individuals.i_id');
931            });
932
933            $this->wherePhonetic($query, $givn_field, $givn_sdx);
934            $this->wherePhonetic($query, $surn_field, $surn_sdx);
935        }
936
937        return $query
938            ->get()
939            ->each($this->rowLimiter())
940            ->map($this->individualRowMapper())
941            ->filter(GedcomRecord::accessFilter());
942    }
943
944    /**
945     * Paginate a search query.
946     *
947     * @param Builder $query      Searches the database for the desired records.
948     * @param Closure $row_mapper Converts a row from the query into a record.
949     * @param Closure $row_filter
950     * @param int     $offset     Skip this many rows.
951     * @param int     $limit      Take this many rows.
952     *
953     * @return Collection<mixed>
954     */
955    private function paginateQuery(Builder $query, Closure $row_mapper, Closure $row_filter, int $offset, int $limit): Collection
956    {
957        $collection = new Collection();
958
959        foreach ($query->cursor() as $row) {
960            $record = $row_mapper($row);
961            // searchIndividualNames() and searchFamilyNames() can return duplicate rows,
962            // where individuals have multiple names - and we need to sort results by name.
963            if ($collection->containsStrict($record)) {
964                continue;
965            }
966            // If the object has a method "canShow()", then use it to filter for privacy.
967            if ($row_filter($record)) {
968                if ($offset > 0) {
969                    $offset--;
970                } else {
971                    if ($limit > 0) {
972                        $collection->push($record);
973                    }
974
975                    $limit--;
976
977                    if ($limit === 0) {
978                        break;
979                    }
980                }
981            }
982        }
983
984
985        return $collection;
986    }
987
988    /**
989     * Apply search filters to a SQL query column.  Apply collation rules to MySQL.
990     *
991     * @param Builder           $query
992     * @param Expression|string $field
993     * @param string[]          $search_terms
994     */
995    private function whereSearch(Builder $query, $field, array $search_terms): void
996    {
997        if ($field instanceof Expression) {
998            $field = $field->getValue();
999        }
1000
1001        foreach ($search_terms as $search_term) {
1002            $query->where(new Expression($field), 'LIKE', '%' . addcslashes($search_term, '\\%_') . '%');
1003        }
1004    }
1005
1006    /**
1007     * Apply soundex search filters to a SQL query column.
1008     *
1009     * @param Builder           $query
1010     * @param Expression|string $field
1011     * @param string            $soundex
1012     */
1013    private function wherePhonetic(Builder $query, $field, string $soundex): void
1014    {
1015        if ($soundex !== '') {
1016            $query->where(static function (Builder $query) use ($soundex, $field): void {
1017                foreach (explode(':', $soundex) as $sdx) {
1018                    $query->orWhere($field, 'LIKE', '%' . $sdx . '%');
1019                }
1020            });
1021        }
1022    }
1023
1024    /**
1025     * @param Builder $query
1026     * @param string  $tree_id_field
1027     * @param Tree[]  $trees
1028     */
1029    private function whereTrees(Builder $query, string $tree_id_field, array $trees): void
1030    {
1031        $tree_ids = array_map(static function (Tree $tree): int {
1032            return $tree->id();
1033        }, $trees);
1034
1035        $query->whereIn($tree_id_field, $tree_ids);
1036    }
1037
1038    /**
1039     * Find the media object that uses a particular media file.
1040     *
1041     * @param string $file
1042     *
1043     * @return Media[]
1044     */
1045    public function findMediaObjectsForMediaFile(string $file): array
1046    {
1047        return DB::table('media')
1048            ->join('media_file', static function (JoinClause $join): void {
1049                $join
1050                    ->on('media_file.m_file', '=', 'media.m_file')
1051                    ->on('media_file.m_id', '=', 'media.m_id');
1052            })
1053            ->join('gedcom_setting', 'media.m_file', '=', 'gedcom_setting.gedcom_id')
1054            ->where(new Expression('setting_value || multimedia_file_refn'), '=', $file)
1055            ->select(['media.*'])
1056            ->distinct()
1057            ->get()
1058            ->map($this->mediaRowMapper())
1059            ->all();
1060    }
1061
1062    /**
1063     * A closure to filter records by privacy-filtered GEDCOM data.
1064     *
1065     * @param array<string> $search_terms
1066     *
1067     * @return Closure
1068     */
1069    private function rawGedcomFilter(array $search_terms): Closure
1070    {
1071        return static function (GedcomRecord $record) use ($search_terms): bool {
1072            // Ignore non-genealogy fields
1073            $gedcom = preg_replace('/\n\d (?:_UID|_WT_USER) .*/', '', $record->gedcom());
1074
1075            // Ignore matches in links
1076            $gedcom = preg_replace('/\n\d ' . Gedcom::REGEX_TAG . '( @' . Gedcom::REGEX_XREF . '@)?/', '', $gedcom);
1077
1078            // Re-apply the filtering
1079            foreach ($search_terms as $search_term) {
1080                if (mb_stripos($gedcom, $search_term) === false) {
1081                    return false;
1082                }
1083            }
1084
1085            return true;
1086        };
1087    }
1088
1089    /**
1090     * Searching for short or common text can give more results than the system can process.
1091     *
1092     * @param int $limit
1093     *
1094     * @return Closure
1095     */
1096    private function rowLimiter(int $limit = self::MAX_SEARCH_RESULTS): Closure
1097    {
1098        return static function () use ($limit): void {
1099            static $n = 0;
1100
1101            if (++$n > $limit) {
1102                $message = I18N::translate('The search returned too many results.');
1103
1104                throw new HttpServiceUnavailableException($message);
1105            }
1106        };
1107    }
1108
1109    /**
1110     * Convert a row from any tree in the families table into a family object.
1111     *
1112     * @return Closure
1113     */
1114    private function familyRowMapper(): Closure
1115    {
1116        return function (stdClass $row): Family {
1117            $tree = $this->tree_service->find((int) $row->f_file);
1118
1119            return Factory::family()->mapper($tree)($row);
1120        };
1121    }
1122
1123    /**
1124     * Convert a row from any tree in the individuals table into an individual object.
1125     *
1126     * @return Closure
1127     */
1128    private function individualRowMapper(): Closure
1129    {
1130        return function (stdClass $row): Individual {
1131            $tree = $this->tree_service->find((int) $row->i_file);
1132
1133            return Factory::individual()->mapper($tree)($row);
1134        };
1135    }
1136
1137    /**
1138     * Convert a row from any tree in the media table into an media object.
1139     *
1140     * @return Closure
1141     */
1142    private function mediaRowMapper(): Closure
1143    {
1144        return function (stdClass $row): Media {
1145            $tree = $this->tree_service->find((int) $row->m_file);
1146
1147            return Factory::media()->mapper($tree)($row);
1148        };
1149    }
1150
1151    /**
1152     * Convert a row from any tree in the other table into a note object.
1153     *
1154     * @return Closure
1155     */
1156    private function noteRowMapper(): Closure
1157    {
1158        return function (stdClass $row): Note {
1159            $tree = $this->tree_service->find((int) $row->o_file);
1160
1161            return Factory::note()->mapper($tree)($row);
1162        };
1163    }
1164
1165    /**
1166     * Convert a row from any tree in the other table into a repository object.
1167     *
1168     * @return Closure
1169     */
1170    private function repositoryRowMapper(): Closure
1171    {
1172        return function (stdClass $row): Repository {
1173            $tree = $this->tree_service->find((int) $row->o_file);
1174
1175            return Factory::repository()->mapper($tree)($row);
1176        };
1177    }
1178
1179    /**
1180     * Convert a row from any tree in the sources table into a source object.
1181     *
1182     * @return Closure
1183     */
1184    private function sourceRowMapper(): Closure
1185    {
1186        return function (stdClass $row): Source {
1187            $tree = $this->tree_service->find((int) $row->s_file);
1188
1189            return Factory::source()->mapper($tree)($row);
1190        };
1191    }
1192
1193    /**
1194     * Convert a row from any tree in the other table into a submitter object.
1195     *
1196     * @return Closure
1197     */
1198    private function submitterRowMapper(): Closure
1199    {
1200        return function (stdClass $row): Submitter {
1201            $tree = $this->tree_service->find((int) $row->o_file);
1202
1203            return Factory::submitter()->mapper($tree)($row);
1204        };
1205    }
1206}
1207