xref: /webtrees/app/Services/SearchService.php (revision b4a2f8851f56e7ca6fae19d0a51ff909996d7a30)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2020 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Services;
21
22use Closure;
23use Fisharebest\Webtrees\Date;
24use Fisharebest\Webtrees\Exceptions\HttpServiceUnavailableException;
25use Fisharebest\Webtrees\Factory;
26use Fisharebest\Webtrees\Family;
27use Fisharebest\Webtrees\Gedcom;
28use Fisharebest\Webtrees\GedcomRecord;
29use Fisharebest\Webtrees\I18N;
30use Fisharebest\Webtrees\Individual;
31use Fisharebest\Webtrees\Media;
32use Fisharebest\Webtrees\Note;
33use Fisharebest\Webtrees\Place;
34use Fisharebest\Webtrees\Repository;
35use Fisharebest\Webtrees\Soundex;
36use Fisharebest\Webtrees\Source;
37use Fisharebest\Webtrees\Submitter;
38use Fisharebest\Webtrees\Tree;
39use Illuminate\Database\Capsule\Manager as DB;
40use Illuminate\Database\Query\Builder;
41use Illuminate\Database\Query\Expression;
42use Illuminate\Database\Query\JoinClause;
43use Illuminate\Support\Collection;
44use stdClass;
45
46use function addcslashes;
47use function array_filter;
48use function array_map;
49use function array_unique;
50use function explode;
51use function implode;
52use function mb_stripos;
53use function preg_match;
54use function preg_quote;
55use function preg_replace;
56
57use const PHP_INT_MAX;
58
59/**
60 * Search trees for genealogy records.
61 */
62class SearchService
63{
64    // Do not attempt to show search results larger than this/
65    protected const MAX_SEARCH_RESULTS = 5000;
66
67    /** @var TreeService */
68    private $tree_service;
69
70    /**
71     * SearchService constructor.
72     *
73     * @param TreeService $tree_service
74     */
75    public function __construct(
76        TreeService $tree_service
77    ) {
78        $this->tree_service = $tree_service;
79    }
80
81    /**
82     * @param Tree[]   $trees
83     * @param string[] $search
84     *
85     * @return Collection<Family>
86     */
87    public function searchFamilies(array $trees, array $search): Collection
88    {
89        $query = DB::table('families');
90
91        $this->whereTrees($query, 'f_file', $trees);
92        $this->whereSearch($query, 'f_gedcom', $search);
93
94        return $query
95            ->get()
96            ->each($this->rowLimiter())
97            ->map($this->familyRowMapper())
98            ->filter(GedcomRecord::accessFilter())
99            ->filter($this->rawGedcomFilter($search));
100    }
101
102    /**
103     * Search for families by name.
104     *
105     * @param Tree[]   $trees
106     * @param string[] $search
107     * @param int      $offset
108     * @param int      $limit
109     *
110     * @return Collection<Family>
111     */
112    public function searchFamilyNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
113    {
114        $query = DB::table('families')
115            ->leftJoin('name AS husb_name', static function (JoinClause $join): void {
116                $join
117                    ->on('husb_name.n_file', '=', 'families.f_file')
118                    ->on('husb_name.n_id', '=', 'families.f_husb')
119                    ->where('husb_name.n_type', '<>', '_MARNM');
120            })
121            ->leftJoin('name AS wife_name', static function (JoinClause $join): void {
122                $join
123                    ->on('wife_name.n_file', '=', 'families.f_file')
124                    ->on('wife_name.n_id', '=', 'families.f_wife')
125                    ->where('wife_name.n_type', '<>', '_MARNM');
126            });
127
128        $prefix = DB::connection()->getTablePrefix();
129        $field  = new Expression('COALESCE(' . $prefix . "husb_name.n_full, '') || COALESCE(" . $prefix . "wife_name.n_full, '')");
130
131        $this->whereTrees($query, 'f_file', $trees);
132        $this->whereSearch($query, $field, $search);
133
134        $query
135            ->orderBy('husb_name.n_sort')
136            ->orderBy('wife_name.n_sort')
137            ->select(['families.*', 'husb_name.n_sort', 'wife_name.n_sort'])
138            ->distinct();
139
140        return $this->paginateQuery($query, $this->familyRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
141    }
142
143    /**
144     * @param Place $place
145     *
146     * @return Collection<Family>
147     */
148    public function searchFamiliesInPlace(Place $place): Collection
149    {
150        return DB::table('families')
151            ->join('placelinks', static function (JoinClause $query) {
152                $query
153                    ->on('families.f_file', '=', 'placelinks.pl_file')
154                    ->on('families.f_id', '=', 'placelinks.pl_gid');
155            })
156            ->where('f_file', '=', $place->tree()->id())
157            ->where('pl_p_id', '=', $place->id())
158            ->select(['families.*'])
159            ->get()
160            ->each($this->rowLimiter())
161            ->map($this->familyRowMapper())
162            ->filter(GedcomRecord::accessFilter());
163    }
164
165    /**
166     * @param Tree[]   $trees
167     * @param string[] $search
168     *
169     * @return Collection<Individual>
170     */
171    public function searchIndividuals(array $trees, array $search): Collection
172    {
173        $query = DB::table('individuals');
174
175        $this->whereTrees($query, 'i_file', $trees);
176        $this->whereSearch($query, 'i_gedcom', $search);
177
178        return $query
179            ->get()
180            ->each($this->rowLimiter())
181            ->map($this->individualRowMapper())
182            ->filter(GedcomRecord::accessFilter())
183            ->filter($this->rawGedcomFilter($search));
184    }
185
186    /**
187     * Search for individuals by name.
188     *
189     * @param Tree[]   $trees
190     * @param string[] $search
191     * @param int      $offset
192     * @param int      $limit
193     *
194     * @return Collection<Individual>
195     */
196    public function searchIndividualNames(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
197    {
198        $query = DB::table('individuals')
199            ->join('name', static function (JoinClause $join): void {
200                $join
201                    ->on('name.n_file', '=', 'individuals.i_file')
202                    ->on('name.n_id', '=', 'individuals.i_id');
203            })
204            ->orderBy('n_sort')
205            ->distinct()
206            ->select(['individuals.*', 'n_sort']);
207
208        $this->whereTrees($query, 'i_file', $trees);
209        $this->whereSearch($query, 'n_full', $search);
210
211        return $this->paginateQuery($query, $this->individualRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
212    }
213
214    /**
215     * @param Place $place
216     *
217     * @return Collection<Individual>
218     */
219    public function searchIndividualsInPlace(Place $place): Collection
220    {
221        return DB::table('individuals')
222            ->join('placelinks', static function (JoinClause $join) {
223                $join
224                    ->on('i_file', '=', 'pl_file')
225                    ->on('i_id', '=', 'pl_gid');
226            })
227            ->where('i_file', '=', $place->tree()->id())
228            ->where('pl_p_id', '=', $place->id())
229            ->select(['individuals.*'])
230            ->get()
231            ->each($this->rowLimiter())
232            ->map($this->individualRowMapper())
233            ->filter(GedcomRecord::accessFilter());
234    }
235
236    /**
237     * Search for media objects.
238     *
239     * @param Tree[]   $trees
240     * @param string[] $search
241     * @param int      $offset
242     * @param int      $limit
243     *
244     * @return Collection<Media>
245     */
246    public function searchMedia(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
247    {
248        $query = DB::table('media');
249
250        $this->whereTrees($query, 'media.m_file', $trees);
251        $this->whereSearch($query, 'm_gedcom', $search);
252
253        return $this->paginateQuery($query, $this->mediaRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
254    }
255
256    /**
257     * Search for notes.
258     *
259     * @param Tree[]   $trees
260     * @param string[] $search
261     * @param int      $offset
262     * @param int      $limit
263     *
264     * @return Collection<Note>
265     */
266    public function searchNotes(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
267    {
268        $query = DB::table('other')
269            ->where('o_type', '=', 'NOTE');
270
271        $this->whereTrees($query, 'o_file', $trees);
272        $this->whereSearch($query, 'o_gedcom', $search);
273
274        return $this->paginateQuery($query, $this->noteRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
275    }
276
277    /**
278     * Search for repositories.
279     *
280     * @param Tree[]   $trees
281     * @param string[] $search
282     * @param int      $offset
283     * @param int      $limit
284     *
285     * @return Collection<Repository>
286     */
287    public function searchRepositories(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
288    {
289        $query = DB::table('other')
290            ->where('o_type', '=', 'REPO');
291
292        $this->whereTrees($query, 'o_file', $trees);
293        $this->whereSearch($query, 'o_gedcom', $search);
294
295        return $this->paginateQuery($query, $this->repositoryRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
296    }
297
298    /**
299     * Search for sources.
300     *
301     * @param Tree[]   $trees
302     * @param string[] $search
303     * @param int      $offset
304     * @param int      $limit
305     *
306     * @return Collection<Source>
307     */
308    public function searchSources(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
309    {
310        $query = DB::table('sources');
311
312        $this->whereTrees($query, 's_file', $trees);
313        $this->whereSearch($query, 's_gedcom', $search);
314
315        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
316    }
317
318    /**
319     * Search for sources by name.
320     *
321     * @param Tree[]   $trees
322     * @param string[] $search
323     * @param int      $offset
324     * @param int      $limit
325     *
326     * @return Collection<Source>
327     */
328    public function searchSourcesByName(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
329    {
330        $query = DB::table('sources')
331            ->orderBy('s_name');
332
333        $this->whereTrees($query, 's_file', $trees);
334        $this->whereSearch($query, 's_name', $search);
335
336        return $this->paginateQuery($query, $this->sourceRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
337    }
338
339    /**
340     * Search for submitters.
341     *
342     * @param Tree[]   $trees
343     * @param string[] $search
344     * @param int      $offset
345     * @param int      $limit
346     *
347     * @return Collection<Submitter>
348     */
349    public function searchSubmitters(array $trees, array $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
350    {
351        $query = DB::table('other')
352            ->where('o_type', '=', 'SUBM');
353
354        $this->whereTrees($query, 'o_file', $trees);
355        $this->whereSearch($query, 'o_gedcom', $search);
356
357        return $this->paginateQuery($query, $this->submitterRowMapper(), GedcomRecord::accessFilter(), $offset, $limit);
358    }
359
360    /**
361     * Search for places.
362     *
363     * @param Tree   $tree
364     * @param string $search
365     * @param int    $offset
366     * @param int    $limit
367     *
368     * @return Collection<Place>
369     */
370    public function searchPlaces(Tree $tree, string $search, int $offset = 0, int $limit = PHP_INT_MAX): Collection
371    {
372        $query = DB::table('places AS p0')
373            ->where('p0.p_file', '=', $tree->id())
374            ->leftJoin('places AS p1', 'p1.p_id', '=', 'p0.p_parent_id')
375            ->leftJoin('places AS p2', 'p2.p_id', '=', 'p1.p_parent_id')
376            ->leftJoin('places AS p3', 'p3.p_id', '=', 'p2.p_parent_id')
377            ->leftJoin('places AS p4', 'p4.p_id', '=', 'p3.p_parent_id')
378            ->leftJoin('places AS p5', 'p5.p_id', '=', 'p4.p_parent_id')
379            ->leftJoin('places AS p6', 'p6.p_id', '=', 'p5.p_parent_id')
380            ->leftJoin('places AS p7', 'p7.p_id', '=', 'p6.p_parent_id')
381            ->leftJoin('places AS p8', 'p8.p_id', '=', 'p7.p_parent_id')
382            ->orderBy('p0.p_place')
383            ->orderBy('p1.p_place')
384            ->orderBy('p2.p_place')
385            ->orderBy('p3.p_place')
386            ->orderBy('p4.p_place')
387            ->orderBy('p5.p_place')
388            ->orderBy('p6.p_place')
389            ->orderBy('p7.p_place')
390            ->orderBy('p8.p_place')
391            ->select([
392                'p0.p_place AS place0',
393                'p1.p_place AS place1',
394                'p2.p_place AS place2',
395                'p3.p_place AS place3',
396                'p4.p_place AS place4',
397                'p5.p_place AS place5',
398                'p6.p_place AS place6',
399                'p7.p_place AS place7',
400                'p8.p_place AS place8',
401            ]);
402
403        // Filter each level of the hierarchy.
404        foreach (explode(',', $search, 9) as $level => $string) {
405            $query->where('p' . $level . '.p_place', 'LIKE', '%' . addcslashes($string, '\\%_') . '%');
406        }
407
408        $row_mapper = static function (stdClass $row) use ($tree): Place {
409            $place = implode(', ', array_filter((array) $row));
410
411            return new Place($place, $tree);
412        };
413
414        $filter = static function (): bool {
415            return true;
416        };
417
418        return $this->paginateQuery($query, $row_mapper, $filter, $offset, $limit);
419    }
420
421    /**
422     * @param Tree[]   $trees
423     * @param string[] $fields
424     * @param string[] $modifiers
425     *
426     * @return Collection<Individual>
427     */
428    public function searchIndividualsAdvanced(array $trees, array $fields, array $modifiers): Collection
429    {
430        $fields = array_filter($fields);
431
432        $query = DB::table('individuals')
433            ->select(['individuals.*'])
434            ->distinct();
435
436        $this->whereTrees($query, 'i_file', $trees);
437
438        // Join the following tables
439        $father_name   = false;
440        $mother_name   = false;
441        $spouse_family = false;
442        $indi_name     = false;
443        $indi_dates    = [];
444        $fam_dates     = [];
445        $indi_plac     = false;
446        $fam_plac      = false;
447
448        foreach ($fields as $field_name => $field_value) {
449            if ($field_value !== '') {
450                // Fields can have up to 4 parts, but we only need the first 3 to identify
451                // which tables to select
452                $field_parts = explode(':', $field_name . '::');
453
454                if ($field_parts[0] === 'FAMC') {
455                    // Parent name - FAMC:[HUSB|WIFE]:NAME:[GIVN|SURN]
456                    if ($field_parts[1] === 'HUSB') {
457                        $father_name = true;
458                    } else {
459                        $mother_name = true;
460                    }
461                } elseif ($field_parts[0] === 'NAME') {
462                    // Individual name - NAME:[GIVN|SURN]
463                    $indi_name = true;
464                } elseif ($field_parts[0] === 'FAMS') {
465                    // Family facts - FAMS:NOTE or FAMS:[FACT]:[DATE|PLAC]
466                    $spouse_family = true;
467                    if ($field_parts[2] === 'DATE') {
468                        $fam_dates[] = $field_parts[1];
469                    } elseif ($field_parts[2] === 'PLAC') {
470                        $fam_plac = true;
471                    }
472                } else {
473                    // Individual facts - [FACT] or [FACT]:[DATE|PLAC]
474                    if ($field_parts[1] === 'DATE') {
475                        $indi_dates[] = $field_parts[0];
476                    } elseif ($field_parts[1] === 'PLAC') {
477                        $indi_plac = true;
478                    }
479                }
480            }
481        }
482
483        if ($father_name || $mother_name) {
484            $query->join('link AS l1', static function (JoinClause $join): void {
485                $join
486                    ->on('l1.l_file', '=', 'individuals.i_file')
487                    ->on('l1.l_from', '=', 'individuals.i_id')
488                    ->where('l1.l_type', '=', 'FAMC');
489            });
490
491            if ($father_name) {
492                $query->join('link AS l2', static function (JoinClause $join): void {
493                    $join
494                        ->on('l2.l_file', '=', 'l1.l_file')
495                        ->on('l2.l_from', '=', 'l1.l_to')
496                        ->where('l2.l_type', '=', 'HUSB');
497                });
498                $query->join('name AS father_name', static function (JoinClause $join): void {
499                    $join
500                        ->on('father_name.n_file', '=', 'l2.l_file')
501                        ->on('father_name.n_id', '=', 'l2.l_to');
502                });
503            }
504
505            if ($mother_name) {
506                $query->join('link AS l3', static function (JoinClause $join): void {
507                    $join
508                        ->on('l3.l_file', '=', 'l1.l_file')
509                        ->on('l3.l_from', '=', 'l1.l_to')
510                        ->where('l3.l_type', '=', 'WIFE');
511                });
512                $query->join('name AS mother_name', static function (JoinClause $join): void {
513                    $join
514                        ->on('mother_name.n_file', '=', 'l3.l_file')
515                        ->on('mother_name.n_id', '=', 'l3.l_to');
516                });
517            }
518        }
519
520        if ($spouse_family) {
521            $query->join('link AS l4', static function (JoinClause $join): void {
522                $join
523                    ->on('l4.l_file', '=', 'individuals.i_file')
524                    ->on('l4.l_from', '=', 'individuals.i_id')
525                    ->where('l4.l_type', '=', 'FAMS');
526            });
527            $query->join('families AS spouse_families', static function (JoinClause $join): void {
528                $join
529                    ->on('spouse_families.f_file', '=', 'l4.l_file')
530                    ->on('spouse_families.f_id', '=', 'l4.l_to');
531            });
532        }
533
534        if ($indi_name) {
535            $query->join('name AS individual_name', static function (JoinClause $join): void {
536                $join
537                    ->on('individual_name.n_file', '=', 'individuals.i_file')
538                    ->on('individual_name.n_id', '=', 'individuals.i_id');
539            });
540        }
541
542        foreach (array_unique($indi_dates) as $indi_date) {
543            $query->join('dates AS date_' . $indi_date, static function (JoinClause $join) use ($indi_date): void {
544                $join
545                    ->on('date_' . $indi_date . '.d_file', '=', 'individuals.i_file')
546                    ->on('date_' . $indi_date . '.d_gid', '=', 'individuals.i_id');
547            });
548        }
549
550        foreach (array_unique($fam_dates) as $fam_date) {
551            $query->join('dates AS date_' . $fam_date, static function (JoinClause $join) use ($fam_date): void {
552                $join
553                    ->on('date_' . $fam_date . '.d_file', '=', 'spouse_families.f_file')
554                    ->on('date_' . $fam_date . '.d_gid', '=', 'spouse_families.f_id');
555            });
556        }
557
558        if ($indi_plac) {
559            $query->join('placelinks AS individual_placelinks', static function (JoinClause $join): void {
560                $join
561                    ->on('individual_placelinks.pl_file', '=', 'individuals.i_file')
562                    ->on('individual_placelinks.pl_gid', '=', 'individuals.i_id');
563            });
564            $query->join('places AS individual_places', static function (JoinClause $join): void {
565                $join
566                    ->on('individual_places.p_file', '=', 'individual_placelinks.pl_file')
567                    ->on('individual_places.p_id', '=', 'individual_placelinks.pl_p_id');
568            });
569        }
570
571        if ($fam_plac) {
572            $query->join('placelinks AS familyl_placelinks', static function (JoinClause $join): void {
573                $join
574                    ->on('familyl_placelinks.pl_file', '=', 'individuals.i_file')
575                    ->on('familyl_placelinks.pl_gid', '=', 'individuals.i_id');
576            });
577            $query->join('places AS family_places', static function (JoinClause $join): void {
578                $join
579                    ->on('family_places.p_file', '=', 'familyl_placelinks.pl_file')
580                    ->on('family_places.p_id', '=', 'familyl_placelinks.pl_p_id');
581            });
582        }
583
584        foreach ($fields as $field_name => $field_value) {
585            $parts = explode(':', $field_name . ':::');
586            if ($parts[0] === 'NAME') {
587                // NAME:*
588                switch ($parts[1]) {
589                    case 'GIVN':
590                        switch ($modifiers[$field_name]) {
591                            case 'EXACT':
592                                $query->where('individual_name.n_givn', '=', $field_value);
593                                break;
594                            case 'BEGINS':
595                                $query->where('individual_name.n_givn', 'LIKE', $field_value . '%');
596                                break;
597                            case 'CONTAINS':
598                                $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
599                                break;
600                            case 'SDX_STD':
601                                $sdx = Soundex::russell($field_value);
602                                if ($sdx !== '') {
603                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_std', $sdx);
604                                } else {
605                                    // No phonetic content? Use a substring match
606                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
607                                }
608                                break;
609                            case 'SDX': // SDX uses DM by default.
610                            case 'SDX_DM':
611                                $sdx = Soundex::daitchMokotoff($field_value);
612                                if ($sdx !== '') {
613                                    $this->wherePhonetic($query, 'individual_name.n_soundex_givn_dm', $sdx);
614                                } else {
615                                    // No phonetic content? Use a substring match
616                                    $query->where('individual_name.n_givn', 'LIKE', '%' . $field_value . '%');
617                                }
618                                break;
619                        }
620                        unset($fields[$field_name]);
621                        break;
622                    case 'SURN':
623                        switch ($modifiers[$field_name]) {
624                            case 'EXACT':
625                                $query->where(function (Builder $query) use ($field_value): void {
626                                    $query
627                                        ->where('individual_name.n_surn', '=', $field_value)
628                                        ->orWhere('individual_name.n_surname', '=', $field_value);
629                                });
630                                break;
631                            case 'BEGINS':
632                                $query->where(function (Builder $query) use ($field_value): void {
633                                    $query
634                                        ->where('individual_name.n_surn', 'LIKE', $field_value . '%')
635                                        ->orWhere('individual_name.n_surname', 'LIKE', $field_value . '%');
636                                });
637                                break;
638                            case 'CONTAINS':
639                                $query->where(function (Builder $query) use ($field_value): void {
640                                    $query
641                                        ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
642                                        ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
643                                });
644                                break;
645                            case 'SDX_STD':
646                                $sdx = Soundex::russell($field_value);
647                                if ($sdx !== '') {
648                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_std', $sdx);
649                                } else {
650                                    // No phonetic content? Use a substring match
651                                    $query->where(function (Builder $query) use ($field_value): void {
652                                        $query
653                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
654                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
655                                    });
656                                }
657                                break;
658                            case 'SDX': // SDX uses DM by default.
659                            case 'SDX_DM':
660                                $sdx = Soundex::daitchMokotoff($field_value);
661                                if ($sdx !== '') {
662                                    $this->wherePhonetic($query, 'individual_name.n_soundex_surn_dm', $sdx);
663                                } else {
664                                    // No phonetic content? Use a substring match
665                                    $query->where(function (Builder $query) use ($field_value): void {
666                                        $query
667                                            ->where('individual_name.n_surn', 'LIKE', '%' . $field_value . '%')
668                                            ->orWhere('individual_name.n_surname', 'LIKE', '%' . $field_value . '%');
669                                    });
670                                }
671                                break;
672                        }
673                        unset($fields[$field_name]);
674                        break;
675                    case 'NICK':
676                    case '_MARNM':
677                    case '_HEB':
678                    case '_AKA':
679                        $like = "%\n1 " . $parts[0] . "%\n2 " . $parts[1] . ' %' . preg_quote($field_value, '/') . '%';
680                        $query->where('individuals.i_gedcom', 'LIKE', $like);
681                        break;
682                }
683            } elseif ($parts[1] === 'DATE') {
684                // *:DATE
685                $date = new Date($field_value);
686                if ($date->isOK()) {
687                    $delta = 365 * ($modifiers[$field_name] ?? 0);
688                    $query
689                        ->where('date_' . $parts[0] . '.d_fact', '=', $parts[0])
690                        ->where('date_' . $parts[0] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
691                        ->where('date_' . $parts[0] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
692                }
693                unset($fields[$field_name]);
694            } elseif ($parts[0] === 'FAMS' && $parts[2] === 'DATE') {
695                // FAMS:*:DATE
696                $date = new Date($field_value);
697                if ($date->isOK()) {
698                    $delta = 365 * $modifiers[$field_name];
699                    $query
700                        ->where('date_' . $parts[1] . '.d_fact', '=', $parts[1])
701                        ->where('date_' . $parts[1] . '.d_julianday1', '>=', $date->minimumJulianDay() - $delta)
702                        ->where('date_' . $parts[1] . '.d_julianday2', '<=', $date->maximumJulianDay() + $delta);
703                }
704                unset($fields[$field_name]);
705            } elseif ($parts[1] === 'PLAC') {
706                // *:PLAC
707                // SQL can only link a place to a person/family, not to an event.
708                $query->where('individual_places.p_place', 'LIKE', '%' . $field_value . '%');
709            } elseif ($parts[0] === 'FAMS' && $parts[2] === 'PLAC') {
710                // FAMS:*:PLAC
711                // SQL can only link a place to a person/family, not to an event.
712                $query->where('family_places.p_place', 'LIKE', '%' . $field_value . '%');
713            } elseif ($parts[0] === 'FAMC' && $parts[2] === 'NAME') {
714                $table = $parts[1] === 'HUSB' ? 'father_name' : 'mother_name';
715                // NAME:*
716                switch ($parts[3]) {
717                    case 'GIVN':
718                        switch ($modifiers[$field_name]) {
719                            case 'EXACT':
720                                $query->where($table . '.n_givn', '=', $field_value);
721                                break;
722                            case 'BEGINS':
723                                $query->where($table . '.n_givn', 'LIKE', $field_value . '%');
724                                break;
725                            case 'CONTAINS':
726                                $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
727                                break;
728                            case 'SDX_STD':
729                                $sdx = Soundex::russell($field_value);
730                                if ($sdx !== '') {
731                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_std', $sdx);
732                                } else {
733                                    // No phonetic content? Use a substring match
734                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
735                                }
736                                break;
737                            case 'SDX': // SDX uses DM by default.
738                            case 'SDX_DM':
739                                $sdx = Soundex::daitchMokotoff($field_value);
740                                if ($sdx !== '') {
741                                    $this->wherePhonetic($query, $table . '.n_soundex_givn_dm', $sdx);
742                                } else {
743                                    // No phonetic content? Use a substring match
744                                    $query->where($table . '.n_givn', 'LIKE', '%' . $field_value . '%');
745                                }
746                                break;
747                        }
748                        break;
749                    case 'SURN':
750                        switch ($modifiers[$field_name]) {
751                            case 'EXACT':
752                                $query->where($table . '.n_surn', '=', $field_value);
753                                break;
754                            case 'BEGINS':
755                                $query->where($table . '.n_surn', 'LIKE', $field_value . '%');
756                                break;
757                            case 'CONTAINS':
758                                $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
759                                break;
760                            case 'SDX_STD':
761                                $sdx = Soundex::russell($field_value);
762                                if ($sdx !== '') {
763                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_std', $sdx);
764                                } else {
765                                    // No phonetic content? Use a substring match
766                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
767                                }
768                                break;
769                            case 'SDX': // SDX uses DM by default.
770                            case 'SDX_DM':
771                                $sdx = Soundex::daitchMokotoff($field_value);
772                                if ($sdx !== '') {
773                                    $this->wherePhonetic($query, $table . '.n_soundex_surn_dm', $sdx);
774                                } else {
775                                    // No phonetic content? Use a substring match
776                                    $query->where($table . '.n_surn', 'LIKE', '%' . $field_value . '%');
777                                }
778                                break;
779                        }
780                        break;
781                }
782                unset($fields[$field_name]);
783            } elseif ($parts[0] === 'FAMS') {
784                // e.g. searches for occupation, religion, note, etc.
785                // Initial matching only.  Need PHP to apply filter.
786                $query->where('spouse_families.f_gedcom', 'LIKE', "%\n1 " . $parts[1] . ' %' . $field_value . '%');
787            } elseif ($parts[1] === 'TYPE') {
788                // e.g. FACT:TYPE or EVEN:TYPE
789                // Initial matching only.  Need PHP to apply filter.
790                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[0] . "%\n2 TYPE %" . $field_value . '%');
791            } else {
792                // e.g. searches for occupation, religion, note, etc.
793                // Initial matching only.  Need PHP to apply filter.
794                $query->where('individuals.i_gedcom', 'LIKE', "%\n1 " . $parts[0] . '%' . $parts[1] . '%' . $field_value . '%');
795            }
796        }
797
798        return $query
799            ->get()
800            ->each($this->rowLimiter())
801            ->map($this->individualRowMapper())
802            ->filter(GedcomRecord::accessFilter())
803            ->filter(static function (Individual $individual) use ($fields): bool {
804                // Check for searches which were only partially matched by SQL
805                foreach ($fields as $field_name => $field_value) {
806                    $parts = explode(':', $field_name . '::::');
807
808                    // NAME:*
809                    if ($parts[0] === 'NAME') {
810                        $regex = '/\n1 NAME.*(?:\n2.*)*\n2 ' . $parts[1] . ' .*' . preg_quote($field_value, '/') . '/i';
811
812                        if (preg_match($regex, $individual->gedcom())) {
813                            continue;
814                        }
815
816                        return false;
817                    }
818
819                    $regex = '/' . preg_quote($field_value, '/') . '/i';
820
821                    // *:PLAC
822                    if ($parts[1] === 'PLAC') {
823                        foreach ($individual->facts([$parts[0]]) as $fact) {
824                            if (preg_match($regex, $fact->place()->gedcomName())) {
825                                continue 2;
826                            }
827                        }
828                        return false;
829                    }
830
831                    // FAMS:*:PLAC
832                    if ($parts[0] === 'FAMS' && $parts[2] === 'PLAC') {
833                        foreach ($individual->spouseFamilies() as $family) {
834                            foreach ($family->facts([$parts[1]]) as $fact) {
835                                if (preg_match($regex, $fact->place()->gedcomName())) {
836                                    continue 2;
837                                }
838                            }
839                        }
840                        return false;
841                    }
842
843                    // e.g. searches for occupation, religion, note, etc.
844                    if ($parts[0] === 'FAMS') {
845                        foreach ($individual->spouseFamilies() as $family) {
846                            foreach ($family->facts([$parts[1]]) as $fact) {
847                                if (preg_match($regex, $fact->value())) {
848                                    continue 3;
849                                }
850                            }
851                        }
852                        return false;
853                    }
854
855                    // e.g. FACT:TYPE or EVEN:TYPE
856                    if ($parts[1] === 'TYPE' || $parts[1] === '_WT_USER') {
857                        foreach ($individual->facts([$parts[0]]) as $fact) {
858                            if (preg_match($regex, $fact->attribute($parts[1]))) {
859                                continue 2;
860                            }
861                        }
862
863                        return false;
864                    }
865                }
866
867                return true;
868            });
869    }
870
871    /**
872     * @param string $soundex
873     * @param string $lastname
874     * @param string $firstname
875     * @param string $place
876     * @param Tree[] $search_trees
877     *
878     * @return Collection<Individual>
879     */
880    public function searchIndividualsPhonetic(string $soundex, string $lastname, string $firstname, string $place, array $search_trees): Collection
881    {
882        switch ($soundex) {
883            default:
884            case 'Russell':
885                $givn_sdx   = Soundex::russell($firstname);
886                $surn_sdx   = Soundex::russell($lastname);
887                $plac_sdx   = Soundex::russell($place);
888                $givn_field = 'n_soundex_givn_std';
889                $surn_field = 'n_soundex_surn_std';
890                $plac_field = 'p_std_soundex';
891                break;
892            case 'DaitchM':
893                $givn_sdx   = Soundex::daitchMokotoff($firstname);
894                $surn_sdx   = Soundex::daitchMokotoff($lastname);
895                $plac_sdx   = Soundex::daitchMokotoff($place);
896                $givn_field = 'n_soundex_givn_dm';
897                $surn_field = 'n_soundex_surn_dm';
898                $plac_field = 'p_dm_soundex';
899                break;
900        }
901
902        // Nothing to search for? Return nothing.
903        if ($givn_sdx === '' && $surn_sdx === '' && $plac_sdx === '') {
904            return new Collection();
905        }
906
907        $query = DB::table('individuals')
908            ->select(['individuals.*'])
909            ->distinct();
910
911        $this->whereTrees($query, 'i_file', $search_trees);
912
913        if ($plac_sdx !== '') {
914            $query->join('placelinks', static function (JoinClause $join): void {
915                $join
916                    ->on('placelinks.pl_file', '=', 'individuals.i_file')
917                    ->on('placelinks.pl_gid', '=', 'individuals.i_id');
918            });
919            $query->join('places', static function (JoinClause $join): void {
920                $join
921                    ->on('places.p_file', '=', 'placelinks.pl_file')
922                    ->on('places.p_id', '=', 'placelinks.pl_p_id');
923            });
924
925            $this->wherePhonetic($query, $plac_field, $plac_sdx);
926        }
927
928        if ($givn_sdx !== '' || $surn_sdx !== '') {
929            $query->join('name', static function (JoinClause $join): void {
930                $join
931                    ->on('name.n_file', '=', 'individuals.i_file')
932                    ->on('name.n_id', '=', 'individuals.i_id');
933            });
934
935            $this->wherePhonetic($query, $givn_field, $givn_sdx);
936            $this->wherePhonetic($query, $surn_field, $surn_sdx);
937        }
938
939        return $query
940            ->get()
941            ->each($this->rowLimiter())
942            ->map($this->individualRowMapper())
943            ->filter(GedcomRecord::accessFilter());
944    }
945
946    /**
947     * Paginate a search query.
948     *
949     * @param Builder $query      Searches the database for the desired records.
950     * @param Closure $row_mapper Converts a row from the query into a record.
951     * @param Closure $row_filter
952     * @param int     $offset     Skip this many rows.
953     * @param int     $limit      Take this many rows.
954     *
955     * @return Collection<mixed>
956     */
957    private function paginateQuery(Builder $query, Closure $row_mapper, Closure $row_filter, int $offset, int $limit): Collection
958    {
959        $collection = new Collection();
960
961        foreach ($query->cursor() as $row) {
962            $record = $row_mapper($row);
963            // If the object has a method "canShow()", then use it to filter for privacy.
964            if ($row_filter($record)) {
965                if ($offset > 0) {
966                    $offset--;
967                } else {
968                    if ($limit > 0) {
969                        $collection->push($record);
970                    }
971
972                    $limit--;
973
974                    if ($limit === 0) {
975                        break;
976                    }
977                }
978            }
979        }
980
981
982        return $collection;
983    }
984
985    /**
986     * Apply search filters to a SQL query column.  Apply collation rules to MySQL.
987     *
988     * @param Builder           $query
989     * @param Expression|string $field
990     * @param string[]          $search_terms
991     */
992    private function whereSearch(Builder $query, $field, array $search_terms): void
993    {
994        if ($field instanceof Expression) {
995            $field = $field->getValue();
996        }
997
998        foreach ($search_terms as $search_term) {
999            $query->where(new Expression($field), 'LIKE', '%' . addcslashes($search_term, '\\%_') . '%');
1000        }
1001    }
1002
1003    /**
1004     * Apply soundex search filters to a SQL query column.
1005     *
1006     * @param Builder           $query
1007     * @param Expression|string $field
1008     * @param string            $soundex
1009     */
1010    private function wherePhonetic(Builder $query, $field, string $soundex): void
1011    {
1012        if ($soundex !== '') {
1013            $query->where(static function (Builder $query) use ($soundex, $field): void {
1014                foreach (explode(':', $soundex) as $sdx) {
1015                    $query->orWhere($field, 'LIKE', '%' . $sdx . '%');
1016                }
1017            });
1018        }
1019    }
1020
1021    /**
1022     * @param Builder $query
1023     * @param string  $tree_id_field
1024     * @param Tree[]  $trees
1025     */
1026    private function whereTrees(Builder $query, string $tree_id_field, array $trees): void
1027    {
1028        $tree_ids = array_map(static function (Tree $tree): int {
1029            return $tree->id();
1030        }, $trees);
1031
1032        $query->whereIn($tree_id_field, $tree_ids);
1033    }
1034
1035    /**
1036     * Find the media object that uses a particular media file.
1037     *
1038     * @param string $file
1039     *
1040     * @return Media[]
1041     */
1042    public function findMediaObjectsForMediaFile(string $file): array
1043    {
1044        return DB::table('media')
1045            ->join('media_file', static function (JoinClause $join): void {
1046                $join
1047                    ->on('media_file.m_file', '=', 'media.m_file')
1048                    ->on('media_file.m_id', '=', 'media.m_id');
1049            })
1050            ->join('gedcom_setting', 'media.m_file', '=', 'gedcom_setting.gedcom_id')
1051            ->where(new Expression('setting_value || multimedia_file_refn'), '=', $file)
1052            ->select(['media.*'])
1053            ->distinct()
1054            ->get()
1055            ->map($this->mediaRowMapper())
1056            ->all();
1057    }
1058
1059    /**
1060     * A closure to filter records by privacy-filtered GEDCOM data.
1061     *
1062     * @param array<string> $search_terms
1063     *
1064     * @return Closure
1065     */
1066    private function rawGedcomFilter(array $search_terms): Closure
1067    {
1068        return static function (GedcomRecord $record) use ($search_terms): bool {
1069            // Ignore non-genealogy fields
1070            $gedcom = preg_replace('/\n\d (?:_UID|_WT_USER) .*/', '', $record->gedcom());
1071
1072            // Ignore matches in links
1073            $gedcom = preg_replace('/\n\d ' . Gedcom::REGEX_TAG . '( @' . Gedcom::REGEX_XREF . '@)?/', '', $gedcom);
1074
1075            // Re-apply the filtering
1076            foreach ($search_terms as $search_term) {
1077                if (mb_stripos($gedcom, $search_term) === false) {
1078                    return false;
1079                }
1080            }
1081
1082            return true;
1083        };
1084    }
1085
1086    /**
1087     * Searching for short or common text can give more results than the system can process.
1088     *
1089     * @param int $limit
1090     *
1091     * @return Closure
1092     */
1093    private function rowLimiter(int $limit = self::MAX_SEARCH_RESULTS): Closure
1094    {
1095        return static function () use ($limit): void {
1096            static $n = 0;
1097
1098            if (++$n > $limit) {
1099                $message = I18N::translate('The search returned too many results.');
1100
1101                throw new HttpServiceUnavailableException($message);
1102            }
1103        };
1104    }
1105
1106    /**
1107     * Convert a row from any tree in the families table into a family object.
1108     *
1109     * @return Closure
1110     */
1111    private function familyRowMapper(): Closure
1112    {
1113        return function (stdClass $row): Family {
1114            $tree = $this->tree_service->find((int) $row->f_file);
1115
1116            return Factory::family()->mapper($tree)($row);
1117        };
1118    }
1119
1120    /**
1121     * Convert a row from any tree in the individuals table into an individual object.
1122     *
1123     * @return Closure
1124     */
1125    private function individualRowMapper(): Closure
1126    {
1127        return function (stdClass $row): Individual {
1128            $tree = $this->tree_service->find((int) $row->i_file);
1129
1130            return Factory::individual()->mapper($tree)($row);
1131        };
1132    }
1133
1134    /**
1135     * Convert a row from any tree in the media table into an media object.
1136     *
1137     * @return Closure
1138     */
1139    private function mediaRowMapper(): Closure
1140    {
1141        return function (stdClass $row): Media {
1142            $tree = $this->tree_service->find((int) $row->m_file);
1143
1144            return Factory::media()->mapper($tree)($row);
1145        };
1146    }
1147
1148    /**
1149     * Convert a row from any tree in the other table into a note object.
1150     *
1151     * @return Closure
1152     */
1153    private function noteRowMapper(): Closure
1154    {
1155        return function (stdClass $row): Note {
1156            $tree = $this->tree_service->find((int) $row->o_file);
1157
1158            return Factory::note()->mapper($tree)($row);
1159        };
1160    }
1161
1162    /**
1163     * Convert a row from any tree in the other table into a repository object.
1164     *
1165     * @return Closure
1166     */
1167    private function repositoryRowMapper(): Closure
1168    {
1169        return function (stdClass $row): Repository {
1170            $tree = $this->tree_service->find((int) $row->o_file);
1171
1172            return Factory::repository()->mapper($tree)($row);
1173        };
1174    }
1175
1176    /**
1177     * Convert a row from any tree in the sources table into a source object.
1178     *
1179     * @return Closure
1180     */
1181    private function sourceRowMapper(): Closure
1182    {
1183        return function (stdClass $row): Source {
1184            $tree = $this->tree_service->find((int) $row->s_file);
1185
1186            return Factory::source()->mapper($tree)($row);
1187        };
1188    }
1189
1190    /**
1191     * Convert a row from any tree in the other table into a submitter object.
1192     *
1193     * @return Closure
1194     */
1195    private function submitterRowMapper(): Closure
1196    {
1197        return function (stdClass $row): Submitter {
1198            $tree = $this->tree_service->find((int) $row->o_file);
1199
1200            return Factory::submitter()->mapper($tree)($row);
1201        };
1202    }
1203}
1204