xref: /webtrees/app/Http/RequestHandlers/CheckTree.php (revision cb62cb3c3ddb419590d1c5813411dd49230199cd)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Http\RequestHandlers;
21
22use Fisharebest\Webtrees\Gedcom;
23use Fisharebest\Webtrees\Header;
24use Fisharebest\Webtrees\Http\ViewResponseTrait;
25use Fisharebest\Webtrees\I18N;
26use Fisharebest\Webtrees\Tree;
27use Illuminate\Database\Capsule\Manager as DB;
28use Illuminate\Database\Query\Expression;
29use Psr\Http\Message\ResponseInterface;
30use Psr\Http\Message\ServerRequestInterface;
31use Psr\Http\Server\RequestHandlerInterface;
32
33use function array_key_exists;
34use function assert;
35use function e;
36use function in_array;
37use function preg_match;
38use function preg_match_all;
39use function route;
40use function strtoupper;
41
42use const PREG_SET_ORDER;
43
/**
 * Check a tree for errors.
 *
 * Reads the raw GEDCOM of every record in a tree (with pending changes
 * applied) and reports links between records that are broken, that are not
 * allowed in the containing record, that point to the wrong type of record,
 * or that lack the expected reciprocal link.
 */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    /**
     * Link tags that can be checked, and the type of record each one must point to.
     *
     * 'ADOP' => 'FAM' is deliberately missing. It needs to be handled specially:
     * we may have both ADOP and FAMC links to the same FAM, but only store one.
     *
     * @var array<string,string>
     */
    private const XREF_LINKS = [
        'NOTE'          => 'NOTE',
        'SOUR'          => 'SOUR',
        'REPO'          => 'REPO',
        'OBJE'          => 'OBJE',
        'SUBM'          => 'SUBM',
        'FAMC'          => 'FAM',
        'FAMS'          => 'FAM',
        'HUSB'          => 'INDI',
        'WIFE'          => 'INDI',
        'CHIL'          => 'INDI',
        'ASSO'          => 'INDI',
        // A webtrees extension
        '_ASSO'         => 'INDI',
        'ALIA'          => 'INDI',
        // A webtrees extension
        'AUTH'          => 'INDI',
        'ANCI'          => 'SUBM',
        'DESI'          => 'SUBM',
        '_WT_OBJE_SORT' => 'OBJE',
        '_LOC'          => '_LOC',
    ];

    /**
     * Record types, and the link tags that are allowed in each of them.
     *
     * @var array<string,array<string>>
     */
    private const RECORD_LINKS = [
        'INDI' => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'FAMC',
            'FAMS',
            'ALIA',
            '_WT_OBJE_SORT',
            '_LOC',
        ],
        'FAM'  => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'HUSB',
            'WIFE',
            'CHIL',
            '_LOC',
        ],
        'SOUR' => [
            'NOTE',
            'OBJE',
            'REPO',
            'AUTH',
            '_LOC',
        ],
        'REPO' => ['NOTE'],
        // The spec also allows SOUR. handle() accepts OBJE:SOUR links without reporting them.
        'OBJE' => ['NOTE'],
        // The spec also allows SOUR. handle() accepts NOTE:SOUR links without reporting them.
        'NOTE' => [],
        'SUBM' => [
            'NOTE',
            'OBJE',
        ],
        'SUBN' => ['SUBM'],
        '_LOC' => [
            'SOUR',
            'OBJE',
            '_LOC',
            'NOTE',
        ],
    ];

    /**
     * Check every link in the tree and show the resulting errors and warnings.
     *
     * @param ServerRequestInterface $request Must carry a Tree in its "tree" attribute.
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        $records = $this->rawRecords($tree);

        $errors   = [];
        $warnings = [];

        // Generate lists of all links.
        // $all_links:   source XREF => [target XREF => link tag]
        // $upper_links: upper-case XREF => actual XREF (to suggest corrections for wrong-case links)
        // Only one tag is remembered per source/target pair; a later tag overwrites an earlier one.
        $all_links   = [];
        $upper_links = [];
        foreach ($records as $record) {
            $all_links[$record->xref]               = [];
            $upper_links[strtoupper($record->xref)] = $record->xref;
            preg_match_all('/\n\d (' . Gedcom::REGEX_TAG . ') @([^#@\n][^\n@]*)@/', $record->gedcom, $matches, PREG_SET_ORDER);
            foreach ($matches as $match) {
                $all_links[$record->xref][$match[2]] = $match[1];
            }
        }

        // The order of the tests below matters: each test assumes that the previous ones passed.
        foreach ($all_links as $xref1 => $links) {
            // PHP converts array keys to integers.
            $xref1 = (string) $xref1;

            // $type1 = type of the record containing the link
            $type1 = $records[$xref1]->type;
            foreach ($links as $xref2 => $type2) {
                // PHP converts array keys to integers.
                $xref2 = (string) $xref2;

                // $type2 = tag of the link, $type3 = type of the record it points to ('' if missing)
                $type3 = isset($records[$xref2]) ? $records[$xref2]->type : '';
                if (!array_key_exists($xref2, $all_links)) {
                    // The target does not exist. If it exists in a different case, suggest it.
                    if (array_key_exists(strtoupper($xref2), $upper_links)) {
                        $warnings[] =
                            $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                            /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                            I18N::translate('%1$s does not exist. Did you mean %2$s?', $this->checkLink($tree, $xref2), $this->checkLink($tree, $upper_links[strtoupper($xref2)]));
                    } else {
                        /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                        $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not exist.', $this->checkLink($tree, $xref2));
                    }
                } elseif ($type2 === 'SOUR' && $type1 === 'NOTE') {
                    // Notes are intended to add explanations and comments to other records. They should not have their own sources.
                    // Deliberately not reported.
                } elseif ($type2 === 'SOUR' && $type1 === 'OBJE') {
                    // Media objects are intended to illustrate other records, facts, and source/citations. They should not have their own sources.
                    // Deliberately not reported.
                } elseif ($type2 === 'OBJE' && $type1 === 'REPO') {
                    // Media objects in repositories are reported as a warning rather than an error.
                    $warnings[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (!array_key_exists($type1, self::RECORD_LINKS) || !in_array($type2, self::RECORD_LINKS[$type1], true) || !array_key_exists($type2, self::XREF_LINKS)) {
                    // Unknown record type, or a link tag that this record type may not contain.
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (self::XREF_LINKS[$type2] !== $type3) {
                    // Target XREF does exist - but is invalid
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                        /* I18N: %1$s is an internal ID number such as R123. %2$s and %3$s are record types, such as INDI or SOUR */
                        I18N::translate('%1$s is a %2$s but a %3$s is expected.', $this->checkLink($tree, $xref2), $this->formatType($type3), $this->formatType($type2));
                } elseif (
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMC', ['CHIL']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMS', ['HUSB', 'WIFE']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'CHIL', ['FAMC']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'HUSB', ['FAMS']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'WIFE', ['FAMS'])
                ) {
                    // Family/individual links must exist in both directions.
                    /* I18N: %1$s and %2$s are internal ID numbers such as R123 */
                    $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not have a link back to %2$s.', $this->checkLink($tree, $xref2), $this->checkLink($tree, $xref1));
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Load the raw GEDCOM of every record in a tree, with pending changes applied.
     *
     * We need to work with raw GEDCOM data, as we are looking for errors
     * which may prevent the GedcomRecord objects from working.
     *
     * Each returned object has the properties "xref", "gedcom" and "type".
     * Header and TRLR records are excluded.
     *
     * @param Tree $tree
     *
     * @return array<object> Keyed by XREF. Note that PHP converts numeric string keys to integers.
     */
    private function rawRecords(Tree $tree): array
    {
        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        // The third column takes its name ("type") from the first query of the union.
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);
        // Pending changes have no stored type; it is extracted from the GEDCOM below.
        $q6 = DB::table('change')
            ->where('gedcom_id', '=', $tree->id())
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")]);

        $rows = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->unionAll($q6)
            ->get()
            ->map(static function (object $row): object {
                // Extract type for pending record
                if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match)) {
                    $row->type = $match[1];
                }

                return $row;
            });

        // Later rows replace earlier ones with the same XREF.
        // NOTE(review): this relies on the union returning the pending changes
        // after the stored records, and in change_id order - confirm for each database driver.
        $records = [];

        foreach ($rows as $row) {
            if ($row->gedcom !== '') {
                // existing or updated record
                $records[$row->xref] = $row;
            } else {
                // deleted record
                unset($records[$row->xref]);
            }
        }

        return $records;
    }

    /**
     * Is the reciprocal of a link missing?
     *
     * Only applies when $type is the link tag named by $link. In that case, the
     * target record ($xref2) must link back to the source record ($xref1) using
     * one of the tags in $reciprocal.
     *
     * @param string               $type       The tag of the link being checked
     * @param array<array<string>> $links      Source XREF => [target XREF => link tag]; must contain $xref2
     * @param string               $xref1      The record containing the link
     * @param string               $xref2      The record that the link points to
     * @param string               $link       The link tag that this check applies to
     * @param array<string>        $reciprocal Acceptable tags for the link back from $xref2 to $xref1
     *
     * @return bool true if there is a problem (no link back, or a link back with the wrong tag)
     */
    private function checkReverseLink(string $type, array $links, string $xref1, string $xref2, string $link, array $reciprocal): bool
    {
        if ($type !== $link) {
            // This check is for a different kind of link.
            return false;
        }

        $reverse_links = $links[$xref2];

        if (!array_key_exists($xref1, $reverse_links)) {
            return true;
        }

        return !in_array($reverse_links[$xref1], $reciprocal, true);
    }

    /**
     * Create a message linking one record to another.
     *
     * The result is an HTML fragment.
     *
     * @param Tree   $tree
     * @param string $type1 Type of the record containing the link
     * @param string $xref1 XREF of the record containing the link
     * @param string $type2 Tag of the link
     * @param string $xref2 XREF that the link points to
     *
     * @return string
     */
    private function checkLinkMessage(Tree $tree, string $type1, string $xref1, string $type2, string $xref2): string
    {
        /* I18N: The placeholders are GEDCOM XREFs and tags. e.g. “INDI I123 contains a FAMC link to F234.” */
        return I18N::translate(
            '%1$s %2$s has a %3$s link to %4$s.',
            $this->formatType($type1),
            $this->checkLink($tree, $xref1),
            $this->formatType($type2),
            $this->checkLink($tree, $xref2)
        );
    }

    /**
     * Format a link to a record.
     *
     * The XREF comes from raw GEDCOM data, which may contain any character
     * other than "@" and newline, so it is escaped before being used as HTML.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string An HTML fragment
     */
    private function checkLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, [
            'xref' => $xref,
            'tree' => $tree->name(),
        ]);

        return '<b><a href="' . e($url) . '">' . e($xref) . '</a></b>';
    }

    /**
     * Format a record type.
     *
     * The type is read from the database or from raw GEDCOM data, so it is
     * escaped before being used as HTML.
     *
     * @param string $type
     *
     * @return string An HTML fragment
     */
    private function formatType(string $type): string
    {
        return '<b>' . e($type) . '</b>';
    }
}
336