xref: /webtrees/app/Http/RequestHandlers/CheckTree.php (revision 39b152e6a8c9fdb4e88d9d7445c553a73459aad7)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Http\RequestHandlers;
21
22use Fisharebest\Webtrees\Elements\AbstractXrefElement;
23use Fisharebest\Webtrees\Elements\MultimediaFileReference;
24use Fisharebest\Webtrees\Elements\MultimediaFormat;
25use Fisharebest\Webtrees\Elements\SubmitterText;
26use Fisharebest\Webtrees\Elements\UnknownElement;
27use Fisharebest\Webtrees\Elements\XrefFamily;
28use Fisharebest\Webtrees\Elements\XrefIndividual;
29use Fisharebest\Webtrees\Elements\XrefLocation;
30use Fisharebest\Webtrees\Elements\XrefMedia;
31use Fisharebest\Webtrees\Elements\XrefNote;
32use Fisharebest\Webtrees\Elements\XrefRepository;
33use Fisharebest\Webtrees\Elements\XrefSource;
34use Fisharebest\Webtrees\Elements\XrefSubmission;
35use Fisharebest\Webtrees\Elements\XrefSubmitter;
36use Fisharebest\Webtrees\Factories\ElementFactory;
37use Fisharebest\Webtrees\Factories\ImageFactory;
38use Fisharebest\Webtrees\Family;
39use Fisharebest\Webtrees\Gedcom;
40use Fisharebest\Webtrees\Header;
41use Fisharebest\Webtrees\Http\ViewResponseTrait;
42use Fisharebest\Webtrees\I18N;
43use Fisharebest\Webtrees\Individual;
44use Fisharebest\Webtrees\Location;
45use Fisharebest\Webtrees\Media;
46use Fisharebest\Webtrees\Mime;
47use Fisharebest\Webtrees\Note;
48use Fisharebest\Webtrees\Registry;
49use Fisharebest\Webtrees\Repository;
50use Fisharebest\Webtrees\Services\TimeoutService;
51use Fisharebest\Webtrees\Source;
52use Fisharebest\Webtrees\Submission;
53use Fisharebest\Webtrees\Submitter;
54use Fisharebest\Webtrees\Tree;
55use Fisharebest\Webtrees\Validator;
56use Illuminate\Database\Capsule\Manager as DB;
57use Illuminate\Database\Query\Expression;
58use Psr\Http\Message\ResponseInterface;
59use Psr\Http\Message\ServerRequestInterface;
60use Psr\Http\Server\RequestHandlerInterface;
61
62use function array_key_exists;
63use function array_slice;
64use function e;
65use function implode;
66use function in_array;
67use function preg_match;
68use function route;
69use function str_contains;
70use function str_starts_with;
71use function strtoupper;
72use function substr_count;
73
/**
 * Check a tree for errors.
 *
 * Works on the raw GEDCOM text of every record in a tree (including pending
 * changes) and reports three lists of HTML messages: errors, warnings and
 * informational notes.  When the time limit is nearly reached, checking stops
 * and a "more" URL is generated which resumes at the record that was reached.
 */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    // Link elements, and the type of record that each one must point to.
    // The order matters: the first mismatching entry decides the message.
    private const XREF_RECORD_TYPES = [
        XrefFamily::class     => Family::RECORD_TYPE,
        XrefIndividual::class => Individual::RECORD_TYPE,
        XrefMedia::class      => Media::RECORD_TYPE,
        XrefNote::class       => Note::RECORD_TYPE,
        XrefSource::class     => Source::RECORD_TYPE,
        XrefRepository::class => Repository::RECORD_TYPE,
        XrefSubmitter::class  => Submitter::RECORD_TYPE,
        XrefSubmission::class => Submission::RECORD_TYPE,
        XrefLocation::class   => Location::RECORD_TYPE,
    ];

    private Gedcom $gedcom;

    private TimeoutService $timeout_service;

    /**
     * @param Gedcom         $gedcom
     * @param TimeoutService $timeout_service
     */
    public function __construct(Gedcom $gedcom, TimeoutService $timeout_service)
    {
        $this->gedcom          = $gedcom;
        $this->timeout_service = $timeout_service;
    }

    /**
     * Check every record in the tree and render the results.
     *
     * The optional "skip_to" query parameter is the XREF of the record at
     * which a previous, timed-out request stopped.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree    = Validator::attributes($request)->tree();
        $skip_to = Validator::queryParams($request)->string('skip_to', '');

        [$records, $xrefs] = $this->fetchRecords($tree);

        // If the record we were asked to resume at no longer exists, every
        // record would be skipped and the "more" link would point back to
        // this same, empty page.  Start again from the beginning instead.
        if ($skip_to !== '' && !array_key_exists($skip_to, $records)) {
            $skip_to = '';
        }

        $errors   = [];
        $warnings = [];
        $infos    = [];

        $element_factory = new ElementFactory();
        $this->gedcom->registerTags($element_factory, false);

        foreach ($records as $record) {
            if ($skip_to === $record->xref) {
                // This is where the previous request stopped - resume here.
                $skip_to = '';
            } elseif ($skip_to !== '') {
                // Already checked by a previous request.
                continue;
            } elseif ($this->timeout_service->isTimeNearlyUp()) {
                // If we are nearly out of time, then stop processing here
                $skip_to = $record->xref;
                break;
            }

            // Drop the level 0 line.  array_shift() re-indexes the array, so
            // $line_number below is a zero-based index into the remaining lines.
            // NOTE(review): this means the first level 1 line is reported as
            // "Line number 0" - confirm whether that is the intended numbering.
            $lines = explode("\n", $record->gedcom);
            array_shift($lines);

            $last_level = 0;
            $hierarchy  = [$record->type];

            foreach ($lines as $line_number => $line) {
                // Format a message about the current line.
                $report = fn (string $message): string => $this->lineError($tree, $record->type, $record->xref, $line_number, $line, $message);

                if (preg_match('/^(\d+) (\w+) ?(.*)/', $line, $match) !== 1) {
                    $errors[] = $report(I18N::translate('Invalid GEDCOM record.'));
                    break;
                }

                // A line may be at most one level deeper than the previous one.
                $level = (int) $match[1];
                if ($level > $last_level + 1) {
                    $errors[] = $report(I18N::translate('Invalid GEDCOM level number.'));
                    break;
                }

                $tag               = $match[2];
                $value             = $match[3];
                $hierarchy[$level] = $tag;
                $full_tag          = implode(':', array_slice($hierarchy, 0, 1 + $level));
                $element           = $element_factory->make($full_tag);
                $last_level        = $level;

                // Continuation lines are checked as plain text, whatever their parent.
                if ($tag === 'CONT') {
                    $element = new SubmitterText('CONT');
                }

                if ($element instanceof UnknownElement) {
                    if (str_starts_with($tag, '_') || str_starts_with($full_tag, '_') || str_contains($full_tag, ':_')) {
                        $warnings[] = $report(I18N::translate('Custom GEDCOM tags are discouraged. Try to use only standard GEDCOM tags.'));
                    } else {
                        // The message is output as HTML, and the tag starts with
                        // the record type, which is read from the database.
                        $errors[] = $report(I18N::translate('Invalid GEDCOM tag.') . ' ' . e($full_tag));
                    }
                } elseif ($element instanceof AbstractXrefElement) {
                    if (preg_match('/@(' . Gedcom::REGEX_XREF . ')@/', $value, $xref_match) === 1) {
                        // $xref1 is the XREF as written; $xref2 is the existing
                        // XREF (if any) that matches it, ignoring case.
                        $xref1  = $xref_match[1];
                        $xref2  = $xrefs[strtoupper($xref1)] ?? null;
                        $linked = $xref2 === null ? null : ($records[$xref2] ?? null);

                        if ($linked === null) {
                            $errors[] = $report(I18N::translate('%s does not exist.', e($xref1)));
                        } else {
                            $message = $this->linkTypeError($tree, $element, $xref1, $linked);

                            if ($message === null && $this->isMissingBackLink($full_tag, $record, $linked)) {
                                $link1   = $this->recordLink($tree, $linked->xref);
                                $link2   = $this->recordLink($tree, $record->xref);
                                $message = I18N::translate('%1$s does not have a link back to %2$s.', $link1, $link2);
                            }

                            if ($message !== null) {
                                $errors[] = $report($message);
                            } elseif ($xref1 !== $xref2) {
                                // The link only works if we ignore upper/lower case.
                                $warnings[] = $report(I18N::translate('%1$s does not exist. Did you mean %2$s?', e($xref1), e($xref2)));
                            }
                        }
                    } elseif ($tag === 'SOUR') {
                        $warnings[] = $report(I18N::translate('Inline-source records are discouraged.'));
                    } else {
                        $errors[] = $report(I18N::translate('Invalid GEDCOM value.'));
                    }
                } elseif ($element->canonical($value) !== $value) {
                    $expected = e($element->canonical($value));
                    // Make tab characters visible.
                    $actual   = strtr(e($value), ["\t" => '&rarr;']);
                    $infos[]  = $report(I18N::translate('“%1$s” should be “%2$s”.', $actual, $expected));
                } elseif ($element instanceof MultimediaFormat) {
                    $mime = Mime::TYPES[$value] ?? Mime::DEFAULT_TYPE;

                    if ($mime === Mime::DEFAULT_TYPE) {
                        $warnings[] = $report(I18N::translate('webtrees does not recognise this file format.'));
                    } elseif (str_starts_with($mime, 'image/') && !array_key_exists($mime, ImageFactory::SUPPORTED_FORMATS)) {
                        $warnings[] = $report(I18N::translate('webtrees cannot create thumbnails for this file format.'));
                    }
                } elseif ($element instanceof MultimediaFileReference && $value === 'gedcom.ged') {
                    $errors[] = $report(I18N::translate('This filename is not compatible with the GEDZIP file format.'));
                }
            }

            // A family may have at most one HUSB and one WIFE link.
            if ($record->type === Family::RECORD_TYPE) {
                foreach (['HUSB', 'WIFE'] as $spouse_tag) {
                    if (substr_count($record->gedcom, "\n1 " . $spouse_tag . ' @') > 1) {
                        $message  = I18N::translate('%s occurs too many times.', 'FAM:' . $spouse_tag);
                        $errors[] = $this->recordError($tree, $record->type, $record->xref, $message);
                    }
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        // A non-empty $skip_to means that we ran out of time before the end.
        $more_url = $skip_to === '' ? '' : route(self::class, ['tree' => $tree->name(), 'skip_to' => $skip_to]);

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'infos'    => $infos,
            'more_url' => $more_url,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Load the raw GEDCOM of every record in the tree, with pending changes applied.
     *
     * We need to work with raw GEDCOM data, as we are looking for errors
     * which may prevent the GedcomRecord objects from working.
     *
     * @param Tree $tree
     *
     * @return array{0:array<object>,1:array<string>} The records (objects with
     *         xref/gedcom/type properties) indexed by XREF, and a map from
     *         upper-case XREF to actual XREF for every row that was read -
     *         including records that are pending deletion.
     */
    private function fetchRecords(Tree $tree): array
    {
        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);
        // Pending changes come last, so that they replace the stored records.
        $q6 = DB::table('change')
            ->where('gedcom_id', '=', $tree->id())
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")]);

        $rows = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->unionAll($q6)
            ->get();

        $records = [];
        $xrefs   = [];

        foreach ($rows as $row) {
            // Extract type for pending record
            if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match) === 1) {
                $row->type = $match[1];
            }

            if ($row->gedcom !== '') {
                // existing or updated record
                $records[$row->xref] = $row;
            } else {
                // deleted record
                unset($records[$row->xref]);
            }

            $xrefs[strtoupper($row->xref)] = $row->xref;
        }

        return [$records, $xrefs];
    }

    /**
     * Check that a link points to the right type of record.
     *
     * @param Tree                $tree
     * @param AbstractXrefElement $element The element that contains the link
     * @param string              $xref    The XREF, as written in the link
     * @param object              $linked  The row for the linked record
     *
     * @return string|null An error message, or null if the type is acceptable
     */
    private function linkTypeError(Tree $tree, AbstractXrefElement $element, string $xref, object $linked): ?string
    {
        foreach (self::XREF_RECORD_TYPES as $class => $expected_type) {
            if ($element instanceof $class && $linked->type !== $expected_type) {
                return $this->linkErrorMessage($tree, $xref, $linked->type, $expected_type);
            }
        }

        return null;
    }

    /**
     * Family/individual links must exist in both directions.
     *
     * @param string $full_tag The tag of the link, e.g. "FAM:CHIL"
     * @param object $record   The row for the record containing the link
     * @param object $linked   The row for the record being linked to
     *
     * @return bool True if a reverse link is required, but not present
     */
    private function isMissingBackLink(string $full_tag, object $record, object $linked): bool
    {
        // Any one of these level 1 tags in the linked record is a valid reverse link.
        switch ($full_tag) {
            case 'FAM:HUSB':
            case 'FAM:WIFE':
                $reverse_tags = ['FAMS'];
                break;
            case 'FAM:CHIL':
                $reverse_tags = ['FAMC'];
                break;
            case 'INDI:FAMC':
                $reverse_tags = ['CHIL'];
                break;
            case 'INDI:FAMS':
                $reverse_tags = ['HUSB', 'WIFE'];
                break;
            default:
                // Other links are one-way.
                return false;
        }

        foreach ($reverse_tags as $reverse_tag) {
            if (str_contains($linked->gedcom, "\n1 " . $reverse_tag . ' @' . $record->xref . '@')) {
                return false;
            }
        }

        return true;
    }

    /**
     * The translated name of a record type.  Types that are not in the
     * list are shown as their (escaped) GEDCOM tag.
     *
     * @param string $type
     *
     * @return string
     */
    private function recordType(string $type): string
    {
        $types = [
            Family::RECORD_TYPE     => I18N::translate('Family'),
            Header::RECORD_TYPE     => I18N::translate('Header'),
            Individual::RECORD_TYPE => I18N::translate('Individual'),
            Location::RECORD_TYPE   => I18N::translate('Location'),
            Media::RECORD_TYPE      => I18N::translate('Media object'),
            Note::RECORD_TYPE       => I18N::translate('Note'),
            Repository::RECORD_TYPE => I18N::translate('Repository'),
            Source::RECORD_TYPE     => I18N::translate('Source'),
            Submission::RECORD_TYPE => I18N::translate('Submission'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitter'),
        ];

        return $types[$type] ?? e($type);
    }

    /**
     * Format an HTML link to a record, using the XREF as the link text.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string
     */
    private function recordLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, ['xref' => $xref, 'tree' => $tree->name()]);

        return '<a href="' . e($url) . '">' . e($xref) . '</a>';
    }

    /**
     * Format a message about one line of a record, as HTML.
     *
     * @param Tree   $tree
     * @param string $type        The type of the record containing the line
     * @param string $xref        The XREF of the record containing the line
     * @param int    $line_number The number to show for the line
     * @param string $line        The GEDCOM text of the line - this is escaped here
     * @param string $message     The message, as HTML - this is NOT escaped here
     *
     * @return string
     */
    private function lineError(Tree $tree, string $type, string $xref, int $line_number, string $line, string $message): string
    {
        return
            I18N::translate('%1$s: %2$s', $this->recordType($type), $this->recordLink($tree, $xref)) .
            ' — ' .
            I18N::translate('%1$s: %2$s', I18N::translate('Line number'), I18N::number($line_number)) .
            ' — ' .
            '<code>' . e($line) . '</code>' .
            '<br>' . $message;
    }

    /**
     * Format a message about a whole record, as HTML.
     *
     * @param Tree   $tree
     * @param string $type    The type of the record
     * @param string $xref    The XREF of the record
     * @param string $message The message, as HTML - this is NOT escaped here
     *
     * @return string
     */
    private function recordError(Tree $tree, string $type, string $xref, string $message): string
    {
        return I18N::translate('%1$s: %2$s', $this->recordType($type), $this->recordLink($tree, $xref)) . ' — ' . $message;
    }

    /**
     * Format a message about a link to the wrong type of record, as HTML.
     *
     * @param Tree   $tree
     * @param string $xref  The XREF in the link
     * @param string $type1 The type of the record that was found
     * @param string $type2 The type of record that was expected
     *
     * @return string
     */
    private function linkErrorMessage(Tree $tree, string $xref, string $type1, string $type2): string
    {
        $link  = $this->recordLink($tree, $xref);
        $type1 = $this->recordType($type1);
        $type2 = $this->recordType($type2);

        return I18N::translate('%1$s is a %2$s but a %3$s is expected.', $link, $type1, $type2);
    }
}
428