xref: /webtrees/app/Http/RequestHandlers/CheckTree.php (revision 24f2a3af38709f9bf0a739b30264240d20ba34e8)
16fd01894SGreg Roach<?php
26fd01894SGreg Roach
36fd01894SGreg Roach/**
46fd01894SGreg Roach * webtrees: online genealogy
589f7189bSGreg Roach * Copyright (C) 2021 webtrees development team
66fd01894SGreg Roach * This program is free software: you can redistribute it and/or modify
76fd01894SGreg Roach * it under the terms of the GNU General Public License as published by
86fd01894SGreg Roach * the Free Software Foundation, either version 3 of the License, or
96fd01894SGreg Roach * (at your option) any later version.
106fd01894SGreg Roach * This program is distributed in the hope that it will be useful,
116fd01894SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
126fd01894SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
136fd01894SGreg Roach * GNU General Public License for more details.
146fd01894SGreg Roach * You should have received a copy of the GNU General Public License
1589f7189bSGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>.
166fd01894SGreg Roach */
176fd01894SGreg Roach
186fd01894SGreg Roachdeclare(strict_types=1);
196fd01894SGreg Roach
206fd01894SGreg Roachnamespace Fisharebest\Webtrees\Http\RequestHandlers;
216fd01894SGreg Roach
226fd01894SGreg Roachuse Fisharebest\Webtrees\Gedcom;
236fd01894SGreg Roachuse Fisharebest\Webtrees\GedcomTag;
246fd01894SGreg Roachuse Fisharebest\Webtrees\Header;
256fd01894SGreg Roachuse Fisharebest\Webtrees\Http\ViewResponseTrait;
266fd01894SGreg Roachuse Fisharebest\Webtrees\I18N;
276fd01894SGreg Roachuse Fisharebest\Webtrees\Tree;
286fd01894SGreg Roachuse Illuminate\Database\Capsule\Manager as DB;
296fd01894SGreg Roachuse Illuminate\Database\Query\Expression;
306fd01894SGreg Roachuse Psr\Http\Message\ResponseInterface;
316fd01894SGreg Roachuse Psr\Http\Message\ServerRequestInterface;
326fd01894SGreg Roachuse Psr\Http\Server\RequestHandlerInterface;
336fd01894SGreg Roachuse stdClass;
346fd01894SGreg Roach
356fd01894SGreg Roachuse function array_key_exists;
366fd01894SGreg Roachuse function assert;
376fd01894SGreg Roachuse function e;
386fd01894SGreg Roachuse function in_array;
396fd01894SGreg Roachuse function preg_match;
406fd01894SGreg Roachuse function preg_match_all;
416fd01894SGreg Roachuse function route;
426fd01894SGreg Roachuse function strtoupper;
436fd01894SGreg Roach
446fd01894SGreg Roachuse const PREG_SET_ORDER;
456fd01894SGreg Roach
/**
 * Check a tree for errors.
 *
 * Scans the raw GEDCOM of every record in a tree (including pending changes)
 * for links to other records, and reports links that are broken, point to the
 * wrong type of record, are not allowed in that record, or are missing their
 * reciprocal link.
 */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    /**
     * The type of record that each link tag is expected to point to.
     *
     * ADOP => FAM is deliberately omitted. It needs special handling, as we may
     * have both ADOP and FAMC links to the same FAM, but only store one.
     */
    private const XREF_LINKS = [
        'NOTE'          => 'NOTE',
        'SOUR'          => 'SOUR',
        'REPO'          => 'REPO',
        'OBJE'          => 'OBJE',
        'SUBM'          => 'SUBM',
        'FAMC'          => 'FAM',
        'FAMS'          => 'FAM',
        'HUSB'          => 'INDI',
        'WIFE'          => 'INDI',
        'CHIL'          => 'INDI',
        'ASSO'          => 'INDI',
        // A webtrees extension
        '_ASSO'         => 'INDI',
        'ALIA'          => 'INDI',
        // A webtrees extension
        'AUTH'          => 'INDI',
        'ANCI'          => 'SUBM',
        'DESI'          => 'SUBM',
        '_WT_OBJE_SORT' => 'OBJE',
        '_LOC'          => '_LOC',
    ];

    /**
     * The link tags that are allowed in each type of record.
     */
    private const RECORD_LINKS = [
        'INDI' => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'FAMC',
            'FAMS',
            'ALIA',
            '_WT_OBJE_SORT',
            '_LOC',
        ],
        'FAM'  => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'HUSB',
            'WIFE',
            'CHIL',
            '_LOC',
        ],
        'SOUR' => [
            'NOTE',
            'OBJE',
            'REPO',
            'AUTH',
        ],
        'REPO' => ['NOTE'],
        // The spec also allows SOUR. Such links are accepted silently by handle().
        'OBJE' => ['NOTE'],
        // The spec also allows SOUR. Such links are accepted silently by handle().
        'NOTE' => [],
        'SUBM' => [
            'NOTE',
            'OBJE',
        ],
        'SUBN' => ['SUBM'],
        '_LOC' => [
            'SOUR',
            'OBJE',
            '_LOC',
            'NOTE',
        ],
    ];

    /**
     * Check every link in the tree and show the resulting errors and warnings.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        $records = $this->fetchRecords($tree);

        $errors   = [];
        $warnings = [];

        // Generate lists of all links:
        // $all_links[source xref][target xref] = link tag
        // $upper_links[UPPER-CASE xref]        = xref as stored (to suggest case-only typos)
        $all_links   = [];
        $upper_links = [];
        foreach ($records as $record) {
            $all_links[$record->xref]               = [];
            $upper_links[strtoupper($record->xref)] = $record->xref;
            preg_match_all('/\n\d (' . Gedcom::REGEX_TAG . ') @([^#@\n][^\n@]*)@/', $record->gedcom, $matches, PREG_SET_ORDER);
            foreach ($matches as $match) {
                $all_links[$record->xref][$match[2]] = $match[1];
            }
        }

        foreach ($all_links as $xref1 => $links) {
            // PHP converts numeric array keys to integers.
            $xref1 = (string) $xref1;

            // $type1 is the type of the record that contains the link.
            $type1 = $records[$xref1]->type;
            foreach ($links as $xref2 => $type2) {
                // PHP converts numeric array keys to integers.
                $xref2 = (string) $xref2;

                // $type2 is the link tag. $type3 is the type of the target record ('' if there is none).
                $type3 = isset($records[$xref2]) ? $records[$xref2]->type : '';
                if (!array_key_exists($xref2, $all_links)) {
                    if (array_key_exists(strtoupper($xref2), $upper_links)) {
                        $warnings[] =
                            $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                            /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                            I18N::translate('%1$s does not exist. Did you mean %2$s?', $this->checkLink($tree, $xref2), $this->checkLink($tree, $upper_links[strtoupper($xref2)]));
                    } else {
                        /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                        $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not exist.', $this->checkLink($tree, $xref2));
                    }
                } elseif ($type2 === 'SOUR' && $type1 === 'NOTE') {
                    // Notes are intended to add explanations and comments to other records. They should not have their own sources.
                    // Deliberately not reported.
                } elseif ($type2 === 'SOUR' && $type1 === 'OBJE') {
                    // Media objects are intended to illustrate other records, facts, and source/citations. They should not have their own sources.
                    // Deliberately not reported.
                } elseif ($type2 === 'OBJE' && $type1 === 'REPO') {
                    $warnings[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (!array_key_exists($type1, self::RECORD_LINKS) || !in_array($type2, self::RECORD_LINKS[$type1], true) || !array_key_exists($type2, self::XREF_LINKS)) {
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (self::XREF_LINKS[$type2] !== $type3) {
                    // Target XREF does exist - but is invalid.
                    // Report the expected *record type* (e.g. FAM), not the link tag (e.g. FAMC).
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                        /* I18N: %1$s is an internal ID number such as R123. %2$s and %3$s are record types, such as INDI or SOUR */
                        I18N::translate('%1$s is a %2$s but a %3$s is expected.', $this->checkLink($tree, $xref2), $this->formatType($type3), $this->formatType(self::XREF_LINKS[$type2]));
                } elseif (
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMC', ['CHIL']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMS', ['HUSB', 'WIFE']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'CHIL', ['FAMC']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'HUSB', ['FAMS']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'WIFE', ['FAMS'])
                ) {
                    /* I18N: %1$s and %2$s are internal ID numbers such as R123 */
                    $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not have a link back to %2$s.', $this->checkLink($tree, $xref2), $this->checkLink($tree, $xref1));
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Load the raw GEDCOM of every record in the tree, with pending changes applied.
     *
     * We need to work with raw GEDCOM data, as we are looking for errors
     * which may prevent the GedcomRecord objects from working.
     *
     * @param Tree $tree
     *
     * @return array<stdClass> Rows with "xref", "gedcom" and "type" properties, indexed by xref.
     */
    private function fetchRecords(Tree $tree): array
    {
        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);

        $current = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->get();

        // Pending changes are fetched separately. The loop below relies on them being
        // applied after the stored records and in change_id order, and an ORDER BY on
        // a single member of a UNION is not guaranteed to be honoured by the database.
        $pending = DB::table('change')
            ->where('gedcom_id', '=', $tree->id())
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")])
            ->get();

        $records = [];

        foreach ([$current, $pending] as $rows) {
            foreach ($rows as $row) {
                if ($row->gedcom === '') {
                    // deleted record
                    unset($records[$row->xref]);
                    continue;
                }

                // Extract type for pending record
                if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match)) {
                    $row->type = $match[1];
                }

                // existing or updated record
                $records[$row->xref] = $row;
            }
        }

        return $records;
    }

    /**
     * Is the reciprocal of a link missing?
     *
     * Only links whose tag is $link are checked. For these, the target record ($xref2)
     * must link back to the source record ($xref1) using one of the $reciprocal tags.
     * The caller must ensure that $xref2 is a key of $links.
     *
     * @param string     $type       The tag of the link being checked
     * @param string[][] $links      All links: [source xref][target xref] => tag
     * @param string     $xref1      The record containing the link
     * @param string     $xref2      The record being linked to
     * @param string     $link       The tag that this check applies to
     * @param string[]   $reciprocal The tags that are acceptable for the link back
     *
     * @return bool true if the check applies and the link back is missing or has the wrong tag
     */
    private function checkReverseLink(string $type, array $links, string $xref1, string $xref2, string $link, array $reciprocal): bool
    {
        return $type === $link && (!array_key_exists($xref1, $links[$xref2]) || !in_array($links[$xref2][$xref1], $reciprocal, true));
    }

    /**
     * Create a message linking one record to another.
     *
     * @param Tree   $tree
     * @param string $type1 The type of the record containing the link
     * @param string $xref1 The record containing the link
     * @param string $type2 The tag of the link
     * @param string $xref2 The record being linked to
     *
     * @return string HTML
     */
    private function checkLinkMessage(Tree $tree, string $type1, string $xref1, string $type2, string $xref2): string
    {
        /* I18N: The placeholders are GEDCOM XREFs and tags. e.g. “INDI I123 contains a FAMC link to F234.” */
        return I18N::translate(
            '%1$s %2$s has a %3$s link to %4$s.',
            $this->formatType($type1),
            $this->checkLink($tree, $xref1),
            $this->formatType($type2),
            $this->checkLink($tree, $xref2)
        );
    }

    /**
     * Format a link to a record.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string HTML
     */
    private function checkLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, [
            'xref' => $xref,
            'tree' => $tree->name(),
        ]);

        // XREFs are extracted from raw GEDCOM data, and may contain HTML special characters.
        return '<b><a href="' . e($url) . '">' . e($xref) . '</a></b>';
    }

    /**
     * Format a record type.
     *
     * @param string $type
     *
     * @return string HTML
     */
    private function formatType(string $type): string
    {
        // The label is used as an attribute value, so it must be escaped for that context.
        return '<b title="' . e(GedcomTag::getLabel($type)) . '">' . e($type) . '</b>';
    }
}
337