16fd01894SGreg Roach<?php 26fd01894SGreg Roach 36fd01894SGreg Roach/** 46fd01894SGreg Roach * webtrees: online genealogy 589f7189bSGreg Roach * Copyright (C) 2021 webtrees development team 66fd01894SGreg Roach * This program is free software: you can redistribute it and/or modify 76fd01894SGreg Roach * it under the terms of the GNU General Public License as published by 86fd01894SGreg Roach * the Free Software Foundation, either version 3 of the License, or 96fd01894SGreg Roach * (at your option) any later version. 106fd01894SGreg Roach * This program is distributed in the hope that it will be useful, 116fd01894SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of 126fd01894SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 136fd01894SGreg Roach * GNU General Public License for more details. 146fd01894SGreg Roach * You should have received a copy of the GNU General Public License 1589f7189bSGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>. 
166fd01894SGreg Roach */ 176fd01894SGreg Roach 186fd01894SGreg Roachdeclare(strict_types=1); 196fd01894SGreg Roach 206fd01894SGreg Roachnamespace Fisharebest\Webtrees\Http\RequestHandlers; 216fd01894SGreg Roach 226fd01894SGreg Roachuse Fisharebest\Webtrees\Gedcom; 236fd01894SGreg Roachuse Fisharebest\Webtrees\GedcomTag; 246fd01894SGreg Roachuse Fisharebest\Webtrees\Header; 256fd01894SGreg Roachuse Fisharebest\Webtrees\Http\ViewResponseTrait; 266fd01894SGreg Roachuse Fisharebest\Webtrees\I18N; 276fd01894SGreg Roachuse Fisharebest\Webtrees\Tree; 286fd01894SGreg Roachuse Illuminate\Database\Capsule\Manager as DB; 296fd01894SGreg Roachuse Illuminate\Database\Query\Expression; 306fd01894SGreg Roachuse Psr\Http\Message\ResponseInterface; 316fd01894SGreg Roachuse Psr\Http\Message\ServerRequestInterface; 326fd01894SGreg Roachuse Psr\Http\Server\RequestHandlerInterface; 336fd01894SGreg Roachuse stdClass; 346fd01894SGreg Roach 356fd01894SGreg Roachuse function array_key_exists; 366fd01894SGreg Roachuse function assert; 376fd01894SGreg Roachuse function e; 386fd01894SGreg Roachuse function in_array; 396fd01894SGreg Roachuse function preg_match; 406fd01894SGreg Roachuse function preg_match_all; 416fd01894SGreg Roachuse function route; 426fd01894SGreg Roachuse function strtoupper; 436fd01894SGreg Roach 446fd01894SGreg Roachuse const PREG_SET_ORDER; 456fd01894SGreg Roach 466fd01894SGreg Roach/** 476fd01894SGreg Roach * Check a tree for errors. 
/**
 * Request handler that scans the raw GEDCOM of a tree for broken, mistyped or
 * one-directional links between records and renders the findings as two lists
 * (errors and warnings) in the administration layout.
 */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    /**
     * Collect every record of the tree (with pending changes applied on top),
     * extract all "@XREF@" links and validate each one.
     *
     * @param ServerRequestInterface $request Must carry a "tree" attribute holding a Tree.
     *
     * @return ResponseInterface The rendered 'admin/trees-check' view.
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        // We need to work with raw GEDCOM data, as we are looking for errors
        // which may prevent the GedcomRecord objects from working.

        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        // Header and trailer records are excluded from the check.
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);
        // Pending changes have no stored type; it is filled in from the GEDCOM below.
        $q6 = DB::table('change')
            ->where('gedcom_id', '=', $tree->id())
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")]);

        $rows = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->unionAll($q6)
            ->get()
            ->map(static function (stdClass $row): stdClass {
                // Extract type for pending record
                if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match)) {
                    $row->type = $match[1];
                }

                return $row;
            });

        // Later rows replace earlier rows with the same XREF.
        // NOTE(review): this presumably relies on pending changes being returned
        // after the stored records (union order) - confirm for the database in use.
        $records = [];

        foreach ($rows as $row) {
            if ($row->gedcom !== '') {
                // existing or updated record
                $records[$row->xref] = $row;
            } else {
                // deleted record
                unset($records[$row->xref]);
            }
        }

        // LOOK FOR BROKEN LINKS

        // Link tag => record type that the link's target must have.
        $XREF_LINKS = [
            'NOTE'          => 'NOTE',
            'SOUR'          => 'SOUR',
            'REPO'          => 'REPO',
            'OBJE'          => 'OBJE',
            'SUBM'          => 'SUBM',
            'FAMC'          => 'FAM',
            'FAMS'          => 'FAM',
            //'ADOP'=>'FAM', // Need to handle this case specially. We may have both ADOP and FAMC links to the same FAM, but only store one.
            'HUSB'          => 'INDI',
            'WIFE'          => 'INDI',
            'CHIL'          => 'INDI',
            'ASSO'          => 'INDI',
            // A webtrees extension
            '_ASSO'         => 'INDI',
            'ALIA'          => 'INDI',
            // A webtrees extension
            'AUTH'          => 'INDI',
            'ANCI'          => 'SUBM',
            'DESI'          => 'SUBM',
            '_WT_OBJE_SORT' => 'OBJE',
            '_LOC'          => '_LOC',
        ];

        // Record type => link tags that may appear in a record of that type.
        $RECORD_LINKS = [
            'INDI' => [
                'NOTE',
                'OBJE',
                'SOUR',
                'SUBM',
                'ASSO',
                '_ASSO',
                'FAMC',
                'FAMS',
                'ALIA',
                '_WT_OBJE_SORT',
                '_LOC',
            ],
            'FAM'  => [
                'NOTE',
                'OBJE',
                'SOUR',
                'SUBM',
                'ASSO',
                '_ASSO',
                'HUSB',
                'WIFE',
                'CHIL',
                '_LOC',
            ],
            'SOUR' => [
                'NOTE',
                'OBJE',
                'REPO',
                'AUTH',
            ],
            'REPO' => ['NOTE'],
            // The spec also allows SOUR in OBJE and NOTE records. Those two
            // combinations are matched by dedicated branches in the loop below,
            // which currently emit no message for them.
            'OBJE' => ['NOTE'],
            'NOTE' => [],
            'SUBM' => [
                'NOTE',
                'OBJE',
            ],
            'SUBN' => ['SUBM'],
            '_LOC' => [
                'SOUR',
                'OBJE',
                '_LOC',
                'NOTE',
            ],
        ];

        $errors   = [];
        $warnings = [];

        // Generate lists of all links
        // $all_links:   source XREF => [target XREF => link tag]. Only one tag is
        //               kept per target, so a later tag to the same target wins.
        // $upper_links: upper-cased XREF => XREF as stored, used to suggest a
        //               correction when a link differs only by letter case.
        $all_links   = [];
        $upper_links = [];
        foreach ($records as $record) {
            $all_links[$record->xref]               = [];
            $upper_links[strtoupper($record->xref)] = $record->xref;
            preg_match_all('/\n\d (' . Gedcom::REGEX_TAG . ') @([^#@\n][^\n@]*)@/', $record->gedcom, $matches, PREG_SET_ORDER);
            foreach ($matches as $match) {
                $all_links[$record->xref][$match[2]] = $match[1];
            }
        }

        // $xref1/$type1: the linking record. $xref2/$type2: the target XREF and the
        // tag used for the link. $type3: the actual record type of the target.
        foreach ($all_links as $xref1 => $links) {
            // PHP converts array keys to integers.
            $xref1 = (string) $xref1;

            $type1 = $records[$xref1]->type;
            foreach ($links as $xref2 => $type2) {
                // PHP converts array keys to integers.
                $xref2 = (string) $xref2;

                $type3 = isset($records[$xref2]) ? $records[$xref2]->type : '';
                if (!array_key_exists($xref2, $all_links)) {
                    if (array_key_exists(strtoupper($xref2), $upper_links)) {
                        // Target is missing, but an XREF differing only by case exists.
                        $warnings[] =
                            $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                            /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                            I18N::translate('%1$s does not exist. Did you mean %2$s?', $this->checkLink($tree, $xref2), $this->checkLink($tree, $upper_links[strtoupper($xref2)]));
                    } else {
                        /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                        $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not exist.', $this->checkLink($tree, $xref2));
                    }
                } elseif ($type2 === 'SOUR' && $type1 === 'NOTE') {
                    // Notes are intended to add explanations and comments to other records. They should not have their own sources.
                } elseif ($type2 === 'SOUR' && $type1 === 'OBJE') {
                    // Media objects are intended to illustrate other records, facts, and source/citations. They should not have their own sources.
                } elseif ($type2 === 'OBJE' && $type1 === 'REPO') {
                    $warnings[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (!array_key_exists($type1, $RECORD_LINKS) || !in_array($type2, $RECORD_LINKS[$type1], true) || !array_key_exists($type2, $XREF_LINKS)) {
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif ($XREF_LINKS[$type2] !== $type3) {
                    // Target XREF does exist - but is invalid
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                        /* I18N: %1$s is an internal ID number such as R123. %2$s and %3$s are record types, such as INDI or SOUR */
                        I18N::translate('%1$s is a %2$s but a %3$s is expected.', $this->checkLink($tree, $xref2), $this->formatType($type3), $this->formatType($type2));
                } elseif (
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMC', ['CHIL']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMS', ['HUSB', 'WIFE']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'CHIL', ['FAMC']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'HUSB', ['FAMS']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'WIFE', ['FAMS'])
                ) {
                    /* I18N: %1$s and %2$s are internal ID numbers such as R123 */
                    $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not have a link back to %2$s.', $this->checkLink($tree, $xref2), $this->checkLink($tree, $xref1));
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Report whether a link of kind $link from $xref1 to $xref2 lacks its
     * reciprocal link from $xref2 back to $xref1.
     *
     * Returns false when $type is not $link, so several calls can be OR-ed
     * together, one per link kind. $xref2 must be a key of $links.
     *
     * @param string     $type       The tag of the link being examined
     * @param string[][] $links      Source XREF => [target XREF => tag]
     * @param string     $xref1      The record containing the link
     * @param string     $xref2      The record the link points to
     * @param string     $link       The link tag this call is responsible for
     * @param string[]   $reciprocal Tags accepted as the link back from $xref2 to $xref1
     *
     * @return bool True if the reverse link is missing or uses a tag not in $reciprocal
     */
    private function checkReverseLink(string $type, array $links, string $xref1, string $xref2, string $link, array $reciprocal): bool
    {
        return $type === $link && (!array_key_exists($xref1, $links[$xref2]) || !in_array($links[$xref2][$xref1], $reciprocal, true));
    }

    /**
     * Create a message linking one record to another.
     *
     * @param Tree   $tree
     * @param string $type1 Record type of the linking record
     * @param string $xref1 XREF of the linking record
     * @param string $type2 Tag used for the link
     * @param string $xref2 XREF the link points to
     *
     * @return string HTML fragment
     */
    private function checkLinkMessage(Tree $tree, string $type1, string $xref1, string $type2, string $xref2): string
    {
        /* I18N: The placeholders are GEDCOM XREFs and tags. e.g. “INDI I123 contains a FAMC link to F234.” */
        return I18N::translate(
            '%1$s %2$s has a %3$s link to %4$s.',
            $this->formatType($type1),
            $this->checkLink($tree, $xref1),
            $this->formatType($type2),
            $this->checkLink($tree, $xref2)
        );
    }

    /**
     * Format a link to a record.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string HTML fragment: a bold anchor to the record page, labelled with the XREF
     */
    private function checkLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, [
            'xref' => $xref,
            'tree' => $tree->name(),
        ]);

        // The XREF may have been captured from raw GEDCOM text, where it can contain
        // any character except "@" and newline. Escape it for the HTML text context,
        // just as the URL is escaped for the attribute context.
        return '<b><a href="' . e($url) . '">' . e($xref) . '</a></b>';
    }

    /**
     * Format a record type.
     *
     * @param string $type
     *
     * @return string HTML fragment: the type in bold, with its label as a tooltip
     */
    private function formatType(string $type): string
    {
        // NOTE(review): the label is placed in an attribute without escaping. Whether
        // GedcomTag::getLabel() can return markup or quotes is not visible here - confirm.
        return '<b title="' . GedcomTag::getLabel($type) . '">' . $type . '</b>';
    }
}