1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2021 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Http\RequestHandlers; 21 22use Fisharebest\Webtrees\Gedcom; 23use Fisharebest\Webtrees\Header; 24use Fisharebest\Webtrees\Http\ViewResponseTrait; 25use Fisharebest\Webtrees\I18N; 26use Fisharebest\Webtrees\Tree; 27use Illuminate\Database\Capsule\Manager as DB; 28use Illuminate\Database\Query\Expression; 29use Psr\Http\Message\ResponseInterface; 30use Psr\Http\Message\ServerRequestInterface; 31use Psr\Http\Server\RequestHandlerInterface; 32 33use function array_key_exists; 34use function assert; 35use function e; 36use function in_array; 37use function preg_match; 38use function preg_match_all; 39use function route; 40use function strtoupper; 41 42use const PREG_SET_ORDER; 43 44/** 45 * Check a tree for errors. 
/**
 * Check a tree for errors.
 *
 * Reads the raw GEDCOM of every record in a tree and reports links that point
 * to missing records, are not allowed in the linking record, point to the wrong
 * type of record, or lack the expected reciprocal link.
 */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    // Link tag => the type of record that the link must point to.
    private const XREF_LINKS = [
        'NOTE'          => 'NOTE',
        'SOUR'          => 'SOUR',
        'REPO'          => 'REPO',
        'OBJE'          => 'OBJE',
        'SUBM'          => 'SUBM',
        'FAMC'          => 'FAM',
        'FAMS'          => 'FAM',
        // 'ADOP' => 'FAM' is omitted. Need to handle this case specially. We may have
        // both ADOP and FAMC links to the same FAM, but only store one.
        'HUSB'          => 'INDI',
        'WIFE'          => 'INDI',
        'CHIL'          => 'INDI',
        'ASSO'          => 'INDI',
        '_ASSO'         => 'INDI', // A webtrees extension
        'ALIA'          => 'INDI',
        'AUTH'          => 'INDI', // A webtrees extension
        'ANCI'          => 'SUBM',
        'DESI'          => 'SUBM',
        '_WT_OBJE_SORT' => 'OBJE',
        '_LOC'          => '_LOC',
    ];

    // Record type => the link tags that may appear in a record of that type.
    private const RECORD_LINKS = [
        'INDI' => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'FAMC',
            'FAMS',
            'ALIA',
            '_WT_OBJE_SORT',
            '_LOC',
        ],
        'FAM'  => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'HUSB',
            'WIFE',
            'CHIL',
            '_LOC',
        ],
        'SOUR' => [
            'NOTE',
            'OBJE',
            'REPO',
            'AUTH',
        ],
        'REPO' => ['NOTE'],
        // The spec also allows SOUR. handle() accepts OBJE -> SOUR links without reporting them.
        'OBJE' => ['NOTE'],
        // The spec also allows SOUR. handle() accepts NOTE -> SOUR links without reporting them.
        'NOTE' => [],
        'SUBM' => [
            'NOTE',
            'OBJE',
        ],
        'SUBN' => ['SUBM'],
        '_LOC' => [
            'SOUR',
            'OBJE',
            '_LOC',
            'NOTE',
        ],
    ];

    /**
     * Check every link in the tree and show the resulting errors and warnings
     * using the "admin/trees-check" view.
     *
     * @param ServerRequestInterface $request Must carry a Tree in its "tree" attribute.
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        // We need to work with raw GEDCOM data, as we are looking for errors
        // which may prevent the GedcomRecord objects from working.

        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        // In a union, the result columns take their names from the first query,
        // so o_type is read back below as "type".
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);
        // Pending changes do not store a record type, so it is selected as '' and
        // extracted from the GEDCOM below.
        $q6 = DB::table('change')
            ->where('gedcom_id', '=', $tree->id())
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")]);

        $rows = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->unionAll($q6)
            ->get()
            ->map(static function (object $row): object {
                // Extract type for pending record
                if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match)) {
                    $row->type = $match[1];
                }

                return $row;
            });

        // Index the rows by XREF. A later row replaces an earlier row with the same
        // XREF - presumably so that pending changes are applied on top of the stored records.
        $records = [];

        foreach ($rows as $row) {
            if ($row->gedcom !== '') {
                // existing or updated record
                $records[$row->xref] = $row;
            } else {
                // deleted record
                unset($records[$row->xref]);
            }
        }

        $errors   = [];
        $warnings = [];

        // Generate lists of all links
        // $all_links:   source XREF => [target XREF => link tag]
        // $upper_links: upper-case XREF => actual XREF, used to suggest corrections for case errors.
        $all_links   = [];
        $upper_links = [];
        foreach ($records as $record) {
            $all_links[$record->xref]               = [];
            $upper_links[strtoupper($record->xref)] = $record->xref;
            preg_match_all('/\n\d (' . Gedcom::REGEX_TAG . ') @([^#@\n][^\n@]*)@/', $record->gedcom, $matches, PREG_SET_ORDER);
            foreach ($matches as $match) {
                // Only one tag is kept for each target, so a later link to the same target replaces an earlier one.
                $all_links[$record->xref][$match[2]] = $match[1];
            }
        }

        // LOOK FOR BROKEN LINKS
        // $type1 = type of the linking record, $type2 = link tag, $type3 = type of the target record.
        foreach ($all_links as $xref1 => $links) {
            // PHP converts array keys to integers.
            $xref1 = (string) $xref1;

            $type1 = $records[$xref1]->type;
            foreach ($links as $xref2 => $type2) {
                // PHP converts array keys to integers.
                $xref2 = (string) $xref2;

                $type3 = isset($records[$xref2]) ? $records[$xref2]->type : '';
                if (!array_key_exists($xref2, $all_links)) {
                    if (array_key_exists(strtoupper($xref2), $upper_links)) {
                        // The target exists, but with different upper/lower case letters.
                        $warnings[] =
                            $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                            /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                            I18N::translate('%1$s does not exist. Did you mean %2$s?', $this->checkLink($tree, $xref2), $this->checkLink($tree, $upper_links[strtoupper($xref2)]));
                    } else {
                        /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                        $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not exist.', $this->checkLink($tree, $xref2));
                    }
                } elseif ($type2 === 'SOUR' && $type1 === 'NOTE') {
                    // Notes are intended to add explanations and comments to other records. They should not have their own sources.
                    // Nothing is reported for these links.
                } elseif ($type2 === 'SOUR' && $type1 === 'OBJE') {
                    // Media objects are intended to illustrate other records, facts, and source/citations. They should not have their own sources.
                    // Nothing is reported for these links.
                } elseif ($type2 === 'OBJE' && $type1 === 'REPO') {
                    $warnings[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (!array_key_exists($type1, self::RECORD_LINKS) || !in_array($type2, self::RECORD_LINKS[$type1], true) || !array_key_exists($type2, self::XREF_LINKS)) {
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (self::XREF_LINKS[$type2] !== $type3) {
                    // Target XREF does exist - but is invalid
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                        /* I18N: %1$s is an internal ID number such as R123. %2$s and %3$s are record types, such as INDI or SOUR */
                        I18N::translate('%1$s is a %2$s but a %3$s is expected.', $this->checkLink($tree, $xref2), $this->formatType($type3), $this->formatType($type2));
                } elseif (
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMC', ['CHIL']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMS', ['HUSB', 'WIFE']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'CHIL', ['FAMC']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'HUSB', ['FAMS']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'WIFE', ['FAMS'])
                ) {
                    /* I18N: %1$s and %2$s are internal ID numbers such as R123 */
                    $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not have a link back to %2$s.', $this->checkLink($tree, $xref2), $this->checkLink($tree, $xref1));
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Is the reciprocal of a family/individual link missing?
     *
     * Only applies when $type is the link tag $link; for any other tag the
     * result is false. $xref2 must be a key of $links.
     *
     * @param string               $type       The tag of the link from $xref1 to $xref2
     * @param array<array<string>> $links      Source XREF => [target XREF => link tag]
     * @param string               $xref1      The record containing the link
     * @param string               $xref2      The record being linked to
     * @param string               $link       The link tag that this check applies to
     * @param array<string>        $reciprocal The tags accepted for the link from $xref2 back to $xref1
     *
     * @return bool True if $xref2 has no link back to $xref1, or links back with a tag not in $reciprocal
     */
    private function checkReverseLink(string $type, array $links, string $xref1, string $xref2, string $link, array $reciprocal): bool
    {
        return $type === $link && (!array_key_exists($xref1, $links[$xref2]) || !in_array($links[$xref2][$xref1], $reciprocal, true));
    }

    /**
     * Create a message linking one record to another.
     *
     * @param Tree   $tree
     * @param string $type1 The type of the linking record
     * @param string $xref1 The XREF of the linking record
     * @param string $type2 The tag of the link
     * @param string $xref2 The XREF of the linked record
     *
     * @return string An HTML fragment
     */
    private function checkLinkMessage(Tree $tree, string $type1, string $xref1, string $type2, string $xref2): string
    {
        /* I18N: The placeholders are GEDCOM XREFs and tags. e.g. “INDI I123 contains a FAMC link to F234.” */
        return I18N::translate(
            '%1$s %2$s has a %3$s link to %4$s.',
            $this->formatType($type1),
            $this->checkLink($tree, $xref1),
            $this->formatType($type2),
            $this->checkLink($tree, $xref2)
        );
    }

    /**
     * Format a link to a record.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string An HTML fragment
     */
    private function checkLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, [
            'xref' => $xref,
            'tree' => $tree->name(),
        ]);

        // XREFs are read from raw GEDCOM, and can contain characters that are
        // special in HTML. Escape the link text as well as the URL.
        return '<b><a href="' . e($url) . '">' . e($xref) . '</a></b>';
    }

    /**
     * Format a record type.
     *
     * @param string $type
     *
     * @return string An HTML fragment
     */
    private function formatType(string $type): string
    {
        // Escaped because the result is embedded in HTML messages.
        return '<b>' . e($type) . '</b>';
    }
}