1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2021 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Http\RequestHandlers; 21 22use Fisharebest\Webtrees\Gedcom; 23use Fisharebest\Webtrees\Header; 24use Fisharebest\Webtrees\Http\ViewResponseTrait; 25use Fisharebest\Webtrees\I18N; 26use Fisharebest\Webtrees\Tree; 27use Illuminate\Database\Capsule\Manager as DB; 28use Illuminate\Database\Query\Expression; 29use Psr\Http\Message\ResponseInterface; 30use Psr\Http\Message\ServerRequestInterface; 31use Psr\Http\Server\RequestHandlerInterface; 32use stdClass; 33 34use function array_key_exists; 35use function assert; 36use function e; 37use function in_array; 38use function preg_match; 39use function preg_match_all; 40use function route; 41use function strtoupper; 42 43use const PREG_SET_ORDER; 44 45/** 46 * Check a tree for errors. 
/* Reports broken, disallowed, mistyped and one-way links between the raw GEDCOM records of a tree. */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    /**
     * Link tags that handle() recognises, mapped to the type of record each one must point to.
     *
     * ADOP (which would map to FAM) is not listed: it needs to be handled specially. We may have
     * both ADOP and FAMC links to the same FAM, but only store one.
     */
    private const XREF_LINKS = [
        'NOTE'          => 'NOTE',
        'SOUR'          => 'SOUR',
        'REPO'          => 'REPO',
        'OBJE'          => 'OBJE',
        'SUBM'          => 'SUBM',
        'FAMC'          => 'FAM',
        'FAMS'          => 'FAM',
        'HUSB'          => 'INDI',
        'WIFE'          => 'INDI',
        'CHIL'          => 'INDI',
        'ASSO'          => 'INDI',
        '_ASSO'         => 'INDI', // A webtrees extension
        'ALIA'          => 'INDI',
        'AUTH'          => 'INDI', // A webtrees extension
        'ANCI'          => 'SUBM',
        'DESI'          => 'SUBM',
        '_WT_OBJE_SORT' => 'OBJE',
        '_LOC'          => '_LOC',
    ];

    /**
     * Record types, mapped to the link tags that each type of record may contain.
     *
     * The spec also allows SOUR links in OBJE and NOTE records. They are not listed here;
     * handle() tests for those two combinations before it consults this table.
     */
    private const RECORD_LINKS = [
        'INDI' => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'FAMC',
            'FAMS',
            'ALIA',
            '_WT_OBJE_SORT',
            '_LOC',
        ],
        'FAM'  => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'HUSB',
            'WIFE',
            'CHIL',
            '_LOC',
        ],
        'SOUR' => [
            'NOTE',
            'OBJE',
            'REPO',
            'AUTH',
        ],
        'REPO' => ['NOTE'],
        'OBJE' => ['NOTE'],
        'NOTE' => [],
        'SUBM' => [
            'NOTE',
            'OBJE',
        ],
        'SUBN' => ['SUBM'],
        '_LOC' => [
            'SOUR',
            'OBJE',
            '_LOC',
            'NOTE',
        ],
    ];

    /**
     * Check every link in a tree and show the resulting errors and warnings.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        $records = $this->fetchRecords($tree);

        $errors   = [];
        $warnings = [];

        // Generate lists of all links
        $all_links   = [];
        $upper_links = [];
        foreach ($records as $record) {
            $all_links[$record->xref]               = [];
            $upper_links[strtoupper($record->xref)] = $record->xref;
            preg_match_all('/\n\d (' . Gedcom::REGEX_TAG . ') @([^#@\n][^\n@]*)@/', $record->gedcom, $matches, PREG_SET_ORDER);
            foreach ($matches as $match) {
                // Keyed by target XREF, so only one tag is remembered per pair of records.
                $all_links[$record->xref][$match[2]] = $match[1];
            }
        }

        foreach ($all_links as $xref1 => $links) {
            // PHP converts array keys to integers.
            $xref1 = (string) $xref1;

            $type1 = $records[$xref1]->type;
            foreach ($links as $xref2 => $type2) {
                // PHP converts array keys to integers.
                $xref2 = (string) $xref2;

                $type3 = isset($records[$xref2]) ? $records[$xref2]->type : '';
                if (!array_key_exists($xref2, $all_links)) {
                    if (array_key_exists(strtoupper($xref2), $upper_links)) {
                        // The target only exists with different letter-case.
                        $warnings[] =
                            $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                            /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                            I18N::translate('%1$s does not exist. Did you mean %2$s?', $this->checkLink($tree, $xref2), $this->checkLink($tree, $upper_links[strtoupper($xref2)]));
                    } else {
                        /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                        $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not exist.', $this->checkLink($tree, $xref2));
                    }
                } elseif ($type2 === 'SOUR' && $type1 === 'NOTE') {
                    // Notes are intended to add explanations and comments to other records. They should not have their own sources.
                } elseif ($type2 === 'SOUR' && $type1 === 'OBJE') {
                    // Media objects are intended to illustrate other records, facts, and source/citations. They should not have their own sources.
                } elseif ($type2 === 'OBJE' && $type1 === 'REPO') {
                    $warnings[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (!array_key_exists($type1, self::RECORD_LINKS) || !in_array($type2, self::RECORD_LINKS[$type1], true) || !array_key_exists($type2, self::XREF_LINKS)) {
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (self::XREF_LINKS[$type2] !== $type3) {
                    // Target XREF does exist - but is invalid.
                    // The expected type is the record type that the tag must point to (e.g. FAM for FAMC), not the tag itself.
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                        /* I18N: %1$s is an internal ID number such as R123. %2$s and %3$s are record types, such as INDI or SOUR */
                        I18N::translate('%1$s is a %2$s but a %3$s is expected.', $this->checkLink($tree, $xref2), $this->formatType($type3), $this->formatType(self::XREF_LINKS[$type2]));
                } elseif (
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMC', ['CHIL']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMS', ['HUSB', 'WIFE']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'CHIL', ['FAMC']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'HUSB', ['FAMS']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'WIFE', ['FAMS'])
                ) {
                    /* I18N: %1$s and %2$s are internal ID numbers such as R123 */
                    $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not have a link back to %2$s.', $this->checkLink($tree, $xref2), $this->checkLink($tree, $xref1));
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Load the raw GEDCOM of every record in a tree, with pending changes applied.
     *
     * We need to work with raw GEDCOM data, as we are looking for errors
     * which may prevent the GedcomRecord objects from working.
     *
     * @param Tree $tree
     *
     * @return array<stdClass> Rows with "xref", "gedcom" and "type" properties, keyed by XREF.
     */
    private function fetchRecords(Tree $tree): array
    {
        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);
        $q6 = DB::table('change')
            ->where('gedcom_id', '=', $tree->id())
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")]);

        $rows = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->unionAll($q6)
            ->get()
            ->map(static function (stdClass $row): stdClass {
                // Extract type for pending record
                if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match)) {
                    $row->type = $match[1];
                }

                return $row;
            });

        // A later row for the same XREF replaces (or removes) an earlier one.
        // NOTE(review): this presumably relies on the pending changes arriving after the stored records - confirm.
        $records = [];

        foreach ($rows as $row) {
            if ($row->gedcom !== '') {
                // existing or updated record
                $records[$row->xref] = $row;
            } else {
                // deleted record
                unset($records[$row->xref]);
            }
        }

        return $records;
    }

    /**
     * Is the link from $xref1 to $xref2 of the given kind, and missing its counterpart in the other direction?
     *
     * @param string               $type       The tag of the link from $xref1 to $xref2
     * @param array<array<string>> $links      Link tags, indexed by source XREF and then by target XREF
     * @param string               $xref1      The record containing the link
     * @param string               $xref2      The record being linked to; it must have an entry in $links
     * @param string               $link       The tag that this check applies to
     * @param array<string>        $reciprocal The tags that are acceptable for the link back from $xref2 to $xref1
     *
     * @return bool
     */
    private function checkReverseLink(string $type, array $links, string $xref1, string $xref2, string $link, array $reciprocal): bool
    {
        return $type === $link && (!array_key_exists($xref1, $links[$xref2]) || !in_array($links[$xref2][$xref1], $reciprocal, true));
    }

    /**
     * Create a message linking one record to another.
     *
     * @param Tree   $tree
     * @param string $type1 The type of the record containing the link
     * @param string $xref1 The record containing the link
     * @param string $type2 The tag of the link
     * @param string $xref2 The record being linked to
     *
     * @return string An HTML fragment
     */
    private function checkLinkMessage(Tree $tree, string $type1, string $xref1, string $type2, string $xref2): string
    {
        /* I18N: The placeholders are GEDCOM XREFs and tags. e.g. “INDI I123 contains a FAMC link to F234.” */
        return I18N::translate(
            '%1$s %2$s has a %3$s link to %4$s.',
            $this->formatType($type1),
            $this->checkLink($tree, $xref1),
            $this->formatType($type2),
            $this->checkLink($tree, $xref2)
        );
    }

    /**
     * Format a link to a record.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string An HTML fragment
     */
    private function checkLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, [
            'xref' => $xref,
            'tree' => $tree->name(),
        ]);

        // The XREF may have been copied from raw GEDCOM data, so escape the link text as well as the URL.
        return '<b><a href="' . e($url) . '">' . e($xref) . '</a></b>';
    }

    /**
     * Format a record type.
     *
     * @param string $type
     *
     * @return string An HTML fragment
     */
    private function formatType(string $type): string
    {
        return '<b>' . e($type) . '</b>';
    }
}