<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Http\RequestHandlers;

use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Http\ViewResponseTrait;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Expression;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function array_key_exists;
use function e;
use function in_array;
use function preg_match;
use function preg_match_all;
use function route;
use function strtoupper;

use const PREG_SET_ORDER;

/**
 * Check a tree for errors.
 *
 * Reads the raw GEDCOM text of every record in a tree (including pending
 * changes), extracts the "@XREF@" links from each record, and reports links
 * that point to missing records, links that are not allowed in the containing
 * record type, links that point to the wrong type of record, and family links
 * that have no matching link back.
 *
 * The messages produced are HTML fragments (they contain <b> and <a> tags),
 * so every value taken from GEDCOM data is escaped before being embedded.
 */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    /**
     * Link tag => the type of record that the link must point to.
     *
     * ADOP => FAM is deliberately absent. It would need special handling: we
     * may have both ADOP and FAMC links to the same FAM, but only one link per
     * target record is stored.
     */
    private const XREF_LINKS = [
        'NOTE'          => 'NOTE',
        'SOUR'          => 'SOUR',
        'REPO'          => 'REPO',
        'OBJE'          => 'OBJE',
        'SUBM'          => 'SUBM',
        'FAMC'          => 'FAM',
        'FAMS'          => 'FAM',
        'HUSB'          => 'INDI',
        'WIFE'          => 'INDI',
        'CHIL'          => 'INDI',
        'ASSO'          => 'INDI',
        // A webtrees extension
        '_ASSO'         => 'INDI',
        'ALIA'          => 'INDI',
        // A webtrees extension
        'AUTH'          => 'INDI',
        'ANCI'          => 'SUBM',
        'DESI'          => 'SUBM',
        '_WT_OBJE_SORT' => 'OBJE',
        '_LOC'          => '_LOC',
    ];

    /**
     * Record type => the link tags that may appear in a record of that type.
     */
    private const RECORD_LINKS = [
        'INDI' => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'FAMC',
            'FAMS',
            'ALIA',
            '_WT_OBJE_SORT',
            '_LOC',
        ],
        'FAM'  => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'HUSB',
            'WIFE',
            'CHIL',
            '_LOC',
        ],
        'SOUR' => [
            'NOTE',
            'OBJE',
            'REPO',
            'AUTH',
            '_LOC',
        ],
        'REPO' => ['NOTE'],
        // The spec also allows SOUR. handle() special-cases OBJE -> SOUR links and reports nothing for them.
        'OBJE' => ['NOTE'],
        // The spec also allows SOUR. handle() special-cases NOTE -> SOUR links and reports nothing for them.
        'NOTE' => [],
        'SUBM' => [
            'NOTE',
            'OBJE',
        ],
        'SUBN' => ['SUBM'],
        '_LOC' => [
            'SOUR',
            'OBJE',
            '_LOC',
            'NOTE',
        ],
    ];

    /**
     * Build the "check for errors" page for the tree named in the request.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree    = Validator::attributes($request)->tree();
        $records = $this->fetchRecords($tree);

        $errors   = [];
        $warnings = [];

        // Generate lists of all links:
        // $all_links[source xref][target xref] = link tag
        // $upper_links[UPPER-CASED xref]       = xref as actually written
        $all_links   = [];
        $upper_links = [];

        foreach ($records as $record) {
            $all_links[$record->xref]               = [];
            $upper_links[strtoupper($record->xref)] = $record->xref;

            preg_match_all('/\n\d (' . Gedcom::REGEX_TAG . ') @([^#@\n][^\n@]*)@/', $record->gedcom, $matches, PREG_SET_ORDER);

            foreach ($matches as $match) {
                // Only one tag is kept per target; a later link to the same target overwrites an earlier one.
                $all_links[$record->xref][$match[2]] = $match[1];
            }
        }

        foreach ($all_links as $xref1 => $links) {
            // PHP converts array keys to integers.
            $xref1 = (string) $xref1;

            $type1 = $records[$xref1]->type;

            foreach ($links as $xref2 => $type2) {
                // PHP converts array keys to integers.
                $xref2 = (string) $xref2;

                $type3 = isset($records[$xref2]) ? $records[$xref2]->type : '';

                if (!array_key_exists($xref2, $all_links)) {
                    if (array_key_exists(strtoupper($xref2), $upper_links)) {
                        // The target exists, but with different upper/lower case.
                        $warnings[] =
                            $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                            /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                            I18N::translate('%1$s does not exist. Did you mean %2$s?', $this->checkLink($tree, $xref2), $this->checkLink($tree, $upper_links[strtoupper($xref2)]));
                    } else {
                        /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                        $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%s does not exist.', $this->checkLink($tree, $xref2));
                    }
                } elseif ($type2 === 'SOUR' && $type1 === 'NOTE') {
                    // Notes are intended to add explanations and comments to other records. They should not have their own sources.
                    // Nothing is reported for this case.
                } elseif ($type2 === 'SOUR' && $type1 === 'OBJE') {
                    // Media objects are intended to illustrate other records, facts, and source/citations. They should not have their own sources.
                    // Nothing is reported for this case.
                } elseif ($type2 === 'OBJE' && $type1 === 'REPO') {
                    $warnings[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (!array_key_exists($type1, self::RECORD_LINKS) || !in_array($type2, self::RECORD_LINKS[$type1], true) || !array_key_exists($type2, self::XREF_LINKS)) {
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (self::XREF_LINKS[$type2] !== $type3) {
                    // Target XREF does exist - but is invalid
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                        /* I18N: %1$s is an internal ID number such as R123. %2$s and %3$s are record types, such as INDI or SOUR */
                        I18N::translate('%1$s is a %2$s but a %3$s is expected.', $this->checkLink($tree, $xref2), $this->formatType($type3), $this->formatType($type2));
                } elseif (
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMC', ['CHIL']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMS', ['HUSB', 'WIFE']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'CHIL', ['FAMC']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'HUSB', ['FAMS']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'WIFE', ['FAMS'])
                ) {
                    /* I18N: %1$s and %2$s are internal ID numbers such as R123 */
                    $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not have a link back to %2$s.', $this->checkLink($tree, $xref2), $this->checkLink($tree, $xref1));
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Load the raw GEDCOM of every record in the tree, with pending changes applied.
     *
     * We need to work with raw GEDCOM data, as we are looking for errors
     * which may prevent the GedcomRecord objects from working.
     *
     * Each returned object has the properties "xref", "gedcom" and "type".
     *
     * @param Tree $tree
     *
     * @return array<object> Indexed by XREF. PHP converts numeric XREF keys to integers.
     */
    private function fetchRecords(Tree $tree): array
    {
        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);
        $q6 = DB::table('change')
            ->where('gedcom_id', '=', $tree->id())
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")]);

        // NOTE(review): applying pending changes on top of the stored records
        // relies on the rows arriving in the order the sub-queries are listed.
        // Confirm that the database preserves this order for UNION ALL.
        $rows = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->unionAll($q6)
            ->get();

        $records = [];

        foreach ($rows as $row) {
            if ($row->gedcom === '') {
                // deleted record
                unset($records[$row->xref]);
                continue;
            }

            // Pending changes carry no type - extract it from the level 0 line.
            if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match) === 1) {
                $row->type = $match[1];
            }

            // existing or updated record
            $records[$row->xref] = $row;
        }

        return $records;
    }

    /**
     * Is a reciprocal link missing?
     *
     * Only applies when the link being checked ($type) is of the kind $link.
     * The caller must ensure that $xref2 is a key of $links.
     *
     * @param string               $type       The tag of the link from $xref1 to $xref2
     * @param array<array<string>> $links      All links: [source xref][target xref] = tag
     * @param string               $xref1      The record containing the link
     * @param string               $xref2      The record that the link points to
     * @param string               $link       The tag that this check applies to
     * @param array<string>        $reciprocal Tags that are acceptable for the link back
     *
     * @return bool True if $type is $link and $xref2 has no acceptable link back to $xref1
     */
    private function checkReverseLink(string $type, array $links, string $xref1, string $xref2, string $link, array $reciprocal): bool
    {
        if ($type !== $link) {
            return false;
        }

        $back_links = $links[$xref2];

        return !array_key_exists($xref1, $back_links) || !in_array($back_links[$xref1], $reciprocal, true);
    }

    /**
     * Create a message linking one record to another.
     *
     * @param Tree   $tree
     * @param string $type1 Type of the record containing the link
     * @param string $xref1 XREF of the record containing the link
     * @param string $type2 Tag of the link
     * @param string $xref2 XREF that the link points to
     *
     * @return string An HTML fragment
     */
    private function checkLinkMessage(Tree $tree, string $type1, string $xref1, string $type2, string $xref2): string
    {
        /* I18N: The placeholders are GEDCOM XREFs and tags. e.g. “INDI I123 contains a FAMC link to F234.” */
        return I18N::translate(
            '%1$s %2$s has a %3$s link to %4$s.',
            $this->formatType($type1),
            $this->checkLink($tree, $xref1),
            $this->formatType($type2),
            $this->checkLink($tree, $xref2)
        );
    }

    /**
     * Format a link to a record.
     *
     * The XREF may come from a broken link in the GEDCOM data, where it can
     * contain any character except "@" and newline. It is therefore escaped
     * in the link text as well as in the URL.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string An HTML fragment
     */
    private function checkLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, [
            'xref' => $xref,
            'tree' => $tree->name(),
        ]);

        return '<b><a href="' . e($url) . '">' . e($xref) . '</a></b>';
    }

    /**
     * Format a record type.
     *
     * The type is escaped defensively, as it is derived from stored data.
     *
     * @param string $type
     *
     * @return string An HTML fragment
     */
    private function formatType(string $type): string
    {
        return '<b>' . e($type) . '</b>';
    }
}