<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Http\RequestHandlers;

use Fisharebest\Webtrees\Elements\AbstractXrefElement;
use Fisharebest\Webtrees\Elements\MultimediaFileReference;
use Fisharebest\Webtrees\Elements\MultimediaFormat;
use Fisharebest\Webtrees\Elements\SubmitterText;
use Fisharebest\Webtrees\Elements\UnknownElement;
use Fisharebest\Webtrees\Elements\XrefFamily;
use Fisharebest\Webtrees\Elements\XrefIndividual;
use Fisharebest\Webtrees\Elements\XrefLocation;
use Fisharebest\Webtrees\Elements\XrefMedia;
use Fisharebest\Webtrees\Elements\XrefNote;
use Fisharebest\Webtrees\Elements\XrefRepository;
use Fisharebest\Webtrees\Elements\XrefSource;
use Fisharebest\Webtrees\Elements\XrefSubmission;
use Fisharebest\Webtrees\Elements\XrefSubmitter;
use Fisharebest\Webtrees\Factories\ElementFactory;
use Fisharebest\Webtrees\Factories\ImageFactory;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Http\ViewResponseTrait;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Mime;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TimeoutService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submission;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Expression;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function array_key_exists;
use function array_shift;
use function array_slice;
use function e;
use function explode;
use function implode;
use function preg_match;
use function route;
use function str_contains;
use function str_starts_with;
use function strtoupper;
use function strtr;
use function substr_count;

/**
 * Check a tree for errors.
 *
 * The check works on the raw GEDCOM text of every record in the tree (with
 * pending changes applied) and collects three lists of issues - errors,
 * warnings and infos - which are passed to the "admin/trees-check" view.
 */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    /**
     * The type of record that each kind of link element must point to.
     * The entries are tested in this order.
     *
     * @var array<class-string<AbstractXrefElement>,string>
     */
    private const XREF_RECORD_TYPES = [
        XrefFamily::class     => Family::RECORD_TYPE,
        XrefIndividual::class => Individual::RECORD_TYPE,
        XrefMedia::class      => Media::RECORD_TYPE,
        XrefNote::class       => Note::RECORD_TYPE,
        XrefSource::class     => Source::RECORD_TYPE,
        XrefRepository::class => Repository::RECORD_TYPE,
        XrefSubmitter::class  => Submitter::RECORD_TYPE,
        XrefSubmission::class => Submission::RECORD_TYPE,
        XrefLocation::class   => Location::RECORD_TYPE,
    ];

    /**
     * For each family/individual link, the level-1 tags in the linked record,
     * at least one of which must link back to the record being checked.
     *
     * @var array<string,array<int,string>>
     */
    private const BACK_LINK_TAGS = [
        'FAM:HUSB'  => ['FAMS'],
        'FAM:WIFE'  => ['FAMS'],
        'FAM:CHIL'  => ['FAMC'],
        'INDI:FAMC' => ['CHIL'],
        'INDI:FAMS' => ['HUSB', 'WIFE'],
    ];

    private Gedcom $gedcom;

    private TimeoutService $timeout_service;

    /**
     * @param Gedcom         $gedcom          Used to register the known GEDCOM tags
     * @param TimeoutService $timeout_service Used to stop before the request runs out of time
     */
    public function __construct(Gedcom $gedcom, TimeoutService $timeout_service)
    {
        $this->gedcom          = $gedcom;
        $this->timeout_service = $timeout_service;
    }

    /**
     * Check every record in the tree and show the list of problems found.
     *
     * When time is nearly up, checking stops and the view receives a "more_url"
     * which resumes from the first unchecked record (query parameter "skip_to").
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree    = Validator::attributes($request)->tree();
        $skip_to = Validator::queryParams($request)->string('skip_to', '');

        [$records, $xrefs] = $this->loadRecords($tree);

        // A stale or mistyped "skip_to" would skip every record, report nothing,
        // and offer the same "more" link again. Start from the beginning instead.
        if ($skip_to !== '' && !array_key_exists($skip_to, $records)) {
            $skip_to = '';
        }

        $errors   = [];
        $warnings = [];
        $infos    = [];

        $element_factory = new ElementFactory();
        $this->gedcom->registerTags($element_factory, false);

        foreach ($records as $record) {
            if ($skip_to === $record->xref) {
                // This is the record where the previous request stopped. Resume here.
                $skip_to = '';
            } elseif ($skip_to !== '') {
                // Already checked by a previous request.
                continue;
            } elseif ($this->timeout_service->isTimeNearlyUp()) {
                // If we are nearly out of time, then stop processing here.
                $skip_to = $record->xref;
                break;
            }

            // The level-0 line is already described by the record's type and XREF.
            $lines = explode("\n", $record->gedcom);
            array_shift($lines);

            $last_level = 0;
            $hierarchy  = [$record->type];

            foreach ($lines as $index => $line) {
                // array_shift() re-indexed the lines from zero. Report the 1-based
                // line number within the whole record, where the level-0 line is line 1.
                $line_number = $index + 2;

                // Create an issue for the current line.
                $report = fn (string $message, string $issue_tag): object => $this->lineError(
                    $tree,
                    $record->type,
                    $record->xref,
                    $line_number,
                    $line,
                    $message,
                    $issue_tag
                );

                if (preg_match('/^(\d+) (\w+) ?(.*)/', $line, $match) !== 1) {
                    // The remaining lines cannot be interpreted reliably.
                    $errors[] = $report(I18N::translate('Invalid GEDCOM record.'), '');
                    break;
                }

                $level = (int) $match[1];

                if ($level > $last_level + 1) {
                    // A level was skipped, so the full tag of this line is unknown.
                    $errors[] = $report(I18N::translate('Invalid GEDCOM level number.'), '');
                    break;
                }

                $tag               = $match[2];
                $value             = $match[3];
                $hierarchy[$level] = $tag;
                $full_tag          = implode(':', array_slice($hierarchy, 0, 1 + $level));
                $element           = $element_factory->make($full_tag);
                $last_level        = $level;

                // Continuation lines are always checked as text, whatever their parent is.
                if ($tag === 'CONT') {
                    $element = new SubmitterText('CONT');
                }

                if ($element instanceof UnknownElement) {
                    if (str_starts_with($tag, '_') || str_starts_with($full_tag, '_') || str_contains($full_tag, ':_')) {
                        $message    = I18N::translate('Custom GEDCOM tags are discouraged. Try to use only standard GEDCOM tags.');
                        $warnings[] = $report($message, $full_tag);
                    } else {
                        $message  = I18N::translate('Invalid GEDCOM tag.') . ' ' . $full_tag;
                        $errors[] = $report($message, $full_tag);
                    }
                } elseif ($element instanceof AbstractXrefElement) {
                    if (preg_match('/@(' . Gedcom::REGEX_XREF . ')@/', $value, $xref_match) === 1) {
                        // $xref1 is the XREF as written; $xref2 is the XREF of an
                        // actual record that differs from it, at most, in letter-case.
                        $xref1  = $xref_match[1];
                        $xref2  = $xrefs[strtoupper($xref1)] ?? null;
                        $linked = $xref2 === null ? null : ($records[$xref2] ?? null);

                        if ($linked === null) {
                            $message  = I18N::translate('%s does not exist.', e($xref1));
                            $errors[] = $report($message, $tag . '-' . $xref1);
                        } else {
                            $expected_type = $this->expectedLinkType($element, $linked->type);
                            $back_link     = $this->missingBackLink($full_tag, $linked->gedcom, $record->xref);

                            if ($expected_type !== null) {
                                $message  = $this->linkErrorMessage($tree, $xref1, $linked->type, $expected_type);
                                $errors[] = $report($message, $full_tag . '-type');
                            } elseif ($back_link !== null) {
                                $link1    = $this->recordLink($tree, $linked->xref);
                                $link2    = $this->recordLink($tree, $record->xref);
                                $message  = I18N::translate('%1$s does not have a link back to %2$s.', $link1, $link2);
                                $errors[] = $report($message, $full_tag . $back_link);
                            } elseif ($xref1 !== $xref2) {
                                // The link only works if XREFs are compared case-insensitively.
                                $message    = I18N::translate('%1$s does not exist. Did you mean %2$s?', e($xref1), e($xref2));
                                $warnings[] = $report($message, $tag . '-' . $xref1);
                            }
                        }
                    } elseif ($tag === 'SOUR') {
                        $message    = I18N::translate('Inline-source records are discouraged.');
                        $warnings[] = $report($message, $full_tag . '-inline');
                    } else {
                        $message  = I18N::translate('Invalid GEDCOM value.');
                        $errors[] = $report($message, $full_tag . '-value-' . e($value));
                    }
                } elseif ($element->canonical($value) !== $value) {
                    $canonical = $element->canonical($value);

                    // Differences that are only in letter-case are deliberately not reported.
                    // They will be relevant for GEDCOM 7.0, but for now they only cause confusion.
                    if (strtoupper($canonical) !== strtoupper($value)) {
                        $expected = e($canonical);
                        $actual   = strtr(e($value), ["\t" => '→']);
                        $message  = I18N::translate('“%1$s” should be “%2$s”.', $actual, $expected);
                        $infos[]  = $report($message, $full_tag . '-value');
                    }
                } elseif ($element instanceof MultimediaFormat) {
                    $mime = Mime::TYPES[$value] ?? Mime::DEFAULT_TYPE;

                    if ($mime === Mime::DEFAULT_TYPE) {
                        $message    = I18N::translate('webtrees does not recognise this file format.');
                        $warnings[] = $report($message, $full_tag . '-' . e($value));
                    } elseif (str_starts_with($mime, 'image/') && !array_key_exists($mime, ImageFactory::SUPPORTED_FORMATS)) {
                        $message    = I18N::translate('webtrees cannot create thumbnails for this file format.');
                        $warnings[] = $report($message, $full_tag . '-' . e($value));
                    }
                } elseif ($element instanceof MultimediaFileReference && $value === 'gedcom.ged') {
                    $message  = I18N::translate('This filename is not compatible with the GEDZIP file format.');
                    $errors[] = $report($message, $full_tag . '_' . e($value));
                }
            }

            // Checks on the record as a whole. These run even if a bad line ended the loop above.
            if ($record->type === Family::RECORD_TYPE) {
                if (substr_count($record->gedcom, "\n1 HUSB @") > 1) {
                    $message  = I18N::translate('%s occurs too many times.', 'FAM:HUSB');
                    $errors[] = $this->recordError($tree, $record->type, $record->xref, $message, 'FAM:HUSB-count');
                }
                if (substr_count($record->gedcom, "\n1 WIFE @") > 1) {
                    $message  = I18N::translate('%s occurs too many times.', 'FAM:WIFE');
                    $errors[] = $this->recordError($tree, $record->type, $record->xref, $message, 'FAM:WIFE-count');
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        if ($skip_to === '') {
            $more_url = '';
        } else {
            $more_url = route(self::class, ['tree' => $tree->name(), 'skip_to' => $skip_to]);
        }

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'infos'    => $infos,
            'more_url' => $more_url,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Load the raw GEDCOM of every record in the tree, with pending changes applied.
     *
     * We need to work with raw GEDCOM data, as we are looking for errors
     * which may prevent the GedcomRecord objects from working.
     *
     * @param Tree $tree
     *
     * @return array{0:array<int|string,object>,1:array<string,string>} The records (objects with the properties
     *                                                                    xref, gedcom and type) indexed by XREF,
     *                                                                    and a map of upper-case XREF to actual
     *                                                                    XREF. The map also contains the XREFs
     *                                                                    of records that are pending deletion.
     */
    private function loadRecords(Tree $tree): array
    {
        $tree_id = $tree->id();

        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree_id)
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree_id)
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree_id)
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree_id)
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        // The third column takes its name ("type") from the first query of the union.
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree_id)
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);

        $stored = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->get();

        // Pending changes must be applied after the stored records, oldest first.
        // An ORDER BY inside one branch of a UNION is not guaranteed to be kept,
        // so fetch them with a separate, ordered query.
        $pending = DB::table('change')
            ->where('gedcom_id', '=', $tree_id)
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")])
            ->get();

        $records = [];
        $xrefs   = [];

        foreach ($stored->concat($pending) as $row) {
            // Extract type for pending record
            if ($row->type === '' && str_starts_with($row->gedcom, '0 HEAD')) {
                $row->type = 'HEAD';
            }

            if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match) === 1) {
                $row->type = $match[1];
            }

            if ($row->gedcom !== '') {
                // existing or updated record
                $records[$row->xref] = $row;
            } else {
                // deleted record
                unset($records[$row->xref]);
            }

            $xrefs[strtoupper($row->xref)] = $row->xref;
        }

        return [$records, $xrefs];
    }

    /**
     * Find the record type that a link should point to, if it points to a different one.
     *
     * @param AbstractXrefElement $element     The element describing the link
     * @param string              $linked_type The type of the record that the link points to
     *
     * @return string|null The expected record type, or null if the linked record has an acceptable type
     */
    private function expectedLinkType(AbstractXrefElement $element, string $linked_type): ?string
    {
        foreach (self::XREF_RECORD_TYPES as $element_class => $record_type) {
            if ($element instanceof $element_class && $linked_type !== $record_type) {
                return $record_type;
            }
        }

        return null;
    }

    /**
     * Check that a family/individual link is matched by a link in the opposite direction.
     *
     * @param string $full_tag      The full tag of the link, e.g. "FAM:CHIL"
     * @param string $linked_gedcom The raw GEDCOM of the linked record
     * @param string $xref          The XREF of the record containing the link
     *
     * @return string|null The suffix for the issue tag (e.g. "-FAMC") when the link back
     *                     is missing, or null when it exists or is not required
     */
    private function missingBackLink(string $full_tag, string $linked_gedcom, string $xref): ?string
    {
        $back_tags = self::BACK_LINK_TAGS[$full_tag] ?? [];

        if ($back_tags === []) {
            return null;
        }

        foreach ($back_tags as $back_tag) {
            if (str_contains($linked_gedcom, "\n1 " . $back_tag . ' @' . $xref . '@')) {
                return null;
            }
        }

        return '-' . implode('-', $back_tags);
    }

    /**
     * The translated name of a record type.
     *
     * @param string $type A record type, such as "INDI"
     *
     * @return string The translated name, or the HTML-escaped type if it is not one of the known types
     */
    private function recordType(string $type): string
    {
        $types = [
            Family::RECORD_TYPE     => I18N::translate('Family'),
            Header::RECORD_TYPE     => I18N::translate('Header'),
            Individual::RECORD_TYPE => I18N::translate('Individual'),
            Location::RECORD_TYPE   => I18N::translate('Location'),
            Media::RECORD_TYPE      => I18N::translate('Media object'),
            Note::RECORD_TYPE       => I18N::translate('Note'),
            Repository::RECORD_TYPE => I18N::translate('Repository'),
            Source::RECORD_TYPE     => I18N::translate('Source'),
            Submission::RECORD_TYPE => I18N::translate('Submission'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitter'),
        ];

        return $types[$type] ?? e($type);
    }

    /**
     * Format an HTML link to a record's page, using the XREF as the link text.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string An HTML anchor element
     */
    private function recordLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, ['xref' => $xref, 'tree' => $tree->name()]);

        return '<a href="' . e($url) . '">' . e($xref) . '</a>';
    }

    /**
     * Format the message for a link that points to the wrong type of record.
     *
     * @param Tree   $tree
     * @param string $xref  The XREF of the linked record
     * @param string $type1 The actual type of the linked record
     * @param string $type2 The type of record that was expected
     *
     * @return string An HTML message
     */
    private function linkErrorMessage(Tree $tree, string $xref, string $type1, string $type2): string
    {
        $link  = $this->recordLink($tree, $xref);
        $type1 = $this->recordType($type1);
        $type2 = $this->recordType($type2);

        return I18N::translate('%1$s is a %2$s but a %3$s is expected.', $link, $type1, $type2);
    }

    /**
     * Create an issue that relates to one line of a record.
     *
     * The formatted message names the record, the line number and the text
     * of the line, followed by the description of the problem.
     *
     * @param Tree   $tree
     * @param string $type        The type of the record containing the line
     * @param string $xref        The XREF of the record containing the line
     * @param int    $line_number The line number to display
     * @param string $line        The raw text of the line (it is escaped here)
     * @param string $message     The description of the problem, as HTML (it is NOT escaped here)
     * @param string $tag         An identifier for this kind of issue
     *
     * @return object An object with the properties "message" (HTML) and "tag"
     */
    private function lineError(
        Tree $tree,
        string $type,
        string $xref,
        int $line_number,
        string $line,
        string $message,
        string $tag
    ): object {
        $message =
            I18N::translate('%1$s: %2$s', $this->recordType($type), $this->recordLink($tree, $xref)) .
            ' — ' .
            I18N::translate('%1$s: %2$s', I18N::translate('Line number'), I18N::number($line_number)) .
            ' — ' .
            '<code>' . e($line) . '</code>' .
            '<br>' . $message;

        return (object) [
            'message' => $message,
            'tag'     => $tag,
        ];
    }

    /**
     * Create an issue that relates to a record as a whole.
     *
     * @param Tree   $tree
     * @param string $type    The type of the record
     * @param string $xref    The XREF of the record
     * @param string $message The description of the problem, as HTML (it is NOT escaped here)
     * @param string $tag     An identifier for this kind of issue
     *
     * @return object An object with the properties "message" (HTML) and "tag"
     */
    private function recordError(Tree $tree, string $type, string $xref, string $message, string $tag): object
    {
        $message = I18N::translate('%1$s: %2$s', $this->recordType($type), $this->recordLink($tree, $xref)) . ' — ' . $message;

        return (object) [
            'message' => $message,
            'tag'     => $tag,
        ];
    }
}