<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2022 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Http\RequestHandlers;

use Fisharebest\Webtrees\Elements\AbstractXrefElement;
use Fisharebest\Webtrees\Elements\MultimediaFileReference;
use Fisharebest\Webtrees\Elements\MultimediaFormat;
use Fisharebest\Webtrees\Elements\SubmitterText;
use Fisharebest\Webtrees\Elements\UnknownElement;
use Fisharebest\Webtrees\Elements\XrefFamily;
use Fisharebest\Webtrees\Elements\XrefIndividual;
use Fisharebest\Webtrees\Elements\XrefLocation;
use Fisharebest\Webtrees\Elements\XrefMedia;
use Fisharebest\Webtrees\Elements\XrefNote;
use Fisharebest\Webtrees\Elements\XrefRepository;
use Fisharebest\Webtrees\Elements\XrefSource;
use Fisharebest\Webtrees\Elements\XrefSubmission;
use Fisharebest\Webtrees\Elements\XrefSubmitter;
use Fisharebest\Webtrees\Factories\ElementFactory;
use Fisharebest\Webtrees\Factories\ImageFactory;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Http\ViewResponseTrait;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Location;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Mime;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TimeoutService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submission;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Expression;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function array_key_exists;
use function array_shift;
use function array_slice;
use function e;
use function explode;
use function implode;
use function in_array;
use function preg_match;
use function route;
use function str_contains;
use function str_starts_with;
use function strtoupper;
use function strtr;
use function substr_count;

/**
 * Check a tree for errors.
 *
 * The check works on the raw GEDCOM text of every record (including pending
 * changes) and produces three lists of HTML messages: errors, warnings and
 * informational notes.
 */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    /**
     * The record type that each kind of XREF element must point to.
     * The order is significant: it is the order in which mismatches are tested.
     *
     * @var array<class-string,string>
     */
    private const XREF_RECORD_TYPES = [
        XrefFamily::class     => Family::RECORD_TYPE,
        XrefIndividual::class => Individual::RECORD_TYPE,
        XrefMedia::class      => Media::RECORD_TYPE,
        XrefNote::class       => Note::RECORD_TYPE,
        XrefSource::class     => Source::RECORD_TYPE,
        XrefRepository::class => Repository::RECORD_TYPE,
        XrefSubmitter::class  => Submitter::RECORD_TYPE,
        XrefSubmission::class => Submission::RECORD_TYPE,
        XrefLocation::class   => Location::RECORD_TYPE,
    ];

    /**
     * For each family/individual link, the level-1 tag(s) that the linked
     * record must use to point back. Any one of the listed tags is sufficient.
     *
     * @var array<string,array<int,string>>
     */
    private const BACK_LINK_TAGS = [
        'FAM:HUSB'  => ['FAMS'],
        'FAM:WIFE'  => ['FAMS'],
        'FAM:CHIL'  => ['FAMC'],
        'INDI:FAMC' => ['CHIL'],
        'INDI:FAMS' => ['HUSB', 'WIFE'],
    ];

    private Gedcom $gedcom;

    private TimeoutService $timeout_service;

    /**
     * @param Gedcom         $gedcom
     * @param TimeoutService $timeout_service
     */
    public function __construct(Gedcom $gedcom, TimeoutService $timeout_service)
    {
        $this->gedcom          = $gedcom;
        $this->timeout_service = $timeout_service;
    }

    /**
     * Scan every record of the tree and render the list of problems found.
     *
     * The optional "skip_to" query parameter holds the XREF of the record at
     * which to resume. If the timeout service reports that time is nearly up,
     * scanning stops and the view receives a "more_url" that continues from
     * the first unprocessed record.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree    = Validator::attributes($request)->tree();
        $skip_to = Validator::queryParams($request)->string('skip_to', '');

        // We need to work with raw GEDCOM data, as we are looking for errors
        // which may prevent the GedcomRecord objects from working.

        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);
        // Pending changes are selected last, so that they replace (or delete)
        // the stored version of the same record when the rows are merged below.
        $q6 = DB::table('change')
            ->where('gedcom_id', '=', $tree->id())
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")]);

        $rows = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->unionAll($q6)
            ->get()
            ->map(static function (object $row): object {
                // Extract type for pending record
                if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match) === 1) {
                    $row->type = $match[1];
                }

                return $row;
            });

        // XREF => row, for every record that exists after pending changes are applied.
        $records = [];
        // Upper-case XREF => XREF as stored, used to detect links that differ only in case.
        $xrefs = [];

        foreach ($rows as $row) {
            if ($row->gedcom !== '') {
                // existing or updated record
                $records[$row->xref] = $row;
            } else {
                // deleted record
                unset($records[$row->xref]);
            }

            $xrefs[strtoupper($row->xref)] = $row->xref;
        }

        unset($rows);

        $errors   = [];
        $warnings = [];
        $infos    = [];

        $element_factory = new ElementFactory();
        $this->gedcom->registerTags($element_factory, false);

        foreach ($records as $record) {
            if ($skip_to === $record->xref) {
                // This is where the previous request stopped. Resume here, and
                // always process this record so that every request makes progress.
                $skip_to = '';
            } elseif ($skip_to !== '') {
                // Already checked by an earlier request.
                continue;
            } elseif ($this->timeout_service->isTimeNearlyUp()) {
                // If we are nearly out of time, then stop processing here
                $skip_to = $record->xref;
                break;
            }

            // Drop the level-0 header line; only the sub-lines are validated.
            $lines = explode("\n", $record->gedcom);
            array_shift($lines);

            $last_level = 0;
            $hierarchy  = [$record->type];

            foreach ($lines as $index => $line) {
                // array_shift() re-indexed the lines from zero and the header was
                // removed, so add two to get the 1-based position of this line
                // within the record's GEDCOM text (the header being line 1).
                $line_number = $index + 2;

                $line_error = fn (string $message): string => $this->lineError(
                    $tree,
                    $record->type,
                    $record->xref,
                    $line_number,
                    $line,
                    $message
                );

                if (preg_match('/^(\d+) (\w+) ?(.*)/', $line, $match) !== 1) {
                    $errors[] = $line_error(I18N::translate('Invalid GEDCOM record.'));
                    break;
                }

                $level = (int) $match[1];

                // A line may be at most one level deeper than the line before it.
                if ($level > $last_level + 1) {
                    $errors[] = $line_error(I18N::translate('Invalid GEDCOM level number.'));
                    break;
                }

                $tag               = $match[2];
                $value             = $match[3];
                $hierarchy[$level] = $tag;
                $full_tag          = implode(':', array_slice($hierarchy, 0, 1 + $level));
                $element           = $element_factory->make($full_tag);
                $last_level        = $level;

                // Continuation lines are always validated as SubmitterText,
                // whatever element they continue.
                if ($tag === 'CONT') {
                    $element = new SubmitterText('CONT');
                }

                if ($element instanceof UnknownElement) {
                    if (str_starts_with($tag, '_') || str_starts_with($full_tag, '_') || str_contains($full_tag, ':_')) {
                        $message    = I18N::translate('Custom GEDCOM tags are discouraged. Try to use only standard GEDCOM tags.');
                        $warnings[] = $line_error($message);
                    } else {
                        $message  = I18N::translate('Invalid GEDCOM tag.') . ' ' . $full_tag;
                        $errors[] = $line_error($message);
                    }
                } elseif ($element instanceof AbstractXrefElement) {
                    if (preg_match('/@(' . Gedcom::REGEX_XREF . ')@/', $value, $match) === 1) {
                        $xref1 = $match[1];
                        $xref2 = $xrefs[strtoupper($xref1)] ?? null;
                        // Do not use null as an array offset when the XREF is unknown.
                        $linked = $xref2 === null ? null : ($records[$xref2] ?? null);

                        if ($linked === null) {
                            $message  = I18N::translate('%s does not exist.', e($xref1));
                            $errors[] = $line_error($message);
                        } else {
                            $message = $this->linkError($tree, $element, $full_tag, $record, $xref1, $linked);

                            if ($message !== null) {
                                $errors[] = $line_error($message);
                            } elseif ($xref1 !== $xref2) {
                                // The target exists, but its XREF is spelled with different letter-case.
                                $message    = I18N::translate('%1$s does not exist. Did you mean %2$s?', e($xref1), e($xref2));
                                $warnings[] = $line_error($message);
                            }
                        }
                    } elseif ($tag === 'SOUR') {
                        $message    = I18N::translate('Inline-source records are discouraged.');
                        $warnings[] = $line_error($message);
                    } else {
                        $message  = I18N::translate('Invalid GEDCOM value.');
                        $errors[] = $line_error($message);
                    }
                } elseif ($element->canonical($value) !== $value) {
                    $expected = e($element->canonical($value));
                    // Make tab characters visible in the reported value.
                    $actual  = strtr(e($value), ["\t" => '→']);
                    $message = I18N::translate('“%1$s” should be “%2$s”.', $actual, $expected);
                    $infos[] = $line_error($message);
                } elseif ($element instanceof MultimediaFormat) {
                    $mime = Mime::TYPES[$value] ?? Mime::DEFAULT_TYPE;

                    if ($mime === Mime::DEFAULT_TYPE) {
                        $message    = I18N::translate('webtrees does not recognise this file format.');
                        $warnings[] = $line_error($message);
                    } elseif (str_starts_with($mime, 'image/') && !array_key_exists($mime, ImageFactory::SUPPORTED_FORMATS)) {
                        $message    = I18N::translate('webtrees cannot create thumbnails for this file format.');
                        $warnings[] = $line_error($message);
                    }
                } elseif ($element instanceof MultimediaFileReference && $value === 'gedcom.ged') {
                    $message  = I18N::translate('This filename is not compatible with the GEDZIP file format.');
                    $errors[] = $line_error($message);
                }
            }

            // Record-level checks: a family may have at most one HUSB and one WIFE.
            if ($record->type === Family::RECORD_TYPE) {
                if (substr_count($record->gedcom, "\n1 HUSB @") > 1) {
                    $message  = I18N::translate('%s occurs too many times.', 'FAM:HUSB');
                    $errors[] = $this->recordError($tree, $record->type, $record->xref, $message);
                }
                if (substr_count($record->gedcom, "\n1 WIFE @") > 1) {
                    $message  = I18N::translate('%s occurs too many times.', 'FAM:WIFE');
                    $errors[] = $this->recordError($tree, $record->type, $record->xref, $message);
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        // A non-empty $skip_to means that we stopped early; offer a link to continue.
        if ($skip_to === '') {
            $more_url = '';
        } else {
            $more_url = route(self::class, ['tree' => $tree->name(), 'skip_to' => $skip_to]);
        }

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'infos'    => $infos,
            'more_url' => $more_url,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Check a link to an existing record: it must point to the right type of
     * record and, for family/individual links, the target must link back.
     *
     * @param Tree                $tree
     * @param AbstractXrefElement $element  The element that defines the linking tag
     * @param string              $full_tag The full tag of the linking line, e.g. "FAM:HUSB"
     * @param object              $record   The row (xref, gedcom, type) containing the link
     * @param string              $xref     The XREF exactly as written in the link
     * @param object              $linked   The row (xref, gedcom, type) that the link resolves to
     *
     * @return string|null An HTML error message, or null if no problem was found
     */
    private function linkError(
        Tree $tree,
        AbstractXrefElement $element,
        string $full_tag,
        object $record,
        string $xref,
        object $linked
    ): ?string {
        foreach (self::XREF_RECORD_TYPES as $class => $expected_type) {
            if ($element instanceof $class && $linked->type !== $expected_type) {
                return $this->linkErrorMessage($tree, $xref, $linked->type, $expected_type);
            }
        }

        $back_link_tags = self::BACK_LINK_TAGS[$full_tag] ?? [];

        if ($back_link_tags === []) {
            // This kind of link does not need a link back.
            return null;
        }

        foreach ($back_link_tags as $back_link_tag) {
            if (str_contains($linked->gedcom, "\n1 " . $back_link_tag . ' @' . $record->xref . '@')) {
                return null;
            }
        }

        $link1 = $this->recordLink($tree, $linked->xref);
        $link2 = $this->recordLink($tree, $record->xref);

        return I18N::translate('%1$s does not have a link back to %2$s.', $link1, $link2);
    }

    /**
     * The translated name of a record type. Unrecognised types are returned
     * as HTML-escaped text.
     *
     * @param string $type
     *
     * @return string
     */
    private function recordType(string $type): string
    {
        $types = [
            Family::RECORD_TYPE     => I18N::translate('Family'),
            Header::RECORD_TYPE     => I18N::translate('Header'),
            Individual::RECORD_TYPE => I18N::translate('Individual'),
            Location::RECORD_TYPE   => I18N::translate('Location'),
            Media::RECORD_TYPE      => I18N::translate('Media object'),
            Note::RECORD_TYPE       => I18N::translate('Note'),
            Repository::RECORD_TYPE => I18N::translate('Repository'),
            Source::RECORD_TYPE     => I18N::translate('Source'),
            Submission::RECORD_TYPE => I18N::translate('Submission'),
            Submitter::RECORD_TYPE  => I18N::translate('Submitter'),
        ];

        return $types[$type] ?? e($type);
    }

    /**
     * Format an HTML link to a record's page, using the XREF as the link text.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string
     */
    private function recordLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, ['xref' => $xref, 'tree' => $tree->name()]);

        return '<a href="' . e($url) . '">' . e($xref) . '</a>';
    }

    /**
     * Format a problem found on one line of a record.
     *
     * @param Tree   $tree
     * @param string $type        The type of the record containing the line
     * @param string $xref        The XREF of the record containing the line
     * @param int    $line_number The line number to display
     * @param string $line        The raw GEDCOM line; it is HTML-escaped here
     * @param string $message     The problem, as HTML; it is not escaped here
     *
     * @return string
     */
    private function lineError(Tree $tree, string $type, string $xref, int $line_number, string $line, string $message): string
    {
        return
            I18N::translate('%1$s: %2$s', $this->recordType($type), $this->recordLink($tree, $xref)) .
            ' — ' .
            I18N::translate('%1$s: %2$s', I18N::translate('Line number'), I18N::number($line_number)) .
            ' — ' .
            '<code>' . e($line) . '</code>' .
            '<br>' . $message;
    }

    /**
     * Format a problem that applies to a record as a whole.
     *
     * @param Tree   $tree
     * @param string $type    The type of the record
     * @param string $xref    The XREF of the record
     * @param string $message The problem, as HTML; it is not escaped here
     *
     * @return string
     */
    private function recordError(Tree $tree, string $type, string $xref, string $message): string
    {
        return I18N::translate('%1$s: %2$s', $this->recordType($type), $this->recordLink($tree, $xref)) . ' — ' . $message;
    }

    /**
     * Format the message for a link that points to the wrong type of record.
     *
     * @param Tree   $tree
     * @param string $xref  The XREF of the linked record
     * @param string $type1 The actual type of the linked record
     * @param string $type2 The type of record that was expected
     *
     * @return string
     */
    private function linkErrorMessage(Tree $tree, string $xref, string $type1, string $type2): string
    {
        $link  = $this->recordLink($tree, $xref);
        $type1 = $this->recordType($type1);
        $type2 = $this->recordType($type2);

        return I18N::translate('%1$s is a %2$s but a %3$s is expected.', $link, $type1, $type2);
    }
}