<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Http\RequestHandlers;

use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomTag;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Http\ViewResponseTrait;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Expression;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;
use stdClass;

use function array_key_exists;
use function assert;
use function e;
use function in_array;
use function preg_match;
use function preg_match_all;
use function route;
use function strtoupper;

use const PREG_SET_ORDER;

/**
 * Check a tree for errors.
 *
 * Scans the raw GEDCOM of every record in a tree (including pending changes)
 * and reports links that point to missing records, links of a type that is
 * not allowed in the containing record, links to a record of the wrong type,
 * and family/individual links that have no matching link in the other direction.
 */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    // Link tag => the record type that the target of such a link must have.
    private const XREF_LINKS = [
        'NOTE'          => 'NOTE',
        'SOUR'          => 'SOUR',
        'REPO'          => 'REPO',
        'OBJE'          => 'OBJE',
        'SUBM'          => 'SUBM',
        'FAMC'          => 'FAM',
        'FAMS'          => 'FAM',
        //'ADOP'=>'FAM', // Need to handle this case specially. We may have both ADOP and FAMC links to the same FAM, but only store one.
        'HUSB'          => 'INDI',
        'WIFE'          => 'INDI',
        'CHIL'          => 'INDI',
        'ASSO'          => 'INDI',
        '_ASSO'         => 'INDI', // A webtrees extension
        'ALIA'          => 'INDI',
        'AUTH'          => 'INDI', // A webtrees extension
        'ANCI'          => 'SUBM',
        'DESI'          => 'SUBM',
        '_WT_OBJE_SORT' => 'OBJE',
        '_LOC'          => '_LOC',
    ];

    // Record type => the link tags that are allowed inside a record of that type.
    private const RECORD_LINKS = [
        'INDI' => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'FAMC',
            'FAMS',
            'ALIA',
            '_WT_OBJE_SORT',
            '_LOC',
        ],
        'FAM'  => [
            'NOTE',
            'OBJE',
            'SOUR',
            'SUBM',
            'ASSO',
            '_ASSO',
            'HUSB',
            'WIFE',
            'CHIL',
            '_LOC',
        ],
        'SOUR' => [
            'NOTE',
            'OBJE',
            'REPO',
            'AUTH',
        ],
        'REPO' => ['NOTE'],
        // The spec also allows SOUR here. handle() accepts such links without reporting them.
        'OBJE' => ['NOTE'],
        // The spec also allows SOUR here. handle() accepts such links without reporting them.
        'NOTE' => [],
        'SUBM' => [
            'NOTE',
            'OBJE',
        ],
        'SUBN' => ['SUBM'],
        '_LOC' => [
            'SOUR',
            'OBJE',
            '_LOC',
            'NOTE',
        ],
    ];

    /**
     * Build the "check for errors" page for the tree in the request.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        $records = $this->loadRecords($tree);

        $errors   = [];
        $warnings = [];

        // Generate lists of all links:
        //   $all_links[source xref][target xref] = link tag
        //   $upper_links[upper-cased xref]       = xref as written (used for "did you mean" hints)
        $all_links   = [];
        $upper_links = [];

        foreach ($records as $record) {
            $all_links[$record->xref]               = [];
            $upper_links[strtoupper($record->xref)] = $record->xref;
            preg_match_all('/\n\d (' . Gedcom::REGEX_TAG . ') @([^#@\n][^\n@]*)@/', $record->gedcom, $matches, PREG_SET_ORDER);
            foreach ($matches as $match) {
                // Only one tag is kept per target; a later link to the same target replaces an earlier one.
                $all_links[$record->xref][$match[2]] = $match[1];
            }
        }

        foreach ($all_links as $xref1 => $links) {
            // PHP converts array keys to integers.
            $xref1 = (string) $xref1;

            // $type1 is the type of the record containing the link.
            $type1 = $records[$xref1]->type;
            foreach ($links as $xref2 => $type2) {
                // PHP converts array keys to integers.
                $xref2 = (string) $xref2;

                // $type2 is the link tag; $type3 is the type of the record it points to ('' if missing).
                $type3 = isset($records[$xref2]) ? $records[$xref2]->type : '';
                if (!array_key_exists($xref2, $all_links)) {
                    if (array_key_exists(strtoupper($xref2), $upper_links)) {
                        // The target exists, but only with different upper/lower case.
                        $warnings[] =
                            $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                            /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                            I18N::translate('%1$s does not exist. Did you mean %2$s?', $this->checkLink($tree, $xref2), $this->checkLink($tree, $upper_links[strtoupper($xref2)]));
                    } else {
                        /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                        $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not exist.', $this->checkLink($tree, $xref2));
                    }
                } elseif ($type2 === 'SOUR' && $type1 === 'NOTE') {
                    // Notes are intended to add explanations and comments to other records. They should not have their own sources.
                } elseif ($type2 === 'SOUR' && $type1 === 'OBJE') {
                    // Media objects are intended to illustrate other records, facts, and source/citations. They should not have their own sources.
                } elseif ($type2 === 'OBJE' && $type1 === 'REPO') {
                    $warnings[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (!array_key_exists($type1, self::RECORD_LINKS) || !in_array($type2, self::RECORD_LINKS[$type1], true) || !array_key_exists($type2, self::XREF_LINKS)) {
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (self::XREF_LINKS[$type2] !== $type3) {
                    // Target XREF does exist - but is invalid
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                        /* I18N: %1$s is an internal ID number such as R123. %2$s and %3$s are record types, such as INDI or SOUR */
                        I18N::translate('%1$s is a %2$s but a %3$s is expected.', $this->checkLink($tree, $xref2), $this->formatType($type3), $this->formatType($type2));
                } elseif (
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMC', ['CHIL']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMS', ['HUSB', 'WIFE']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'CHIL', ['FAMC']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'HUSB', ['FAMS']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'WIFE', ['FAMS'])
                ) {
                    /* I18N: %1$s and %2$s are internal ID numbers such as R123 */
                    $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not have a link back to %2$s.', $this->checkLink($tree, $xref2), $this->checkLink($tree, $xref1));
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Load the raw GEDCOM of every record in a tree, with pending changes applied.
     *
     * Each returned object has the properties "xref", "gedcom" and "type".
     *
     * @param Tree $tree
     *
     * @return stdClass[] Records, indexed by XREF
     */
    private function loadRecords(Tree $tree): array
    {
        // We need to work with raw GEDCOM data, as we are looking for errors
        // which may prevent the GedcomRecord objects from working.

        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);
        $q6 = DB::table('change')
            ->where('gedcom_id', '=', $tree->id())
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")]);

        $rows = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->unionAll($q6)
            ->get()
            ->map(static function (stdClass $row): stdClass {
                // Extract type for pending record
                if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match)) {
                    $row->type = $match[1];
                }

                return $row;
            });

        // Later rows replace earlier rows that have the same XREF.
        // NOTE(review): this relies on the pending changes being returned after the
        // stored records, in change_id order - confirm that the database guarantees this.
        $records = [];

        foreach ($rows as $row) {
            if ($row->gedcom !== '') {
                // existing or updated record
                $records[$row->xref] = $row;
            } else {
                // deleted record
                unset($records[$row->xref]);
            }
        }

        return $records;
    }

    /**
     * Is a link of a given kind missing its counterpart in the target record?
     *
     * Only call this when $xref2 is a key of $links.
     *
     * @param string     $type       The tag of the link from $xref1 to $xref2
     * @param string[][] $links      All links: source XREF => [target XREF => tag]
     * @param string     $xref1      The record containing the link
     * @param string     $xref2      The record that the link points to
     * @param string     $link       The tag that this check applies to
     * @param string[]   $reciprocal The tags that are acceptable for the link back from $xref2 to $xref1
     *
     * @return bool True if $type is $link and $xref2 has no acceptable link back to $xref1
     */
    private function checkReverseLink(string $type, array $links, string $xref1, string $xref2, string $link, array $reciprocal): bool
    {
        return $type === $link && (!array_key_exists($xref1, $links[$xref2]) || !in_array($links[$xref2][$xref1], $reciprocal, true));
    }

    /**
     * Create a message linking one record to another.
     *
     * @param Tree   $tree
     * @param string $type1 The type of the record containing the link
     * @param string $xref1 The record containing the link
     * @param string $type2 The tag of the link
     * @param string $xref2 The record that the link points to
     *
     * @return string HTML
     */
    private function checkLinkMessage(Tree $tree, string $type1, string $xref1, string $type2, string $xref2): string
    {
        /* I18N: The placeholders are GEDCOM XREFs and tags. e.g. “INDI I123 contains a FAMC link to F234.” */
        return I18N::translate(
            '%1$s %2$s has a %3$s link to %4$s.',
            $this->formatType($type1),
            $this->checkLink($tree, $xref1),
            $this->formatType($type2),
            $this->checkLink($tree, $xref2)
        );
    }

    /**
     * Format a link to a record.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string HTML
     */
    private function checkLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, [
            'xref' => $xref,
            'tree' => $tree->name(),
        ]);

        // The XREF is taken from raw GEDCOM data and can contain HTML special
        // characters, so it must be escaped in the link text as well as in the URL.
        return '<b><a href="' . e($url) . '">' . e($xref) . '</a></b>';
    }

    /**
     * Format a record type.
     *
     * @param string $type
     *
     * @return string HTML
     */
    private function formatType(string $type): string
    {
        // The type is read from the database or from raw GEDCOM data, so escape it.
        // NOTE(review): the label is inserted into the title attribute as-is - confirm
        // that GedcomTag::getLabel() always returns text that is safe inside an attribute.
        return '<b title="' . GedcomTag::getLabel($type) . '">' . e($type) . '</b>';
    }
}