<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2020 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Http\RequestHandlers;

use Fisharebest\Webtrees\Gedcom;
use Fisharebest\Webtrees\GedcomTag;
use Fisharebest\Webtrees\Header;
use Fisharebest\Webtrees\Http\ViewResponseTrait;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Services\AdminService;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Expression;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;
use stdClass;

use function array_key_exists;
use function assert;
use function e;
use function in_array;
use function preg_match;
use function preg_match_all;
use function route;
use function strtoupper;

use const PREG_SET_ORDER;

/**
 * Check a tree for errors.
 *
 * Scans the raw GEDCOM of every record in a tree (including pending changes)
 * for links between records, and reports links that point to missing records,
 * links of a type that is not allowed in the containing record, links to a
 * record of the wrong type, and family links that have no reciprocal link.
 */
class CheckTree implements RequestHandlerInterface
{
    use ViewResponseTrait;

    /**
     * Build the lists of errors and warnings and render the "admin/trees-check" view.
     *
     * @param ServerRequestInterface $request Must carry a Tree in its "tree" attribute
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        $records = $this->loadRawRecords($tree);

        // LOOK FOR BROKEN LINKS

        // Link tag => type of record that the link must point to.
        $XREF_LINKS = [
            'NOTE'          => 'NOTE',
            'SOUR'          => 'SOUR',
            'REPO'          => 'REPO',
            'OBJE'          => 'OBJE',
            'SUBM'          => 'SUBM',
            'FAMC'          => 'FAM',
            'FAMS'          => 'FAM',
            //'ADOP'=>'FAM', // Need to handle this case specially. We may have both ADOP and FAMC links to the same FAM, but only store one.
            'HUSB'          => 'INDI',
            'WIFE'          => 'INDI',
            'CHIL'          => 'INDI',
            'ASSO'          => 'INDI',
            // A webtrees extension
            '_ASSO'         => 'INDI',
            'ALIA'          => 'INDI',
            // A webtrees extension
            'AUTH'          => 'INDI',
            'ANCI'          => 'SUBM',
            'DESI'          => 'SUBM',
            '_WT_OBJE_SORT' => 'OBJE',
            '_LOC'          => '_LOC',
        ];

        // Record type => link tags that may appear in a record of that type.
        $RECORD_LINKS = [
            'INDI' => [
                'NOTE',
                'OBJE',
                'SOUR',
                'SUBM',
                'ASSO',
                '_ASSO',
                'FAMC',
                'FAMS',
                'ALIA',
                '_WT_OBJE_SORT',
                '_LOC',
            ],
            'FAM'  => [
                'NOTE',
                'OBJE',
                'SOUR',
                'SUBM',
                'ASSO',
                '_ASSO',
                'HUSB',
                'WIFE',
                'CHIL',
                '_LOC',
            ],
            'SOUR' => [
                'NOTE',
                'OBJE',
                'REPO',
                'AUTH',
            ],
            'REPO' => ['NOTE'],
            // The spec also allows SOUR. OBJE => SOUR links are skipped by an explicit branch below.
            'OBJE' => ['NOTE'],
            // The spec also allows SOUR. NOTE => SOUR links are skipped by an explicit branch below.
            'NOTE' => [],
            'SUBM' => [
                'NOTE',
                'OBJE',
            ],
            'SUBN' => ['SUBM'],
            '_LOC' => [
                'SOUR',
                'OBJE',
                '_LOC',
                'NOTE',
            ],
        ];

        $errors   = [];
        $warnings = [];

        // Generate lists of all links:
        // $all_links[source xref][target xref] = link tag
        // $upper_links[upper-cased xref]       = xref as written (used for "did you mean" hints)
        $all_links   = [];
        $upper_links = [];

        foreach ($records as $record) {
            $all_links[$record->xref]               = [];
            $upper_links[strtoupper($record->xref)] = $record->xref;

            preg_match_all('/\n\d (' . Gedcom::REGEX_TAG . ') @([^#@\n][^\n@]*)@/', $record->gedcom, $matches, PREG_SET_ORDER);

            foreach ($matches as $match) {
                // Only one tag is kept per (source, target) pair; a later match overwrites an earlier one.
                $all_links[$record->xref][$match[2]] = $match[1];
            }
        }

        foreach ($all_links as $xref1 => $links) {
            // PHP converts array keys to integers.
            $xref1 = (string) $xref1;

            $type1 = $records[$xref1]->type;

            foreach ($links as $xref2 => $type2) {
                // PHP converts array keys to integers.
                $xref2 = (string) $xref2;

                // Type of the target record, or '' when the target does not exist.
                $type3 = $records[$xref2]->type ?? '';

                if (!array_key_exists($xref2, $all_links)) {
                    if (array_key_exists(strtoupper($xref2), $upper_links)) {
                        // The target exists, but with different letter-case.
                        $warnings[] =
                            $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                            /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                            I18N::translate('%1$s does not exist. Did you mean %2$s?', $this->checkLink($tree, $xref2), $this->checkLink($tree, $upper_links[strtoupper($xref2)]));
                    } else {
                        /* I18N: placeholders are GEDCOM XREFs, such as R123 */
                        $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not exist.', $this->checkLink($tree, $xref2));
                    }
                } elseif ($type2 === 'SOUR' && $type1 === 'NOTE') {
                    // Notes are intended to add explanations and comments to other records. They should not have their own sources.
                } elseif ($type2 === 'SOUR' && $type1 === 'OBJE') {
                    // Media objects are intended to illustrate other records, facts, and source/citations. They should not have their own sources.
                } elseif ($type2 === 'OBJE' && $type1 === 'REPO') {
                    $warnings[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif (!array_key_exists($type1, $RECORD_LINKS) || !in_array($type2, $RECORD_LINKS[$type1], true) || !array_key_exists($type2, $XREF_LINKS)) {
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) .
                        ' ' .
                        I18N::translate('This type of link is not allowed here.');
                } elseif ($XREF_LINKS[$type2] !== $type3) {
                    // Target XREF does exist - but is invalid
                    // NOTE(review): the third placeholder receives the link tag ($type2), not the
                    // expected record type ($XREF_LINKS[$type2]) - confirm which one is intended.
                    $errors[] =
                        $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' .
                        /* I18N: %1$s is an internal ID number such as R123. %2$s and %3$s are record types, such as INDI or SOUR */
                        I18N::translate('%1$s is a %2$s but a %3$s is expected.', $this->checkLink($tree, $xref2), $this->formatType($type3), $this->formatType($type2));
                } elseif (
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMC', ['CHIL']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'FAMS', ['HUSB', 'WIFE']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'CHIL', ['FAMC']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'HUSB', ['FAMS']) ||
                    $this->checkReverseLink($type2, $all_links, $xref1, $xref2, 'WIFE', ['FAMS'])
                ) {
                    /* I18N: %1$s and %2$s are internal ID numbers such as R123 */
                    $errors[] = $this->checkLinkMessage($tree, $type1, $xref1, $type2, $xref2) . ' ' . I18N::translate('%1$s does not have a link back to %2$s.', $this->checkLink($tree, $xref2), $this->checkLink($tree, $xref1));
                }
            }
        }

        $title = I18N::translate('Check for errors') . ' — ' . e($tree->title());

        return $this->viewResponse('admin/trees-check', [
            'errors'   => $errors,
            'title'    => $title,
            'tree'     => $tree,
            'warnings' => $warnings,
        ]);
    }

    /**
     * Load the raw GEDCOM of every record in a tree, with pending changes applied.
     *
     * We need to work with raw GEDCOM data, as we are looking for errors
     * which may prevent the GedcomRecord objects from working.
     *
     * @param Tree $tree
     *
     * @return array<stdClass> Rows with "xref", "gedcom" and "type" properties, keyed by XREF
     */
    private function loadRawRecords(Tree $tree): array
    {
        $q1 = DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->select(['i_id AS xref', 'i_gedcom AS gedcom', new Expression("'INDI' AS type")]);
        $q2 = DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->select(['f_id AS xref', 'f_gedcom AS gedcom', new Expression("'FAM' AS type")]);
        $q3 = DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->select(['m_id AS xref', 'm_gedcom AS gedcom', new Expression("'OBJE' AS type")]);
        $q4 = DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->select(['s_id AS xref', 's_gedcom AS gedcom', new Expression("'SOUR' AS type")]);
        $q5 = DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->whereNotIn('o_type', [Header::RECORD_TYPE, 'TRLR'])
            ->select(['o_id AS xref', 'o_gedcom AS gedcom', 'o_type']);
        $q6 = DB::table('change')
            ->where('gedcom_id', '=', $tree->id())
            ->where('status', '=', 'pending')
            ->orderBy('change_id')
            ->select(['xref', 'new_gedcom AS gedcom', new Expression("'' AS type")]);

        $rows = $q1
            ->unionAll($q2)
            ->unionAll($q3)
            ->unionAll($q4)
            ->unionAll($q5)
            ->unionAll($q6)
            ->get()
            ->map(static function (stdClass $row): stdClass {
                // Extract type for pending record
                if ($row->type === '' && preg_match('/^0 @[^@]*@ ([_A-Z0-9]+)/', $row->gedcom, $match)) {
                    $row->type = $match[1];
                }

                return $row;
            });

        $records = [];

        // A later row for the same XREF replaces (or removes) an earlier one.
        foreach ($rows as $row) {
            if ($row->gedcom !== '') {
                // existing or updated record
                $records[$row->xref] = $row;
            } else {
                // deleted record
                unset($records[$row->xref]);
            }
        }

        return $records;
    }

    /**
     * Is the reciprocal of a family link missing?
     *
     * Only applies when the link tag ($type) is the one being checked ($link).
     * The caller must ensure that $links has an entry for $xref2.
     *
     * @param string     $type       The tag of the link from $xref1 to $xref2
     * @param string[][] $links      All links: [source xref][target xref] => tag
     * @param string     $xref1      The record containing the link
     * @param string     $xref2      The record being linked to
     * @param string     $link       The tag that this check applies to
     * @param string[]   $reciprocal The tags that are acceptable for the link from $xref2 back to $xref1
     *
     * @return bool True when $type is $link and $xref2 has no acceptable link back to $xref1
     */
    private function checkReverseLink(string $type, array $links, string $xref1, string $xref2, string $link, array $reciprocal): bool
    {
        if ($type !== $link) {
            return false;
        }

        $back_links = $links[$xref2];

        return !array_key_exists($xref1, $back_links) || !in_array($back_links[$xref1], $reciprocal, true);
    }

    /**
     * Create a message linking one record to another.
     *
     * @param Tree   $tree
     * @param string $type1 Type of the record containing the link
     * @param string $xref1 XREF of the record containing the link
     * @param string $type2 Tag of the link
     * @param string $xref2 XREF of the record being linked to
     *
     * @return string HTML
     */
    private function checkLinkMessage(Tree $tree, string $type1, string $xref1, string $type2, string $xref2): string
    {
        /* I18N: The placeholders are GEDCOM XREFs and tags. e.g. “INDI I123 contains a FAMC link to F234.” */
        return I18N::translate(
            '%1$s %2$s has a %3$s link to %4$s.',
            $this->formatType($type1),
            $this->checkLink($tree, $xref1),
            $this->formatType($type2),
            $this->checkLink($tree, $xref2)
        );
    }

    /**
     * Format a link to a record.
     *
     * The XREF is taken from raw GEDCOM data and may contain characters that
     * are special in HTML, so both the URL and the link text are escaped.
     *
     * @param Tree   $tree
     * @param string $xref
     *
     * @return string HTML
     */
    private function checkLink(Tree $tree, string $xref): string
    {
        $url = route(GedcomRecordPage::class, [
            'xref' => $xref,
            'tree' => $tree->name(),
        ]);

        return '<b><a href="' . e($url) . '">' . e($xref) . '</a></b>';
    }

    /**
     * Format a record type.
     *
     * The label is placed in an HTML attribute and the type in element text,
     * so both are escaped for their context.
     *
     * @param string $type
     *
     * @return string HTML
     */
    private function formatType(string $type): string
    {
        return '<b title="' . e(GedcomTag::getLabel($type)) . '">' . e($type) . '</b>';
    }
}