<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2022 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fig\Http\Message\StatusCodeInterface;
use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\FlashMessages;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Html;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function date;
use function redirect;
use function response;
use function route;
use function view;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index (/sitemap.xml), its XSL stylesheet (/sitemap.xsl) and
 * paginated per-tree, per-record-type sitemap files, and provides an admin page
 * for choosing which trees are included.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const CACHE_LIFE         = 1209600; // Two weeks, expressed in seconds (14 * 24 * 60 * 60).

    // Priority passed to the sitemap-file view, keyed by record type.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the three GET routes (stylesheet, index, and per-volume file),
     * all of which are handled by this object's handle() method.
     *
     * @return void
     */
    public function boot(): void
    {
        Registry::routeFactory()->routeMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the sitemap index URL,
     * and links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration: for every tree, store whether it is included in the sitemap.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        foreach ($this->tree_service->all() as $tree) {
            // A missing form field counts as "not included".
            $include_in_sitemap = Validator::parsedBody($request)->boolean('sitemap' . $tree->id(), false);

            // The string cast stores true as '1' and false as ''; the readers below compare against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        // NOTE(review): the cached 'sitemap.xml' index is not invalidated here, so a changed
        // selection may only show up in the index once the cache entry expires - confirm.
        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request for one of this module's routes, based on the route name.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'content-type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // The only remaining route registered in boot() is 'sitemap-file'.
        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index.
     *
     * The index view is given per-tree record counts for each record type,
     * together with the number of records per volume.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            // Each query below yields a map of tree name => number of records of one type.
            $count_families = DB::table('families')
                ->join('gedcom', 'f_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            $count_individuals = DB::table('individuals')
                ->join('gedcom', 'i_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            $count_media = DB::table('media')
                ->join('gedcom', 'm_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            $count_notes = DB::table('other')
                ->join('gedcom', 'o_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->where('o_type', '=', Note::RECORD_TYPE)
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            $count_repositories = DB::table('other')
                ->join('gedcom', 'o_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->where('o_type', '=', Repository::RECORD_TYPE)
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            $count_sources = DB::table('sources')
                ->join('gedcom', 's_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            $count_submitters = DB::table('other')
                ->join('gedcom', 'o_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->where('o_type', '=', Submitter::RECORD_TYPE)
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Generate (or fetch from the file cache) one volume of a sitemap, for one
     * record type in one tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap, or the record type is invalid.
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        // Checked before the cache lookup, so a tree that has been excluded is refused
        // even if one of its volumes is still cached.
        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // Pages are zero-based: page N starts at record N * RECORDS_PER_VOLUME.
            // sitemapRecords() rejects an unknown type before PRIORITY is indexed below.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, excluding private ones.
     *
     * The privacy filter runs after the page has been fetched, so the result
     * may contain fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not one of the supported record types.
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        $records = $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });

        return $records;
    }

    /**
     * One page of the tree's families, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One page of the tree's individuals, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One page of the tree's media objects, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One page of the tree's shared notes, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One page of the tree's repositories, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One page of the tree's sources, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One page of the tree's submitters, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}