1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2022 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Module; 21 22use Aura\Router\Route; 23use Aura\Router\RouterContainer; 24use Fig\Http\Message\StatusCodeInterface; 25use Fisharebest\Webtrees\Auth; 26use Fisharebest\Webtrees\Family; 27use Fisharebest\Webtrees\FlashMessages; 28use Fisharebest\Webtrees\GedcomRecord; 29use Fisharebest\Webtrees\Html; 30use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException; 31use Fisharebest\Webtrees\I18N; 32use Fisharebest\Webtrees\Individual; 33use Fisharebest\Webtrees\Media; 34use Fisharebest\Webtrees\Note; 35use Fisharebest\Webtrees\Registry; 36use Fisharebest\Webtrees\Repository; 37use Fisharebest\Webtrees\Services\TreeService; 38use Fisharebest\Webtrees\Source; 39use Fisharebest\Webtrees\Submitter; 40use Fisharebest\Webtrees\Tree; 41use Fisharebest\Webtrees\Validator; 42use Illuminate\Database\Capsule\Manager as DB; 43use Illuminate\Database\Query\Expression; 44use Illuminate\Support\Collection; 45use Psr\Http\Message\ResponseInterface; 46use Psr\Http\Message\ServerRequestInterface; 47use Psr\Http\Server\RequestHandlerInterface; 48 49use function app; 50use function assert; 51use function date; 52use function redirect; 53use function response; 54use function route; 
use function view;

/**
 * Class SiteMapModule
 *
 * Publishes XML sitemaps for search engines: an XSL stylesheet, a sitemap index,
 * and one sitemap file per tree / record type / page ("volume").
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const RECORDS_PER_VOLUME = 500;

    // Two weeks, expressed in seconds (14 * 24 * 60 * 60).
    // The previous value, 209600, was only about 2.4 days and did not match its "two weeks" comment.
    private const CACHE_LIFE = 1209600;

    // Sitemap priority for each supported record type.
    // The keys of this array are also the only record types accepted in sitemap URLs.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the three sitemap routes, all of which are handled by this module.
     *
     * @return void
     */
    public function boot(): void
    {
        $router = Registry::routeFactory()->routeMap();

        $router->get('sitemap-style', '/sitemap.xsl', $this);
        $router->get('sitemap-index', '/sitemap.xml', $this);
        $router->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page, listing all trees and the search-engine submission links.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the "include in sitemap" preference for every tree, then return to the configuration page.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $params = (array) $request->getParsedBody();

        foreach ($this->tree_service->all() as $tree) {
            // The form field is named "sitemap" followed by the tree ID; a missing field means "not included".
            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);

            // Stored as '1' (included) or '' (excluded); readers compare against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a sitemap request according to the name of the matched route.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'Content-Type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // The only remaining route registered in boot() is 'sitemap-file'.
        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()
                ->filter(static fn (Tree $tree): bool => $tree->getPreference('include_in_sitemap') === '1')
                ->map(static fn (Tree $tree): int => $tree->id());

            // Record counts for each enabled tree, keyed by tree name.
            $count_families     = $this->countRecords('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecords('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecords('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecords('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecords('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecords('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecords('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Count the rows of one record table for each of the given trees.
     *
     * @param string              $table       Record table, e.g. "families" or "other"
     * @param string              $tree_column Column of $table that holds the tree ID, e.g. "f_file"
     * @param Collection<int,int> $tree_ids    IDs of the trees to count
     * @param string|null         $record_type When given, only rows with this "o_type" are counted
     *
     * @return Collection<string,mixed> Row count for each tree, keyed by tree name (gedcom_name)
     */
    private function countRecords(string $table, string $tree_column, Collection $tree_ids, ?string $record_type = null): Collection
    {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
            ->pluck('total', 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one volume of a sitemap.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException When the tree is excluded from sitemaps, or the record type or page is invalid
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // Reject unknown record types before they are used to build a cache key.
        if (!isset(self::PRIORITY[$type])) {
            throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // A negative page number cannot address a valid volume.
        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // Pages are numbered from zero, so page N starts at record N * RECORDS_PER_VOLUME.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, with private records removed.
     *
     * The privacy filter is applied after the page has been read from the database,
     * so the result may contain fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException When $type is not a supported record type
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        return $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });
    }

    /**
     * One page of families from a tree, ordered by f_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One page of individuals from a tree, ordered by i_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One page of media objects from a tree, ordered by m_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One page of notes from a tree, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One page of repositories from a tree, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One page of sources from a tree, ordered by s_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One page of submitters from a tree, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}