1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2021 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Module; 21 22use Aura\Router\Route; 23use Aura\Router\RouterContainer; 24use Fig\Http\Message\StatusCodeInterface; 25use Fisharebest\Webtrees\Auth; 26use Fisharebest\Webtrees\Family; 27use Fisharebest\Webtrees\FlashMessages; 28use Fisharebest\Webtrees\GedcomRecord; 29use Fisharebest\Webtrees\Html; 30use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException; 31use Fisharebest\Webtrees\I18N; 32use Fisharebest\Webtrees\Individual; 33use Fisharebest\Webtrees\Media; 34use Fisharebest\Webtrees\Note; 35use Fisharebest\Webtrees\Registry; 36use Fisharebest\Webtrees\Repository; 37use Fisharebest\Webtrees\Services\TreeService; 38use Fisharebest\Webtrees\Source; 39use Fisharebest\Webtrees\Submitter; 40use Fisharebest\Webtrees\Tree; 41use Fisharebest\Webtrees\Validator; 42use Illuminate\Database\Capsule\Manager as DB; 43use Illuminate\Database\Query\Expression; 44use Illuminate\Support\Collection; 45use Psr\Http\Message\ResponseInterface; 46use Psr\Http\Message\ServerRequestInterface; 47use Psr\Http\Server\RequestHandlerInterface; 48 49use function app; 50use function assert; 51use function date; 52use function redirect; 53use function response; 54use function route; 
use function view;

/**
 * Class SiteMapModule
 *
 * Serves three routes: an XML sitemap index, paged sitemap files for each
 * tree/record type, and the XSL stylesheet referenced by both. It also
 * provides a control-panel page for choosing which trees are included.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const RECORDS_PER_VOLUME = 500;

    // Two weeks. Passed as the lifetime argument when caching generated XML.
    private const CACHE_LIFE = 14 * 24 * 60 * 60;

    // The priority passed to the sitemap-file view, keyed by record type.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the stylesheet, index and per-volume sitemap routes, all of
     * which are handled by this object (see handle()).
     *
     * @return void
     */
    public function boot(): void
    {
        $router_container = app(RouterContainer::class);
        assert($router_container instanceof RouterContainer);

        $router_container->getMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        $router_container->getMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        $router_container->getMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the sitemap URL and
     * links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration: for every tree, store whether it should be
     * included in the sitemap, then redirect back to the configuration page.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $params = (array) $request->getParsedBody();

        foreach ($this->tree_service->all() as $tree) {
            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);

            // Casting a bool to string gives '1' or ''. The readers of this
            // preference compare it against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a sitemap request according to the name of the matched route.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'Content-Type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // The only other route registered in boot() is 'sitemap-file'.
        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the cache) the sitemap index, which is built
     * from per-tree record counts for the trees that have sitemaps enabled.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            $count_families     = $this->countRecordsByTree('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecordsByTree('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecordsByTree('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecordsByTree('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecordsByTree('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecordsByTree('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecordsByTree('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Count the records in one table, grouped by tree.
     *
     * @param string          $table       Table holding the records
     * @param string          $tree_column Column in $table that is joined to gedcom.gedcom_id
     * @param Collection<int> $tree_ids    Only count records in these trees
     * @param string|null     $record_type If given, only count rows whose o_type column has this
     *                                     value; only meaningful for the "other" table
     *
     * @return Collection Record totals, keyed by gedcom_name
     */
    private function countRecordsByTree(
        string $table,
        string $tree_column,
        Collection $tree_ids,
        ?string $record_type = null
    ): Collection {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
            ->pluck('total', 'gedcom_name');
    }

    /**
     * Generate (or fetch from the cache) one volume of a sitemap: a page of
     * records of a single type from a single tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap, the page number
     *                               is negative, or the record type is not recognised
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // Page 0 starts at offset 0, so a negative page cannot refer to any volume.
        // Reject it rather than computing a negative offset and caching the result.
        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // This throws for an unknown type, so PRIORITY[$type] below is only read for known types.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, omitting private records.
     *
     * The privacy filter is applied after the page has been fetched, so the
     * result may contain fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not one of the supported record types
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        $records = $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });

        return $records;
    }

    /**
     * Fetch a page of families from a tree, ordered by f_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * Fetch a page of individuals from a tree, ordered by i_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * Fetch a page of media objects from a tree, ordered by m_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * Fetch a page of notes from a tree, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * Fetch a page of repositories from a tree, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * Fetch a page of sources from a tree, ordered by s_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * Fetch a page of submitters from a tree, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}