<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Aura\Router\Route;
use Aura\Router\RouterContainer;
use Fig\Http\Message\StatusCodeInterface;
use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\FlashMessages;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Html;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function app;
use function assert;
use function date;
use function redirect;
use function response;
use function route;
use function view;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index, per-tree/per-record-type sitemap files and the
 * XSL stylesheet used to render them, for every tree that has the
 * "include_in_sitemap" preference enabled.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const CACHE_LIFE         = 14 * 24 * 60 * 60; // Two weeks (1,209,600 seconds)

    // Sitemap priority of each record type, keyed by record type.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the three sitemap routes (stylesheet, index and individual
     * files). All of them are handled by this module - see handle().
     *
     * @return void
     */
    public function boot(): void
    {
        $router_container = app(RouterContainer::class);
        assert($router_container instanceof RouterContainer);

        $map = $router_container->getMap();

        $map->get('sitemap-style', '/sitemap.xsl', $this);
        $map->get('sitemap-index', '/sitemap.xml', $this);
        $map->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: which trees are included, the URL of the
     * sitemap index, and links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        // NOTE(review): search engines have been retiring their sitemap "ping"
        // endpoints - confirm that these two URLs are still supported.
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration: for every tree, store whether it should be
     * included in the sitemap.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $params = (array) $request->getParsedBody();

        // NOTE(review): the cached sitemap index is not invalidated here, so it
        // may keep reflecting the previous selection until its cache entry expires.
        foreach ($this->tree_service->all() as $tree) {
            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);

            // Stored as '1' (included) or '' (excluded); readers compare against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a sitemap request, according to the name of the matched route.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = $request->getAttribute('route');
        assert($route instanceof Route);

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'Content-Type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index, which is
     * built from the number of records of each type in each enabled tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            $count_families     = $this->countRecords('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecords('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecords('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecords('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecords('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecords('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecords('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Count the records in one table, for each of the given trees.
     *
     * @param string      $table       Table holding the records
     * @param string      $tree_column Column of $table that holds the tree ID
     * @param Collection  $tree_ids    IDs of the trees to count
     * @param string|null $record_type If set, only count rows with this "o_type"
     *
     * @return Collection Totals, keyed by tree name (gedcom_name)
     */
    private function countRecords(string $table, string $tree_column, Collection $tree_ids, ?string $record_type = null): Collection
    {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
            ->pluck('total', 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one page of the sitemap for
     * one record type in one tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap,
     *                               or the page number or record type is invalid.
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        $type = $request->getAttribute('type');
        $page = (int) $request->getAttribute('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // The page number comes from the URL. A negative page has no meaningful
        // offset, and would otherwise create a separate cache entry per value.
        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // This throws for an unknown record type, so the priority lookup below is safe.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, omitting any that
     * may not be shown at the Auth::PRIV_PRIVATE access level.
     *
     * @param Tree   $tree
     * @param string $type
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not a supported record type.
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        return $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });
    }

    /**
     * One page of families, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One page of individuals, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One page of media objects, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One page of notes, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One page of repositories, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One page of sources, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One page of submitters, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}