<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2021 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Aura\Router\Route;
use Aura\Router\RouterContainer;
use Fig\Http\Message\StatusCodeInterface;
use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\FlashMessages;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Html;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function app;
use function assert;
use function date;
use function redirect;
use function response;
use function route;
use function view;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index (/sitemap.xml), paged per-tree sitemap files
 * (/sitemap-{tree}-{type}-{page}.xml) and their XSL stylesheet (/sitemap.xsl),
 * and provides an administration page for choosing which trees are included.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.

    // Two weeks: 14 days x 24 h x 60 min x 60 s = 1209600.
    // NOTE(review): assumes the file cache expects its lifetime in seconds - confirm.
    private const CACHE_LIFE = 14 * 24 * 60 * 60;

    // Priority passed to the sitemap-file view, keyed by record type.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    /** @var TreeService */
    private $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the three GET routes served by this module. Each route uses
     * this object as its request handler; handle() tells them apart by name.
     *
     * @return void
     */
    public function boot(): void
    {
        $router_container = app(RouterContainer::class);
        assert($router_container instanceof RouterContainer);

        $map = $router_container->getMap();

        $map->get('sitemap-style', '/sitemap.xsl', $this);
        $map->get('sitemap-index', '/sitemap.xml', $this);
        $map->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the URL of the sitemap
     * index, and links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration page: store an "include_in_sitemap" preference
     * for every tree, then redirect back to the configuration page.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $params = (array) $request->getParsedBody();

        foreach ($this->tree_service->all() as $tree) {
            // A missing form field counts as "not included".
            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);

            // Casting a bool to string gives '1' or ''; readers of this preference compare against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request for any of the routes registered in boot().
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = $request->getAttribute('route');
        assert($route instanceof Route);

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'Content-Type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // Anything else is the 'sitemap-file' route.
        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index, which is
     * rendered from per-tree record counts for each record type.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            $count_families     = $this->countRecordsByTree('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecordsByTree('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecordsByTree('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecordsByTree('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecordsByTree('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecordsByTree('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecordsByTree('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Count the rows of one record table, grouped by tree.
     *
     * @param string      $table       Table to count: families, individuals, media, sources or other
     * @param string      $tree_column Column of $table that is joined to gedcom.gedcom_id
     * @param Collection  $tree_ids    IDs of the trees to include
     * @param string|null $record_type If given, count only rows whose o_type column has this value.
     *                                 Only usable with the "other" table, which has that column.
     *
     * @return Collection Totals, keyed by tree name (gedcom_name)
     */
    private function countRecordsByTree(string $table, string $tree_column, Collection $tree_ids, ?string $record_type = null): Collection
    {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
            ->pluck('total', 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one page of the sitemap for a
     * single tree and record type.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap, or the record type is invalid
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        $type = $request->getAttribute('type');
        $page = (int) $request->getAttribute('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // Pages are zero-based: page N starts at record N * RECORDS_PER_VOLUME.
            // An invalid type throws here, before self::PRIORITY[$type] is read below.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, without the private ones.
     *
     * The limit and offset are applied by the database query, before private
     * records are removed, so the result may hold fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not one of the supported record types
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records: keep only those that canShow() accepts at the Auth::PRIV_PRIVATE level.
        return $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });
    }

    /**
     * One page of the tree's families, ordered by f_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One page of the tree's individuals, ordered by i_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One page of the tree's media objects, ordered by m_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One page of the tree's notes, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One page of the tree's repositories, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One page of the tree's sources, ordered by s_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One page of the tree's submitters, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}