<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2022 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fig\Http\Message\StatusCodeInterface;
use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\FlashMessages;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Html;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function array_key_exists;
use function date;
use function redirect;
use function response;
use function route;
use function view;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index, per-tree/per-record-type sitemap files and the XSL
 * stylesheet used to display them, and provides an administration page for
 * choosing which trees are included.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const RECORDS_PER_VOLUME = 500;

    // Two weeks (14 * 24 * 60 * 60).
    // NOTE(review): assumes the cache lifetime is expressed in seconds - confirm against the cache implementation.
    private const CACHE_LIFE = 1209600;

    // Sitemap priority for each record type. Also defines which record types may be requested.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the three sitemap routes, all handled by this module.
     *
     * @return void
     */
    public function boot(): void
    {
        Registry::routeFactory()->routeMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: all trees, the sitemap index URL and
     * links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save, for every tree, whether it is included in the sitemap.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $params = (array) $request->getParsedBody();

        foreach ($this->tree_service->all() as $tree) {
            // A missing form field means "not included".
            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);

            // Casting to string stores '1' for true and '' for false; readers compare against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request to the stylesheet, the index or a sitemap file,
     * according to the name of the matched route.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'content-type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            $count_families     = $this->countRecords('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecords('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecords('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecords('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecords('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecords('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecords('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Count the records of one kind in each of the given trees.
     *
     * @param string      $table       Table that holds the records
     * @param string      $tree_column Column of $table that holds the tree ID
     * @param Collection  $tree_ids    IDs of the trees to count
     * @param string|null $record_type When given, only count rows whose o_type has this value
     *
     * @return Collection Totals, keyed by tree name (gedcom_name)
     */
    private function countRecords(
        string $table,
        string $tree_column,
        Collection $tree_ids,
        ?string $record_type = null
    ): Collection {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query = $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
            ->pluck('total', 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one volume of a sitemap.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not in the sitemap, or the type/page is not valid
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // The type and page come from the URL and become part of the cache key,
        // so reject values that cannot be valid before the cache is touched.
        if (!array_key_exists($type, self::PRIORITY)) {
            throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // The page number multiplied by the volume size gives the offset of the first record.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one volume of records of the given type, without the private ones.
     *
     * Privacy filtering is applied after the limit/offset, so the result may
     * contain fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not a supported record type
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        $records = $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });

        return $records;
    }

    /**
     * One volume of families, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One volume of individuals, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One volume of media objects, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One volume of shared notes, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One volume of repositories, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One volume of sources, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One volume of submitters, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}