<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fig\Http\Message\StatusCodeInterface;
use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\FlashMessages;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Html;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function date;
use function redirect;
use function response;
use function route;
use function view;

/**
 * Class SiteMapModule
 *
 * Serves an XML sitemap index, the per-tree/per-record-type sitemap volumes
 * and the XSL stylesheet that accompanies them, and provides a control panel
 * page on which each tree can be included in, or excluded from, the sitemap.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const CACHE_LIFE         = 1209600; // Two weeks (14 x 86400 seconds)

    // Priority written into each sitemap volume, keyed by record type.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the three GET routes (stylesheet, index and volume files),
     * all of which are handled by this module's handle() method.
     *
     * @return void
     */
    public function boot(): void
    {
        Registry::routeFactory()->routeMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the sitemap URL and
     * the links used to submit that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration: for every tree, store whether it should be
     * included in the sitemap, then return to the configuration page.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        foreach ($this->tree_service->all() as $tree) {
            // One form field per tree, named "sitemap" followed by the tree ID; absent means false.
            $include_in_sitemap = Validator::parsedBody($request)->boolean('sitemap' . $tree->id(), false);

            // Stored as a string: true becomes '1' (the value the sitemap handlers test for), false becomes ''.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request for one of the routes registered in boot().
     *
     * Any route other than the stylesheet and the index is treated as a
     * request for a sitemap volume.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'content-type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        return $this->siteMapFile($request);
    }

    /**
     * Generate the sitemap index, listing the volumes available for each tree.
     *
     * The rendered XML is cached under the key "sitemap.xml" for CACHE_LIFE.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            // Each query below counts one record type per tree, keyed by the tree name (gedcom_name).
            $count_families = DB::table('families')
                ->join('gedcom', 'f_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');

            $count_individuals = DB::table('individuals')
                ->join('gedcom', 'i_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');

            $count_media = DB::table('media')
                ->join('gedcom', 'm_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');

            $count_notes = DB::table('other')
                ->join('gedcom', 'o_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->where('o_type', '=', Note::RECORD_TYPE)
                ->groupBy(['gedcom_id'])
                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');

            $count_repositories = DB::table('other')
                ->join('gedcom', 'o_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->where('o_type', '=', Repository::RECORD_TYPE)
                ->groupBy(['gedcom_id'])
                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');

            $count_sources = DB::table('sources')
                ->join('gedcom', 's_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');

            $count_submitters = DB::table('other')
                ->join('gedcom', 'o_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->where('o_type', '=', Submitter::RECORD_TYPE)
                ->groupBy(['gedcom_id'])
                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Generate one sitemap volume: a page of records of one type from one tree.
     *
     * The rendered XML is cached per tree, record type and page for CACHE_LIFE.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap, the page number
     *                               is negative, or the record type is not recognised.
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // Volumes are numbered from zero (offset = RECORDS_PER_VOLUME * page),
        // so a negative page number cannot identify one.
        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // Fetch the records first: an unrecognised type throws here, before PRIORITY is indexed.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, omitting any that
     * cannot be shown at the Auth::PRIV_PRIVATE access level.
     *
     * Filtering happens after paging, so a page may hold fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type   One of the RECORD_TYPE constants handled below
     * @param int    $limit  Maximum number of rows to read
     * @param int    $offset Number of rows to skip
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not a supported record type.
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        $records = $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });

        return $records;
    }

    /**
     * One page of the tree's families, ordered by f_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One page of the tree's individuals, ordered by i_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One page of the tree's media objects, ordered by m_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One page of the tree's notes, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One page of the tree's repositories, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One page of the tree's sources, ordered by s_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One page of the tree's submitters, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}