<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fig\Http\Message\StatusCodeInterface;
use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\FlashMessages;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Html;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function date;
use function redirect;
use function response;
use function route;
use function view;

/**
 * Class SiteMapModule
 *
 * Serves three kinds of document, all routed to this class:
 *  - /sitemap.xsl                       a stylesheet for the XML files,
 *  - /sitemap.xml                       an index of every sitemap file,
 *  - /sitemap-{tree}-{type}-{page}.xml  one "volume" of records of one type from one tree.
 *
 * A tree is only included when its "include_in_sitemap" preference is '1'.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const RECORDS_PER_VOLUME = 500;

    // Two weeks, in seconds. Written as a product so that the value cannot drift from the comment.
    private const CACHE_LIFE = 14 * 24 * 60 * 60;

    // The priority passed to the sitemap-file view, for each supported record type.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the three sitemap routes. Each one is handled by this object,
     * and handle() tells them apart by the route name.
     *
     * @return void
     */
    public function boot(): void
    {
        Registry::routeFactory()->routeMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the URL of the sitemap
     * index, and links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration: one "sitemap<tree id>" checkbox per tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        foreach ($this->tree_service->all() as $tree) {
            $include_in_sitemap = Validator::parsedBody($request)->boolean('sitemap' . $tree->id(), false);

            // The string cast stores true as '1' and false as '', which is what
            // the "=== '1'" checks elsewhere in this class rely on.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request for any of the three routes registered in boot().
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'content-type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // The only remaining route is 'sitemap-file'.
        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the file cache) the index of all sitemap files.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()
                ->filter(static fn (Tree $tree): bool => $tree->getPreference('include_in_sitemap') === '1')
                ->map(static fn (Tree $tree): int => $tree->id());

            // Notes, repositories and submitters share the "other" table, and are told apart by o_type.
            $count_families     = $this->countRecordsByTree('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecordsByTree('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecordsByTree('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecordsByTree('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecordsByTree('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecordsByTree('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecordsByTree('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Count the rows of one record table, grouped by tree, for the given trees.
     *
     * @param string                     $table       The table that holds the records
     * @param string                     $tree_column The column of $table that is joined to gedcom.gedcom_id
     * @param Collection<array-key,int>  $tree_ids    Only these trees are counted
     * @param string|null                $record_type If given, only rows with this o_type are counted
     *
     * @return Collection<array-key,mixed> The totals, keyed by gedcom_name
     */
    private function countRecordsByTree(
        string $table,
        string $tree_column,
        Collection $tree_ids,
        ?string $record_type = null
    ): Collection {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one volume of a sitemap:
     * up to RECORDS_PER_VOLUME records of one type from one tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap, the page number
     *                               is negative, or the record type is not supported.
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // The page number is multiplied by RECORDS_PER_VOLUME to give the row offset,
        // so a negative page cannot identify a volume. Reject it here, before
        // anything is generated or written to the cache for it.
        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // This throws for an unsupported $type, so the PRIORITY lookup below
            // is only reached with a key that exists.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, and drop those that
     * fail the canShow(Auth::PRIV_PRIVATE) check.
     *
     * The privacy filter runs after the page has been fetched, so the result
     * may contain fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type   One of the RECORD_TYPE constants handled below
     * @param int    $limit  Maximum number of rows to fetch
     * @param int    $offset Number of rows to skip
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not a supported record type.
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        return $records->filter(static fn (GedcomRecord $record): bool => $record->canShow(Auth::PRIV_PRIVATE));
    }

    /**
     * One page of the tree's families, ordered by f_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One page of the tree's individuals, ordered by i_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One page of the tree's media objects, ordered by m_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One page of the tree's notes (rows of "other" with the note o_type), ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One page of the tree's repositories (rows of "other" with the repository o_type), ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One page of the tree's sources, ordered by s_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One page of the tree's submitters (rows of "other" with the submitter o_type), ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}