<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fig\Http\Message\StatusCodeInterface;
use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\FlashMessages;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Html;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function date;
use function redirect;
use function response;
use function route;
use function view;

/**
 * Class SiteMapModule
 *
 * Serves three routes: an XSL stylesheet, a sitemap index, and paged
 * per-tree/per-record-type sitemap files. It also provides an admin page
 * to choose which trees are included.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const RECORDS_PER_VOLUME = 500;

    // Two weeks (14 days x 24 h x 60 min x 60 s = 1209600).
    // NOTE(review): assumes the file cache expects its lifetime in seconds - confirm.
    private const CACHE_LIFE = 14 * 24 * 60 * 60;

    // Value passed to the sitemap-file view as "priority", by record type.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers this object as the handler for the stylesheet, index and
     * sitemap-file routes; handle() tells them apart by route name.
     *
     * @return void
     */
    public function boot(): void
    {
        $router = Registry::routeFactory()->routeMap();

        $router->get('sitemap-style', '/sitemap.xsl', $this);
        $router->get('sitemap-index', '/sitemap.xml', $this);
        $router->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page, listing every tree and the sitemap URL.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => route('sitemap-index'),
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the "include in sitemap" choice for every tree, then return to
     * the configuration page.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $form = Validator::parsedBody($request);

        foreach ($this->tree_service->all() as $tree) {
            // One form field per tree, named "sitemap<tree id>"; missing means false.
            $include_in_sitemap = $form->boolean('sitemap' . $tree->id(), false);

            // Stored as '1' (true) or '' (false); readers compare against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request to the stylesheet, the index or a sitemap file,
     * according to the name of the matched route.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            return $this->xmlResponse(view('modules/sitemap/sitemap-xsl'));
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        return $this->siteMapFile($request);
    }

    /**
     * Wrap rendered XML in a 200 response with an XML content type.
     *
     * @param string $content
     *
     * @return ResponseInterface
     */
    private function xmlResponse(string $content): ResponseInterface
    {
        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()
                ->filter(static fn (Tree $tree): bool => $tree->getPreference('include_in_sitemap') === '1')
                ->map(static fn (Tree $tree): int => $tree->id());

            $count_families     = $this->countRecordsByTree('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecordsByTree('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecordsByTree('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecordsByTree('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecordsByTree('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecordsByTree('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecordsByTree('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return $this->xmlResponse($content);
    }

    /**
     * Count the rows of one record table for each of the given trees.
     *
     * @param string              $table       Table holding the records
     * @param string              $tree_column Column of $table that is joined to gedcom.gedcom_id
     * @param Collection<int,int> $tree_ids    IDs of the trees to count
     * @param string|null         $record_type If not null, count only rows whose o_type equals this value
     *
     * @return Collection<int|string,mixed> Row count for each tree, keyed by gedcom_name
     */
    private function countRecordsByTree(string $table, string $tree_column, Collection $tree_ids, ?string $record_type = null): Collection
    {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query = $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one page of a sitemap for one
     * record type of one tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in sitemaps, the page is negative,
     *                               or the record type is not recognised
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $attributes = Validator::attributes($request);

        $tree = $attributes->tree('tree');
        $type = $attributes->string('type');
        $page = $attributes->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // The offset is RECORDS_PER_VOLUME * $page, so a negative page can never be a real volume.
        // Reject it rather than caching a response under a bogus key.
        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // Must run before self::PRIORITY[$type] is read: sitemapRecords() throws
            // HttpNotFoundException for any type that is not a key of PRIORITY.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return $this->xmlResponse($content);
    }

    /**
     * Fetch one page of records of the given type, minus those that may not be shown.
     *
     * @param Tree   $tree
     * @param string $type   One of the RECORD_TYPE constants listed in self::PRIORITY
     * @param int    $limit  Maximum number of rows to read
     * @param int    $offset Number of rows to skip
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not a supported record type
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records. The filter runs after limit/offset were applied in SQL,
        // so a page may hold fewer than $limit records.
        return $records->filter(static fn (GedcomRecord $record): bool => $record->canShow(Auth::PRIV_PRIVATE));
    }

    /**
     * One page of the tree's rows in the families table, ordered by f_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One page of the tree's rows in the individuals table, ordered by i_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One page of the tree's rows in the media table, ordered by m_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One page of the tree's rows in the "other" table whose o_type is the
     * note record type, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One page of the tree's rows in the "other" table whose o_type is the
     * repository record type, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One page of the tree's rows in the sources table, ordered by s_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One page of the tree's rows in the "other" table whose o_type is the
     * submitter record type, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}