<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fig\Http\Message\StatusCodeInterface;
use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\DB;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\FlashMessages;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Html;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function date;
use function redirect;
use function response;
use function route;
use function view;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index, per-tree/per-record-type sitemap files and the
 * XSL stylesheet used to render them, plus an admin page that selects
 * which trees are included.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const CACHE_LIFE         = 1209600; // Two weeks, in seconds (14 * 24 * 60 * 60)

    // Priority passed to the sitemap-file view, keyed by record type.
    // The keys also act as the list of record types that have sitemap files.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the three GET routes (stylesheet, index, volume file), all
     * of which are handled by this object's handle() method.
     *
     * @return void
     */
    public function boot(): void
    {
        Registry::routeFactory()->routeMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the sitemap URL and
     * links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the "include in sitemap" choice for every tree, then return to
     * the configuration page.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        foreach ($this->tree_service->all() as $tree) {
            $include_in_sitemap = Validator::parsedBody($request)->boolean('sitemap' . $tree->id(), false);

            // The string cast stores '1' for true and '' for false; readers compare against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request for one of the routes registered in boot().
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'content-type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // Anything else is treated as a request for a sitemap volume file.
        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()
                ->filter(static fn (Tree $tree): bool => $tree->getPreference('include_in_sitemap') === '1')
                ->map(static fn (Tree $tree): int => $tree->id());

            $count_families     = $this->countRecordsByTree($tree_ids, 'families', 'f_file');
            $count_individuals  = $this->countRecordsByTree($tree_ids, 'individuals', 'i_file');
            $count_media        = $this->countRecordsByTree($tree_ids, 'media', 'm_file');
            $count_notes        = $this->countRecordsByTree($tree_ids, 'other', 'o_file', Note::RECORD_TYPE);
            $count_repositories = $this->countRecordsByTree($tree_ids, 'other', 'o_file', Repository::RECORD_TYPE);
            $count_sources      = $this->countRecordsByTree($tree_ids, 'sources', 's_file');
            $count_submitters   = $this->countRecordsByTree($tree_ids, 'other', 'o_file', Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Count the rows of one record table for each of the given trees.
     *
     * @param Collection<int,int> $tree_ids    Trees to include
     * @param string              $table       Record table to count
     * @param string              $tree_column Column in $table that is joined to gedcom.gedcom_id
     * @param string|null         $record_type When given, only rows with this o_type are counted
     *
     * @return Collection<array-key,mixed> Row counts, keyed by gedcom_name
     */
    private function countRecordsByTree(
        Collection $tree_ids,
        string $table,
        string $tree_column,
        ?string $record_type = null
    ): Collection {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one volume of a sitemap.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException When the tree is not included in the sitemap,
     *                               or the record type or page is not valid.
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // Reject unknown record types before the value is used in a cache key.
        if (!isset(self::PRIORITY[$type])) {
            throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Pages are numbered from zero; a negative page would give a negative offset.
        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, omitting any that
     * fail the canShow(Auth::PRIV_PRIVATE) check.
     *
     * @param Tree   $tree
     * @param string $type
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException When $type is not a supported record type.
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        // The filter runs after the page is fetched, so a volume may hold fewer than $limit records.
        return $records->filter(static fn (GedcomRecord $record): bool => $record->canShow(Auth::PRIV_PRIVATE));
    }

    /**
     * One page of the tree's families, ordered by f_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One page of the tree's individuals, ordered by i_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One page of the tree's media objects, ordered by m_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One page of the tree's notes, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One page of the tree's repositories, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One page of the tree's sources, ordered by s_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One page of the tree's submitters, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}