<?php

/**
 * webtrees: online genealogy
 * Copyright (C) 2023 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

declare(strict_types=1);

namespace Fisharebest\Webtrees\Module;

use Fig\Http\Message\StatusCodeInterface;
use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Family;
use Fisharebest\Webtrees\FlashMessages;
use Fisharebest\Webtrees\GedcomRecord;
use Fisharebest\Webtrees\Html;
use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Registry;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Services\TreeService;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Submitter;
use Fisharebest\Webtrees\Tree;
use Fisharebest\Webtrees\Validator;
use Illuminate\Database\Capsule\Manager as DB;
use Illuminate\Database\Query\Expression;
use Illuminate\Support\Collection;
use Psr\Http\Message\ResponseInterface;
use Psr\Http\Message\ServerRequestInterface;
use Psr\Http\Server\RequestHandlerInterface;

use function date;
use function redirect;
use function response;
use function route;
use function view;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index (/sitemap.xml), its XSL stylesheet (/sitemap.xsl) and
 * paged sitemap files (/sitemap-{tree}-{type}-{page}.xml) for every tree whose
 * "include_in_sitemap" preference is enabled. This object is itself the request
 * handler for all three routes.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const RECORDS_PER_VOLUME = 500;

    // Two weeks, in seconds (14 * 24 * 60 * 60).
    private const CACHE_LIFE = 1209600;

    // Priority written into a sitemap file, keyed by record type.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the stylesheet, index and file routes, each handled by this object.
     *
     * @return void
     */
    public function boot(): void
    {
        Registry::routeFactory()->routeMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the sitemap URL, the search-engine
     * submission links and the list of trees.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the "include_in_sitemap" preference of every tree, then return
     * to the configuration page.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        foreach ($this->tree_service->all() as $tree) {
            // One form field per tree, named "sitemap<tree id>"; a missing field means "not included".
            $include_in_sitemap = Validator::parsedBody($request)->boolean('sitemap' . $tree->id(), false);

            // The string cast stores true as '1' and false as ''; readers compare against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request for any of the three sitemap routes, by route name.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'content-type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // The only remaining route registered in boot() is "sitemap-file".
        return $this->siteMapFile($request);
    }

    /**
     * Generate the sitemap index. The rendered XML is kept in the file cache
     * for self::CACHE_LIFE seconds.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            $count_families     = $this->countRecords($tree_ids, 'families', 'f_file');
            $count_individuals  = $this->countRecords($tree_ids, 'individuals', 'i_file');
            $count_media        = $this->countRecords($tree_ids, 'media', 'm_file');
            $count_notes        = $this->countRecords($tree_ids, 'other', 'o_file', Note::RECORD_TYPE);
            $count_repositories = $this->countRecords($tree_ids, 'other', 'o_file', Repository::RECORD_TYPE);
            $count_sources      = $this->countRecords($tree_ids, 'sources', 's_file');
            $count_submitters   = $this->countRecords($tree_ids, 'other', 'o_file', Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Count the records in one table, grouped by tree.
     *
     * @param Collection<array-key,int> $tree_ids    Only count records belonging to these trees
     * @param string                    $table       Table that holds the records
     * @param string                    $file_column Column of $table that holds the tree ID
     * @param string|null               $record_type When given, only count rows whose o_type has this value
     *                                               (o_type is only filtered on for the "other" table)
     *
     * @return Collection<array-key,mixed> Totals, keyed by tree name (gedcom_name)
     */
    private function countRecords(Collection $tree_ids, string $table, string $file_column, ?string $record_type = null): Collection
    {
        $query = DB::table($table)
            ->join('gedcom', $file_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
    }

    /**
     * Generate one page ("volume") of the sitemap for one record type of one tree.
     * The rendered XML is kept in the file cache for self::CACHE_LIFE seconds.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap, or the record type is unknown
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // sitemapRecords() rejects unknown types, so PRIORITY[$type] below is only read for known ones.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, without the private ones.
     *
     * The privacy filter runs after the limit/offset has been applied, so the
     * result can hold fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not a supported record type
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        $records = $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });

        return $records;
    }

    /**
     * One page of the families of a tree, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One page of the individuals of a tree, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One page of the media objects of a tree, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One page of the notes of a tree, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One page of the repositories of a tree, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One page of the sources of a tree, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One page of the submitters of a tree, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}