xref: /webtrees/app/Module/SiteMapModule.php (revision 32bd038c6b4e0b040c406132f49147f95dd667d2)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Fig\Http\Message\StatusCodeInterface;
23use Fisharebest\Webtrees\Auth;
24use Fisharebest\Webtrees\DB;
25use Fisharebest\Webtrees\Family;
26use Fisharebest\Webtrees\FlashMessages;
27use Fisharebest\Webtrees\GedcomRecord;
28use Fisharebest\Webtrees\Html;
29use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
30use Fisharebest\Webtrees\I18N;
31use Fisharebest\Webtrees\Individual;
32use Fisharebest\Webtrees\Media;
33use Fisharebest\Webtrees\Note;
34use Fisharebest\Webtrees\Registry;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\Services\TreeService;
37use Fisharebest\Webtrees\Source;
38use Fisharebest\Webtrees\Submitter;
39use Fisharebest\Webtrees\Tree;
40use Fisharebest\Webtrees\Validator;
41use Illuminate\Database\Query\Expression;
42use Illuminate\Support\Collection;
43use Psr\Http\Message\ResponseInterface;
44use Psr\Http\Message\ServerRequestInterface;
45use Psr\Http\Server\RequestHandlerInterface;
46
47use function date;
48use function redirect;
49use function response;
50use function route;
51use function view;
52
/**
 * Generate XML sitemaps for search engines.
 *
 * The module registers three routes: an XSL stylesheet, a sitemap index and
 * the individual sitemap files (one per tree, record type and page).  It also
 * provides an administration page on which each tree can be included in, or
 * excluded from, the sitemap.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const RECORDS_PER_VOLUME = 500;

    // Two weeks (14 * 24 * 60 * 60).  Passed as the lifetime argument of the file cache,
    // presumably in seconds - TODO confirm against the cache implementation.
    private const CACHE_LIFE = 1209600;

    // The priority passed to the sitemap-file view, keyed by record type.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * @param TreeService $tree_service Used to list the trees that may appear in the sitemap.
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers this module as the handler for the stylesheet, the index and
     * the per-tree/per-type/per-page sitemap files.
     *
     * @return void
     */
    public function boot(): void
    {
        Registry::routeFactory()->routeMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the URL of the sitemap
     * index and the links used to submit that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration: one "include_in_sitemap" preference per tree,
     * read from the form field "sitemap<tree id>".
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        foreach ($this->tree_service->all() as $tree) {
            $include_in_sitemap = Validator::parsedBody($request)->boolean('sitemap' . $tree->id(), false);

            // The boolean is stored as a string: true becomes '1' (the value tested when serving sitemaps).
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request to the stylesheet, the index or a sitemap file,
     * according to the name of the matched route.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            return $this->xmlResponse(view('modules/sitemap/sitemap-xsl'));
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        return $this->siteMapFile($request);
    }

    /**
     * Wrap generated XML in a "200 OK" response with an XML content type.
     *
     * @param string $content
     *
     * @return ResponseInterface
     */
    private function xmlResponse(string $content): ResponseInterface
    {
        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            $all_trees = $this->tree_service->all();

            // Which trees have sitemaps enabled?
            $tree_ids = $all_trees
                ->filter(static fn (Tree $tree): bool => $tree->getPreference('include_in_sitemap') === '1')
                ->map(static fn (Tree $tree): int => $tree->id());

            $count_families     = $this->countRecordsByTree('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecordsByTree('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecordsByTree('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecordsByTree('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecordsByTree('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecordsByTree('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecordsByTree('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $all_trees,
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return $this->xmlResponse($content);
    }

    /**
     * Count the records in one table, for each of the given trees.
     *
     * @param string               $table       The table holding the records
     * @param string               $tree_column The column of $table that holds the tree ID
     * @param Collection<int,int>  $tree_ids    Only count records in these trees
     * @param string|null          $record_type If given, only count rows with this value in "o_type"
     *
     * @return Collection<string,int> The totals, keyed by tree name ("gedcom_name")
     */
    private function countRecordsByTree(string $table, string $tree_column, Collection $tree_ids, ?string $record_type = null): Collection
    {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one sitemap file.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap, the page
     *                               number is negative, or the record type is not recognised.
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // Pages are multiplied by the volume size to give an offset, so a negative
        // page can never identify a volume.  Reject it before creating a cache entry.
        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // This throws for an unknown record type, so the priority lookup below always has a valid key.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return $this->xmlResponse($content);
    }

    /**
     * Fetch one page of records of a given type, omitting private records.
     *
     * @param Tree   $tree
     * @param string $type   One of the record types listed in self::PRIORITY
     * @param int    $limit  The maximum number of rows to read
     * @param int    $offset The number of rows to skip
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If the record type is not recognised.
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.  The filter runs after paging, so a file may hold fewer than $limit records.
        return $records->filter(static fn (GedcomRecord $record): bool => $record->canShow(Auth::PRIV_PRIVATE));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapOtherRows($tree, Note::RECORD_TYPE, $limit, $offset)
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapOtherRows($tree, Repository::RECORD_TYPE, $limit, $offset)
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapOtherRows($tree, Submitter::RECORD_TYPE, $limit, $offset)
            ->map(Registry::submitterFactory()->mapper($tree));
    }

    /**
     * Fetch one page of rows of a given type from the "other" table, ordered by "o_id".
     *
     * @param Tree   $tree
     * @param string $record_type The value to match in the "o_type" column
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<int,object> The database rows, not yet converted to records
     */
    private function sitemapOtherRows(Tree $tree, string $record_type, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', $record_type)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get();
    }
}
496