xref: /webtrees/app/Module/SiteMapModule.php (revision 3340ecd27b8901a894bff51b7c40bfa2896a552b)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Aura\Router\Route;
23use Aura\Router\RouterContainer;
24use Fig\Http\Message\StatusCodeInterface;
25use Fisharebest\Webtrees\Auth;
26use Fisharebest\Webtrees\Family;
27use Fisharebest\Webtrees\FlashMessages;
28use Fisharebest\Webtrees\GedcomRecord;
29use Fisharebest\Webtrees\Html;
30use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
31use Fisharebest\Webtrees\I18N;
32use Fisharebest\Webtrees\Individual;
33use Fisharebest\Webtrees\Media;
34use Fisharebest\Webtrees\Note;
35use Fisharebest\Webtrees\Registry;
36use Fisharebest\Webtrees\Repository;
37use Fisharebest\Webtrees\Services\TreeService;
38use Fisharebest\Webtrees\Source;
39use Fisharebest\Webtrees\Submitter;
40use Fisharebest\Webtrees\Tree;
41use Illuminate\Database\Capsule\Manager as DB;
42use Illuminate\Database\Query\Expression;
43use Illuminate\Support\Collection;
44use Psr\Http\Message\ResponseInterface;
45use Psr\Http\Message\ServerRequestInterface;
46use Psr\Http\Server\RequestHandlerInterface;
47
48use function app;
49use function assert;
50use function date;
51use function redirect;
52use function response;
53use function route;
54use function view;
55
56/**
57 * Class SiteMapModule
58 */
59class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
60{
61    use ModuleConfigTrait;
62
63    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
64    private const CACHE_LIFE         = 209600; // Two weeks
65
66    private const PRIORITY = [
67        Family::RECORD_TYPE     => 0.7,
68        Individual::RECORD_TYPE => 0.9,
69        Media::RECORD_TYPE      => 0.5,
70        Note::RECORD_TYPE       => 0.3,
71        Repository::RECORD_TYPE => 0.5,
72        Source::RECORD_TYPE     => 0.5,
73        Submitter::RECORD_TYPE  => 0.3,
74    ];
75
76    private TreeService $tree_service;
77
78    /**
79     * TreesMenuModule constructor.
80     *
81     * @param TreeService $tree_service
82     */
83    public function __construct(TreeService $tree_service)
84    {
85        $this->tree_service = $tree_service;
86    }
87
88    /**
89     * Initialization.
90     *
91     * @return void
92     */
93    public function boot(): void
94    {
95        $router_container = app(RouterContainer::class);
96        assert($router_container instanceof RouterContainer);
97
98        $router_container->getMap()
99            ->get('sitemap-style', '/sitemap.xsl', $this);
100
101        $router_container->getMap()
102            ->get('sitemap-index', '/sitemap.xml', $this);
103
104        $router_container->getMap()
105            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
106    }
107
108    /**
109     * A sentence describing what this module does.
110     *
111     * @return string
112     */
113    public function description(): string
114    {
115        /* I18N: Description of the “Sitemaps” module */
116        return I18N::translate('Generate sitemap files for search engines.');
117    }
118
119    /**
120     * Should this module be enabled when it is first installed?
121     *
122     * @return bool
123     */
124    public function isEnabledByDefault(): bool
125    {
126        return false;
127    }
128
129    /**
130     * @param ServerRequestInterface $request
131     *
132     * @return ResponseInterface
133     */
134    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
135    {
136        $this->layout = 'layouts/administration';
137
138        $sitemap_url = route('sitemap-index');
139
140        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
141        $submit_urls = [
142            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
143            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
144        ];
145
146        return $this->viewResponse('modules/sitemap/config', [
147            'all_trees'   => $this->tree_service->all(),
148            'sitemap_url' => $sitemap_url,
149            'submit_urls' => $submit_urls,
150            'title'       => $this->title(),
151        ]);
152    }
153
154    /**
155     * How should this module be identified in the control panel, etc.?
156     *
157     * @return string
158     */
159    public function title(): string
160    {
161        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
162        return I18N::translate('Sitemaps');
163    }
164
165    /**
166     * @param ServerRequestInterface $request
167     *
168     * @return ResponseInterface
169     */
170    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
171    {
172        $params = (array) $request->getParsedBody();
173
174        foreach ($this->tree_service->all() as $tree) {
175            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);
176            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
177        }
178
179        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');
180
181        return redirect($this->getConfigLink());
182    }
183
184    /**
185     * @param ServerRequestInterface $request
186     *
187     * @return ResponseInterface
188     */
189    public function handle(ServerRequestInterface $request): ResponseInterface
190    {
191        $route = $request->getAttribute('route');
192        assert($route instanceof Route);
193
194        if ($route->name === 'sitemap-style') {
195            $content = view('modules/sitemap/sitemap-xsl');
196
197            return response($content, StatusCodeInterface::STATUS_OK, [
198                'Content-Type' => 'application/xml',
199            ]);
200        }
201
202        if ($route->name === 'sitemap-index') {
203            return $this->siteMapIndex($request);
204        }
205
206        return $this->siteMapFile($request);
207    }
208
209    /**
210     * @param ServerRequestInterface $request
211     *
212     * @return ResponseInterface
213     */
214    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
215    {
216        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
217            // Which trees have sitemaps enabled?
218            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
219                return $tree->getPreference('include_in_sitemap') === '1';
220            })->map(static function (Tree $tree): int {
221                return $tree->id();
222            });
223
224            $count_families = DB::table('families')
225                ->join('gedcom', 'f_file', '=', 'gedcom_id')
226                ->whereIn('gedcom_id', $tree_ids)
227                ->groupBy(['gedcom_id'])
228                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
229                ->pluck('total', 'gedcom_name');
230
231            $count_individuals = DB::table('individuals')
232                ->join('gedcom', 'i_file', '=', 'gedcom_id')
233                ->whereIn('gedcom_id', $tree_ids)
234                ->groupBy(['gedcom_id'])
235                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
236                ->pluck('total', 'gedcom_name');
237
238            $count_media = DB::table('media')
239                ->join('gedcom', 'm_file', '=', 'gedcom_id')
240                ->whereIn('gedcom_id', $tree_ids)
241                ->groupBy(['gedcom_id'])
242                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
243                ->pluck('total', 'gedcom_name');
244
245            $count_notes = DB::table('other')
246                ->join('gedcom', 'o_file', '=', 'gedcom_id')
247                ->whereIn('gedcom_id', $tree_ids)
248                ->where('o_type', '=', Note::RECORD_TYPE)
249                ->groupBy(['gedcom_id'])
250                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
251                ->pluck('total', 'gedcom_name');
252
253            $count_repositories = DB::table('other')
254                ->join('gedcom', 'o_file', '=', 'gedcom_id')
255                ->whereIn('gedcom_id', $tree_ids)
256                ->where('o_type', '=', Repository::RECORD_TYPE)
257                ->groupBy(['gedcom_id'])
258                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
259                ->pluck('total', 'gedcom_name');
260
261            $count_sources = DB::table('sources')
262                ->join('gedcom', 's_file', '=', 'gedcom_id')
263                ->whereIn('gedcom_id', $tree_ids)
264                ->groupBy(['gedcom_id'])
265                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
266                ->pluck('total', 'gedcom_name');
267
268            $count_submitters = DB::table('other')
269                ->join('gedcom', 'o_file', '=', 'gedcom_id')
270                ->whereIn('gedcom_id', $tree_ids)
271                ->where('o_type', '=', Submitter::RECORD_TYPE)
272                ->groupBy(['gedcom_id'])
273                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
274                ->pluck('total', 'gedcom_name');
275
276            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
277            DB::table('module_setting')
278                ->where('module_name', '=', $this->name())
279                ->delete();
280
281            return view('modules/sitemap/sitemap-index-xml', [
282                'all_trees'          => $this->tree_service->all(),
283                'count_families'     => $count_families,
284                'count_individuals'  => $count_individuals,
285                'count_media'        => $count_media,
286                'count_notes'        => $count_notes,
287                'count_repositories' => $count_repositories,
288                'count_sources'      => $count_sources,
289                'count_submitters'   => $count_submitters,
290                'last_mod'           => date('Y-m-d'),
291                'records_per_volume' => self::RECORDS_PER_VOLUME,
292                'sitemap_xsl'        => route('sitemap-style'),
293            ]);
294        }, self::CACHE_LIFE);
295
296        return response($content, StatusCodeInterface::STATUS_OK, [
297            'Content-Type' => 'application/xml',
298        ]);
299    }
300
301    /**
302     * @param ServerRequestInterface $request
303     *
304     * @return ResponseInterface
305     */
306    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
307    {
308        $tree = $request->getAttribute('tree');
309        assert($tree instanceof Tree);
310
311        $type = $request->getAttribute('type');
312        $page = (int) $request->getAttribute('page');
313
314        if ($tree->getPreference('include_in_sitemap') !== '1') {
315            throw new HttpNotFoundException();
316        }
317
318        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';
319
320        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
321            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);
322
323            return view('modules/sitemap/sitemap-file-xml', [
324                'priority'    => self::PRIORITY[$type],
325                'records'     => $records,
326                'sitemap_xsl' => route('sitemap-style'),
327                'tree'        => $tree,
328            ]);
329        }, self::CACHE_LIFE);
330
331        return response($content, StatusCodeInterface::STATUS_OK, [
332            'Content-Type' => 'application/xml',
333        ]);
334    }
335
336    /**
337     * @param Tree   $tree
338     * @param string $type
339     * @param int    $limit
340     * @param int    $offset
341     *
342     * @return Collection<GedcomRecord>
343     */
344    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
345    {
346        switch ($type) {
347            case Family::RECORD_TYPE:
348                $records = $this->sitemapFamilies($tree, $limit, $offset);
349                break;
350
351            case Individual::RECORD_TYPE:
352                $records = $this->sitemapIndividuals($tree, $limit, $offset);
353                break;
354
355            case Media::RECORD_TYPE:
356                $records = $this->sitemapMedia($tree, $limit, $offset);
357                break;
358
359            case Note::RECORD_TYPE:
360                $records = $this->sitemapNotes($tree, $limit, $offset);
361                break;
362
363            case Repository::RECORD_TYPE:
364                $records = $this->sitemapRepositories($tree, $limit, $offset);
365                break;
366
367            case Source::RECORD_TYPE:
368                $records = $this->sitemapSources($tree, $limit, $offset);
369                break;
370
371            case Submitter::RECORD_TYPE:
372                $records = $this->sitemapSubmitters($tree, $limit, $offset);
373                break;
374
375            default:
376                throw new HttpNotFoundException('Invalid record type: ' . $type);
377        }
378
379        // Skip private records.
380        $records = $records->filter(static function (GedcomRecord $record): bool {
381            return $record->canShow(Auth::PRIV_PRIVATE);
382        });
383
384        return $records;
385    }
386
387    /**
388     * @param Tree $tree
389     * @param int  $limit
390     * @param int  $offset
391     *
392     * @return Collection<Family>
393     */
394    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
395    {
396        return DB::table('families')
397            ->where('f_file', '=', $tree->id())
398            ->orderBy('f_id')
399            ->skip($offset)
400            ->take($limit)
401            ->get()
402            ->map(Registry::familyFactory()->mapper($tree));
403    }
404
405    /**
406     * @param Tree $tree
407     * @param int  $limit
408     * @param int  $offset
409     *
410     * @return Collection<Individual>
411     */
412    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
413    {
414        return DB::table('individuals')
415            ->where('i_file', '=', $tree->id())
416            ->orderBy('i_id')
417            ->skip($offset)
418            ->take($limit)
419            ->get()
420            ->map(Registry::individualFactory()->mapper($tree));
421    }
422
423    /**
424     * @param Tree $tree
425     * @param int  $limit
426     * @param int  $offset
427     *
428     * @return Collection<Media>
429     */
430    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
431    {
432        return DB::table('media')
433            ->where('m_file', '=', $tree->id())
434            ->orderBy('m_id')
435            ->skip($offset)
436            ->take($limit)
437            ->get()
438            ->map(Registry::mediaFactory()->mapper($tree));
439    }
440
441    /**
442     * @param Tree $tree
443     * @param int  $limit
444     * @param int  $offset
445     *
446     * @return Collection<Note>
447     */
448    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
449    {
450        return DB::table('other')
451            ->where('o_file', '=', $tree->id())
452            ->where('o_type', '=', Note::RECORD_TYPE)
453            ->orderBy('o_id')
454            ->skip($offset)
455            ->take($limit)
456            ->get()
457            ->map(Registry::noteFactory()->mapper($tree));
458    }
459
460    /**
461     * @param Tree $tree
462     * @param int  $limit
463     * @param int  $offset
464     *
465     * @return Collection<Repository>
466     */
467    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
468    {
469        return DB::table('other')
470            ->where('o_file', '=', $tree->id())
471            ->where('o_type', '=', Repository::RECORD_TYPE)
472            ->orderBy('o_id')
473            ->skip($offset)
474            ->take($limit)
475            ->get()
476            ->map(Registry::repositoryFactory()->mapper($tree));
477    }
478
479    /**
480     * @param Tree $tree
481     * @param int  $limit
482     * @param int  $offset
483     *
484     * @return Collection<Source>
485     */
486    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
487    {
488        return DB::table('sources')
489            ->where('s_file', '=', $tree->id())
490            ->orderBy('s_id')
491            ->skip($offset)
492            ->take($limit)
493            ->get()
494            ->map(Registry::sourceFactory()->mapper($tree));
495    }
496
497    /**
498     * @param Tree $tree
499     * @param int  $limit
500     * @param int  $offset
501     *
502     * @return Collection<Submitter>
503     */
504    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
505    {
506        return DB::table('other')
507            ->where('o_file', '=', $tree->id())
508            ->where('o_type', '=', Submitter::RECORD_TYPE)
509            ->orderBy('o_id')
510            ->skip($offset)
511            ->take($limit)
512            ->get()
513            ->map(Registry::submitterFactory()->mapper($tree));
514    }
515}
516