xref: /webtrees/app/Module/SiteMapModule.php (revision 02467d3222d9362e48b39bb9221b32134076ae9c)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2020 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Aura\Router\Route;
23use Aura\Router\RouterContainer;
24use Fig\Http\Message\StatusCodeInterface;
25use Fisharebest\Webtrees\Auth;
26use Fisharebest\Webtrees\Cache;
27use Fisharebest\Webtrees\Exceptions\HttpNotFoundException;
28use Fisharebest\Webtrees\Factory;
29use Fisharebest\Webtrees\Family;
30use Fisharebest\Webtrees\FlashMessages;
31use Fisharebest\Webtrees\GedcomRecord;
32use Fisharebest\Webtrees\Html;
33use Fisharebest\Webtrees\I18N;
34use Fisharebest\Webtrees\Individual;
35use Fisharebest\Webtrees\Media;
36use Fisharebest\Webtrees\Note;
37use Fisharebest\Webtrees\Repository;
38use Fisharebest\Webtrees\Services\TreeService;
39use Fisharebest\Webtrees\Source;
40use Fisharebest\Webtrees\Submitter;
41use Fisharebest\Webtrees\Tree;
42use Illuminate\Database\Capsule\Manager as DB;
43use Illuminate\Database\Query\Expression;
44use Illuminate\Support\Collection;
45use Psr\Http\Message\ResponseInterface;
46use Psr\Http\Message\ServerRequestInterface;
47use Psr\Http\Server\RequestHandlerInterface;
48
49use function app;
50use function assert;
51use function date;
52use function redirect;
53use function response;
54use function route;
55use function view;
56
57/**
58 * Class SiteMapModule
59 */
60class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
61{
62    use ModuleConfigTrait;
63
64    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
65    private const CACHE_LIFE         = 209600; // Two weeks
66
67    private const PRIORITY = [
68        Family::RECORD_TYPE     => 0.7,
69        Individual::RECORD_TYPE => 0.9,
70        Media::RECORD_TYPE      => 0.5,
71        Note::RECORD_TYPE       => 0.3,
72        Repository::RECORD_TYPE => 0.5,
73        Source::RECORD_TYPE     => 0.5,
74        Submitter::RECORD_TYPE  => 0.3,
75    ];
76
    /** @var TreeService Used to list all trees for the admin page and the sitemap index. */
    private $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service Service kept for later calls to all().
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }
89
90    /**
91     * Initialization.
92     *
93     * @return void
94     */
95    public function boot(): void
96    {
97        $router_container = app(RouterContainer::class);
98        assert($router_container instanceof RouterContainer);
99
100        $router_container->getMap()
101            ->get('sitemap-style', '/sitemap.xsl', $this);
102
103        $router_container->getMap()
104            ->get('sitemap-index', '/sitemap.xml', $this);
105
106        $router_container->getMap()
107            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
108    }
109
    /**
     * A sentence describing what this module does.
     *
     * @return string The description, passed through I18N::translate().
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }
120
    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool Always false for this module.
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }
130
131    /**
132     * @param ServerRequestInterface $request
133     *
134     * @return ResponseInterface
135     */
136    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
137    {
138        $this->layout = 'layouts/administration';
139
140        $sitemap_url = route('sitemap-index');
141
142        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
143        $submit_urls = [
144            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
145            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
146        ];
147
148        return $this->viewResponse('modules/sitemap/config', [
149            'all_trees'   => $this->tree_service->all(),
150            'sitemap_url' => $sitemap_url,
151            'submit_urls' => $submit_urls,
152            'title'       => $this->title(),
153        ]);
154    }
155
    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string The module name, passed through I18N::translate().
     */
    public function title(): string
    {
        /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }
166
167    /**
168     * @param ServerRequestInterface $request
169     *
170     * @return ResponseInterface
171     */
172    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
173    {
174        $params = (array) $request->getParsedBody();
175
176        foreach ($this->tree_service->all() as $tree) {
177            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);
178            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
179        }
180
181        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');
182
183        return redirect($this->getConfigLink());
184    }
185
186    /**
187     * @param ServerRequestInterface $request
188     *
189     * @return ResponseInterface
190     */
191    public function handle(ServerRequestInterface $request): ResponseInterface
192    {
193        $route = $request->getAttribute('route');
194        assert($route instanceof Route);
195
196        if ($route->name === 'sitemap-style') {
197            $content = view('modules/sitemap/sitemap-xsl');
198
199            return response($content, StatusCodeInterface::STATUS_OK, [
200                'Content-Type' => 'application/xml',
201            ]);
202        }
203
204        if ($route->name === 'sitemap-index') {
205            return $this->siteMapIndex($request);
206        }
207
208        return $this->siteMapFile($request);
209    }
210
211    /**
212     * @param ServerRequestInterface $request
213     *
214     * @return ResponseInterface
215     */
216    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
217    {
218        $cache = app('cache.files');
219        assert($cache instanceof Cache);
220
221        $content = $cache->remember('sitemap.xml', function (): string {
222            // Which trees have sitemaps enabled?
223            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
224                return $tree->getPreference('include_in_sitemap') === '1';
225            })->map(static function (Tree $tree): int {
226                return $tree->id();
227            });
228
229            $count_families = DB::table('families')
230                ->join('gedcom', 'f_file', '=', 'gedcom_id')
231                ->whereIn('gedcom_id', $tree_ids)
232                ->groupBy(['gedcom_id'])
233                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
234                ->pluck('total', 'gedcom_name');
235
236            $count_individuals = DB::table('individuals')
237                ->join('gedcom', 'i_file', '=', 'gedcom_id')
238                ->whereIn('gedcom_id', $tree_ids)
239                ->groupBy(['gedcom_id'])
240                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
241                ->pluck('total', 'gedcom_name');
242
243            $count_media = DB::table('media')
244                ->join('gedcom', 'm_file', '=', 'gedcom_id')
245                ->whereIn('gedcom_id', $tree_ids)
246                ->groupBy(['gedcom_id'])
247                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
248                ->pluck('total', 'gedcom_name');
249
250            $count_notes = DB::table('other')
251                ->join('gedcom', 'o_file', '=', 'gedcom_id')
252                ->whereIn('gedcom_id', $tree_ids)
253                ->where('o_type', '=', Note::RECORD_TYPE)
254                ->groupBy(['gedcom_id'])
255                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
256                ->pluck('total', 'gedcom_name');
257
258            $count_repositories = DB::table('other')
259                ->join('gedcom', 'o_file', '=', 'gedcom_id')
260                ->whereIn('gedcom_id', $tree_ids)
261                ->where('o_type', '=', Repository::RECORD_TYPE)
262                ->groupBy(['gedcom_id'])
263                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
264                ->pluck('total', 'gedcom_name');
265
266            $count_sources = DB::table('sources')
267                ->join('gedcom', 's_file', '=', 'gedcom_id')
268                ->whereIn('gedcom_id', $tree_ids)
269                ->groupBy(['gedcom_id'])
270                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
271                ->pluck('total', 'gedcom_name');
272
273            $count_submitters = DB::table('other')
274                ->join('gedcom', 'o_file', '=', 'gedcom_id')
275                ->whereIn('gedcom_id', $tree_ids)
276                ->where('o_type', '=', Submitter::RECORD_TYPE)
277                ->groupBy(['gedcom_id'])
278                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
279                ->pluck('total', 'gedcom_name');
280
281            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
282            DB::table('module_setting')
283                ->where('module_name', '=', $this->name())
284                ->delete();
285
286            return view('modules/sitemap/sitemap-index-xml', [
287                'all_trees'          => $this->tree_service->all(),
288                'count_families'     => $count_families,
289                'count_individuals'  => $count_individuals,
290                'count_media'        => $count_media,
291                'count_notes'        => $count_notes,
292                'count_repositories' => $count_repositories,
293                'count_sources'      => $count_sources,
294                'count_submitters'   => $count_submitters,
295                'last_mod'           => date('Y-m-d'),
296                'records_per_volume' => self::RECORDS_PER_VOLUME,
297                'sitemap_xsl'        => route('sitemap-style'),
298            ]);
299        }, self::CACHE_LIFE);
300
301        return response($content, StatusCodeInterface::STATUS_OK, [
302            'Content-Type' => 'application/xml',
303        ]);
304    }
305
306    /**
307     * @param ServerRequestInterface $request
308     *
309     * @return ResponseInterface
310     */
311    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
312    {
313        $tree = $request->getAttribute('tree');
314        assert($tree instanceof Tree);
315
316        $type = $request->getAttribute('type');
317        $page = (int) $request->getAttribute('page');
318
319        if ($tree->getPreference('include_in_sitemap') !== '1') {
320            throw new HttpNotFoundException();
321        }
322
323        $cache = app('cache.files');
324        assert($cache instanceof Cache);
325
326        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';
327
328        $content = $cache->remember($cache_key, function () use ($tree, $type, $page): string {
329            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);
330
331            return view('modules/sitemap/sitemap-file-xml', [
332                'priority'    => self::PRIORITY[$type],
333                'records'     => $records,
334                'sitemap_xsl' => route('sitemap-style'),
335                'tree'        => $tree,
336            ]);
337        }, self::CACHE_LIFE);
338
339        return response($content, StatusCodeInterface::STATUS_OK, [
340            'Content-Type' => 'application/xml',
341        ]);
342    }
343
344    /**
345     * @param Tree   $tree
346     * @param string $type
347     * @param int    $limit
348     * @param int    $offset
349     *
350     * @return Collection<GedcomRecord>
351     */
352    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
353    {
354        switch ($type) {
355            case Family::RECORD_TYPE:
356                $records = $this->sitemapFamilies($tree, $limit, $offset);
357                break;
358
359            case Individual::RECORD_TYPE:
360                $records = $this->sitemapIndividuals($tree, $limit, $offset);
361                break;
362
363            case Media::RECORD_TYPE:
364                $records = $this->sitemapMedia($tree, $limit, $offset);
365                break;
366
367            case Note::RECORD_TYPE:
368                $records = $this->sitemapNotes($tree, $limit, $offset);
369                break;
370
371            case Repository::RECORD_TYPE:
372                $records = $this->sitemapRepositories($tree, $limit, $offset);
373                break;
374
375            case Source::RECORD_TYPE:
376                $records = $this->sitemapSources($tree, $limit, $offset);
377                break;
378
379            case Submitter::RECORD_TYPE:
380                $records = $this->sitemapSubmitters($tree, $limit, $offset);
381                break;
382
383            default:
384                throw new HttpNotFoundException('Invalid record type: ' . $type);
385        }
386
387        // Skip private records.
388        $records = $records->filter(static function (GedcomRecord $record): bool {
389            return $record->canShow(Auth::PRIV_PRIVATE);
390        });
391
392        return $records;
393    }
394
395    /**
396     * @param Tree $tree
397     * @param int  $limit
398     * @param int  $offset
399     *
400     * @return Collection<Family>
401     */
402    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
403    {
404        return DB::table('families')
405            ->where('f_file', '=', $tree->id())
406            ->orderBy('f_id')
407            ->skip($offset)
408            ->take($limit)
409            ->get()
410            ->map(Factory::family()->mapper($tree));
411    }
412
413    /**
414     * @param Tree $tree
415     * @param int  $limit
416     * @param int  $offset
417     *
418     * @return Collection<Individual>
419     */
420    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
421    {
422        return DB::table('individuals')
423            ->where('i_file', '=', $tree->id())
424            ->orderBy('i_id')
425            ->skip($offset)
426            ->take($limit)
427            ->get()
428            ->map(Factory::individual()->mapper($tree));
429    }
430
431    /**
432     * @param Tree $tree
433     * @param int  $limit
434     * @param int  $offset
435     *
436     * @return Collection<Media>
437     */
438    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
439    {
440        return DB::table('media')
441            ->where('m_file', '=', $tree->id())
442            ->orderBy('m_id')
443            ->skip($offset)
444            ->take($limit)
445            ->get()
446            ->map(Factory::media()->mapper($tree));
447    }
448
449    /**
450     * @param Tree $tree
451     * @param int  $limit
452     * @param int  $offset
453     *
454     * @return Collection<Note>
455     */
456    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
457    {
458        return DB::table('other')
459            ->where('o_file', '=', $tree->id())
460            ->where('o_type', '=', Note::RECORD_TYPE)
461            ->orderBy('o_id')
462            ->skip($offset)
463            ->take($limit)
464            ->get()
465            ->map(Factory::note()->mapper($tree));
466    }
467
468    /**
469     * @param Tree $tree
470     * @param int  $limit
471     * @param int  $offset
472     *
473     * @return Collection<Repository>
474     */
475    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
476    {
477        return DB::table('other')
478            ->where('o_file', '=', $tree->id())
479            ->where('o_type', '=', Repository::RECORD_TYPE)
480            ->orderBy('o_id')
481            ->skip($offset)
482            ->take($limit)
483            ->get()
484            ->map(Factory::repository()->mapper($tree));
485    }
486
487    /**
488     * @param Tree $tree
489     * @param int  $limit
490     * @param int  $offset
491     *
492     * @return Collection<Source>
493     */
494    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
495    {
496        return DB::table('sources')
497            ->where('s_file', '=', $tree->id())
498            ->orderBy('s_id')
499            ->skip($offset)
500            ->take($limit)
501            ->get()
502            ->map(Factory::source()->mapper($tree));
503    }
504
505    /**
506     * @param Tree $tree
507     * @param int  $limit
508     * @param int  $offset
509     *
510     * @return Collection<Submitter>
511     */
512    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
513    {
514        return DB::table('other')
515            ->where('o_file', '=', $tree->id())
516            ->where('o_type', '=', Submitter::RECORD_TYPE)
517            ->orderBy('o_id')
518            ->skip($offset)
519            ->take($limit)
520            ->get()
521            ->map(Factory::submitter()->mapper($tree));
522    }
523}
524