xref: /webtrees/app/Module/SiteMapModule.php (revision 1b47c2feedb65f946198e7c18aeb4286b98ceeb5)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2019 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Aura\Router\Route;
23use Aura\Router\RouterContainer;
24use Fig\Http\Message\StatusCodeInterface;
25use Fisharebest\Webtrees\Auth;
26use Fisharebest\Webtrees\Cache;
27use Fisharebest\Webtrees\Exceptions\HttpNotFoundException;
28use Fisharebest\Webtrees\Family;
29use Fisharebest\Webtrees\FlashMessages;
30use Fisharebest\Webtrees\GedcomRecord;
31use Fisharebest\Webtrees\Html;
32use Fisharebest\Webtrees\I18N;
33use Fisharebest\Webtrees\Individual;
34use Fisharebest\Webtrees\Media;
35use Fisharebest\Webtrees\Note;
36use Fisharebest\Webtrees\Repository;
37use Fisharebest\Webtrees\Services\TreeService;
38use Fisharebest\Webtrees\Source;
39use Fisharebest\Webtrees\Submitter;
40use Fisharebest\Webtrees\Tree;
41use Illuminate\Database\Capsule\Manager as DB;
42use Illuminate\Database\Query\Expression;
43use Illuminate\Support\Collection;
44use Psr\Http\Message\ResponseInterface;
45use Psr\Http\Message\ServerRequestInterface;
46use Psr\Http\Server\RequestHandlerInterface;
47
48use function app;
49use function assert;
50use function date;
51use function redirect;
52use function response;
53use function route;
54use function view;
55
56/**
57 * Class SiteMapModule
58 */
59class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
60{
61    use ModuleConfigTrait;
62
63    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
64    private const CACHE_LIFE         = 209600; // Two weeks
65
66    private const PRIORITY = [
67        Family::RECORD_TYPE     => 0.7,
68        Individual::RECORD_TYPE => 0.9,
69        Media::RECORD_TYPE      => 0.5,
70        Note::RECORD_TYPE       => 0.3,
71        Repository::RECORD_TYPE => 0.5,
72        Source::RECORD_TYPE     => 0.5,
73        Submitter::RECORD_TYPE  => 0.3,
74    ];
75
    /** @var TreeService Used to enumerate the trees when building the admin page and the sitemap index. */
    private $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }
88
89    /**
90     * Initialization.
91     *
92     * @return void
93     */
94    public function boot(): void
95    {
96        $router_container = app(RouterContainer::class);
97        assert($router_container instanceof RouterContainer);
98
99        $router_container->getMap()
100            ->get('sitemap-style', '/sitemap.xsl', $this);
101
102        $router_container->getMap()
103            ->get('sitemap-index', '/sitemap.xml', $this);
104
105        $router_container->getMap()
106            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
107    }
108
    /**
     * A sentence describing what this module does.
     *
     * The sentence is passed through I18N::translate() before it is returned.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }
119
    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool Always false for this module.
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }
129
130    /**
131     * @param ServerRequestInterface $request
132     *
133     * @return ResponseInterface
134     */
135    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
136    {
137        $this->layout = 'layouts/administration';
138
139        $sitemap_url = route('sitemap-index');
140
141        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
142        $submit_urls = [
143            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
144            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
145        ];
146
147        return $this->viewResponse('modules/sitemap/config', [
148            'all_trees'   => $this->tree_service->all(),
149            'sitemap_url' => $sitemap_url,
150            'submit_urls' => $submit_urls,
151            'title'       => $this->title(),
152        ]);
153    }
154
    /**
     * How should this module be identified in the control panel, etc.?
     *
     * The name is passed through I18N::translate() before it is returned.
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }
165
166    /**
167     * @param ServerRequestInterface $request
168     *
169     * @return ResponseInterface
170     */
171    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
172    {
173        $params = (array) $request->getParsedBody();
174
175        foreach ($this->tree_service->all() as $tree) {
176            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);
177            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
178        }
179
180        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');
181
182        return redirect($this->getConfigLink());
183    }
184
185    /**
186     * @param ServerRequestInterface $request
187     *
188     * @return ResponseInterface
189     */
190    public function handle(ServerRequestInterface $request): ResponseInterface
191    {
192        $route = $request->getAttribute('route');
193        assert($route instanceof Route);
194
195        if ($route->name === 'sitemap-style') {
196            $content = view('modules/sitemap/sitemap-xsl');
197
198            return response($content, StatusCodeInterface::STATUS_OK, [
199                'Content-Type' => 'application/xml',
200            ]);
201        }
202
203        if ($route->name === 'sitemap-index') {
204            return $this->siteMapIndex($request);
205        }
206
207        return $this->siteMapFile($request);
208    }
209
210    /**
211     * @param ServerRequestInterface $request
212     *
213     * @return ResponseInterface
214     */
215    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
216    {
217        $cache = app('cache.files');
218        assert($cache instanceof Cache);
219
220        $content = $cache->remember('sitemap.xml', function (): string {
221            // Which trees have sitemaps enabled?
222            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
223                return $tree->getPreference('include_in_sitemap') === '1';
224            })->map(static function (Tree $tree): int {
225                return $tree->id();
226            });
227
228            $count_families = DB::table('families')
229                ->join('gedcom', 'f_file', '=', 'gedcom_id')
230                ->whereIn('gedcom_id', $tree_ids)
231                ->groupBy(['gedcom_id'])
232                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
233                ->pluck('total', 'gedcom_name');
234
235            $count_individuals = DB::table('individuals')
236                ->join('gedcom', 'i_file', '=', 'gedcom_id')
237                ->whereIn('gedcom_id', $tree_ids)
238                ->groupBy(['gedcom_id'])
239                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
240                ->pluck('total', 'gedcom_name');
241
242            $count_media = DB::table('media')
243                ->join('gedcom', 'm_file', '=', 'gedcom_id')
244                ->whereIn('gedcom_id', $tree_ids)
245                ->groupBy(['gedcom_id'])
246                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
247                ->pluck('total', 'gedcom_name');
248
249            $count_notes = DB::table('other')
250                ->join('gedcom', 'o_file', '=', 'gedcom_id')
251                ->whereIn('gedcom_id', $tree_ids)
252                ->where('o_type', '=', Note::RECORD_TYPE)
253                ->groupBy(['gedcom_id'])
254                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
255                ->pluck('total', 'gedcom_name');
256
257            $count_repositories = DB::table('other')
258                ->join('gedcom', 'o_file', '=', 'gedcom_id')
259                ->whereIn('gedcom_id', $tree_ids)
260                ->where('o_type', '=', Repository::RECORD_TYPE)
261                ->groupBy(['gedcom_id'])
262                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
263                ->pluck('total', 'gedcom_name');
264
265            $count_sources = DB::table('sources')
266                ->join('gedcom', 's_file', '=', 'gedcom_id')
267                ->whereIn('gedcom_id', $tree_ids)
268                ->groupBy(['gedcom_id'])
269                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
270                ->pluck('total', 'gedcom_name');
271
272            $count_submitters = DB::table('other')
273                ->join('gedcom', 'o_file', '=', 'gedcom_id')
274                ->whereIn('gedcom_id', $tree_ids)
275                ->where('o_type', '=', Submitter::RECORD_TYPE)
276                ->groupBy(['gedcom_id'])
277                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
278                ->pluck('total', 'gedcom_name');
279
280            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
281            DB::table('module_setting')
282                ->where('module_name', '=', $this->name())
283                ->delete();
284
285            return view('modules/sitemap/sitemap-index-xml', [
286                'all_trees'          => $this->tree_service->all(),
287                'count_families'     => $count_families,
288                'count_individuals'  => $count_individuals,
289                'count_media'        => $count_media,
290                'count_notes'        => $count_notes,
291                'count_repositories' => $count_repositories,
292                'count_sources'      => $count_sources,
293                'count_submitters'   => $count_submitters,
294                'last_mod'           => date('Y-m-d'),
295                'records_per_volume' => self::RECORDS_PER_VOLUME,
296                'sitemap_xsl'        => route('sitemap-style'),
297            ]);
298        }, self::CACHE_LIFE);
299
300        return response($content, StatusCodeInterface::STATUS_OK, [
301            'Content-Type' => 'application/xml',
302        ]);
303    }
304
305    /**
306     * @param ServerRequestInterface $request
307     *
308     * @return ResponseInterface
309     */
310    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
311    {
312        $tree = $request->getAttribute('tree');
313        assert($tree instanceof Tree);
314
315        $type = $request->getAttribute('type');
316        $page = (int) $request->getAttribute('page');
317
318        if ($tree->getPreference('include_in_sitemap') !== '1') {
319            throw new HttpNotFoundException();
320        }
321
322        $cache = app('cache.files');
323        assert($cache instanceof Cache);
324
325        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';
326
327        $content = $cache->remember($cache_key, function () use ($tree, $type, $page): string {
328            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);
329
330            return view('modules/sitemap/sitemap-file-xml', [
331                'priority'    => self::PRIORITY[$type],
332                'records'     => $records,
333                'sitemap_xsl' => route('sitemap-style'),
334                'tree'        => $tree,
335            ]);
336        }, self::CACHE_LIFE);
337
338        return response($content, StatusCodeInterface::STATUS_OK, [
339            'Content-Type' => 'application/xml',
340        ]);
341    }
342
343    /**
344     * @param Tree   $tree
345     * @param string $type
346     * @param int    $limit
347     * @param int    $offset
348     *
349     * @return Collection<GedcomRecord>
350     */
351    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
352    {
353        switch ($type) {
354            case Family::RECORD_TYPE:
355                $records = $this->sitemapFamilies($tree, $limit, $offset);
356                break;
357
358            case Individual::RECORD_TYPE:
359                $records = $this->sitemapIndividuals($tree, $limit, $offset);
360                break;
361
362            case Media::RECORD_TYPE:
363                $records = $this->sitemapMedia($tree, $limit, $offset);
364                break;
365
366            case Note::RECORD_TYPE:
367                $records = $this->sitemapNotes($tree, $limit, $offset);
368                break;
369
370            case Repository::RECORD_TYPE:
371                $records = $this->sitemapRepositories($tree, $limit, $offset);
372                break;
373
374            case Source::RECORD_TYPE:
375                $records = $this->sitemapSources($tree, $limit, $offset);
376                break;
377
378            case Submitter::RECORD_TYPE:
379                $records = $this->sitemapSubmitters($tree, $limit, $offset);
380                break;
381
382            default:
383                throw new HttpNotFoundException('Invalid record type: ' . $type);
384        }
385
386        // Skip private records.
387        $records = $records->filter(static function (GedcomRecord $record): bool {
388            return $record->canShow(Auth::PRIV_PRIVATE);
389        });
390
391        return $records;
392    }
393
394    /**
395     * @param Tree $tree
396     * @param int  $limit
397     * @param int  $offset
398     *
399     * @return Collection<Family>
400     */
401    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
402    {
403        return DB::table('families')
404            ->where('f_file', '=', $tree->id())
405            ->orderBy('f_id')
406            ->skip($offset)
407            ->take($limit)
408            ->get()
409            ->map(Family::rowMapper($tree));
410    }
411
412    /**
413     * @param Tree $tree
414     * @param int  $limit
415     * @param int  $offset
416     *
417     * @return Collection<Individual>
418     */
419    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
420    {
421        return DB::table('individuals')
422            ->where('i_file', '=', $tree->id())
423            ->orderBy('i_id')
424            ->skip($offset)
425            ->take($limit)
426            ->get()
427            ->map(Individual::rowMapper($tree));
428    }
429
430    /**
431     * @param Tree $tree
432     * @param int  $limit
433     * @param int  $offset
434     *
435     * @return Collection<Media>
436     */
437    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
438    {
439        return DB::table('media')
440            ->where('m_file', '=', $tree->id())
441            ->orderBy('m_id')
442            ->skip($offset)
443            ->take($limit)
444            ->get()
445            ->map(Media::rowMapper($tree));
446    }
447
448    /**
449     * @param Tree $tree
450     * @param int  $limit
451     * @param int  $offset
452     *
453     * @return Collection<Note>
454     */
455    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
456    {
457        return DB::table('other')
458            ->where('o_file', '=', $tree->id())
459            ->where('o_type', '=', Note::RECORD_TYPE)
460            ->orderBy('o_id')
461            ->skip($offset)
462            ->take($limit)
463            ->get()
464            ->map(Note::rowMapper($tree));
465    }
466
467    /**
468     * @param Tree $tree
469     * @param int  $limit
470     * @param int  $offset
471     *
472     * @return Collection<Repository>
473     */
474    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
475    {
476        return DB::table('other')
477            ->where('o_file', '=', $tree->id())
478            ->where('o_type', '=', Repository::RECORD_TYPE)
479            ->orderBy('o_id')
480            ->skip($offset)
481            ->take($limit)
482            ->get()
483            ->map(Repository::rowMapper($tree));
484    }
485
486    /**
487     * @param Tree $tree
488     * @param int  $limit
489     * @param int  $offset
490     *
491     * @return Collection<Source>
492     */
493    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
494    {
495        return DB::table('sources')
496            ->where('s_file', '=', $tree->id())
497            ->orderBy('s_id')
498            ->skip($offset)
499            ->take($limit)
500            ->get()
501            ->map(Source::rowMapper($tree));
502    }
503
504    /**
505     * @param Tree $tree
506     * @param int  $limit
507     * @param int  $offset
508     *
509     * @return Collection<Submitter>
510     */
511    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
512    {
513        return DB::table('other')
514            ->where('o_file', '=', $tree->id())
515            ->where('o_type', '=', Submitter::RECORD_TYPE)
516            ->orderBy('o_id')
517            ->skip($offset)
518            ->take($limit)
519            ->get()
520            ->map(Submitter::rowMapper($tree));
521    }
522}
523