xref: /webtrees/app/Module/SiteMapModule.php (revision f5fab074fe3b714ee74c1a5752657e80fc9b66fb)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Aura\Router\Route;
23use Aura\Router\RouterContainer;
24use Fig\Http\Message\StatusCodeInterface;
25use Fisharebest\Webtrees\Auth;
26use Fisharebest\Webtrees\Family;
27use Fisharebest\Webtrees\FlashMessages;
28use Fisharebest\Webtrees\GedcomRecord;
29use Fisharebest\Webtrees\Html;
30use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
31use Fisharebest\Webtrees\I18N;
32use Fisharebest\Webtrees\Individual;
33use Fisharebest\Webtrees\Media;
34use Fisharebest\Webtrees\Note;
35use Fisharebest\Webtrees\Registry;
36use Fisharebest\Webtrees\Repository;
37use Fisharebest\Webtrees\Services\TreeService;
38use Fisharebest\Webtrees\Source;
39use Fisharebest\Webtrees\Submitter;
40use Fisharebest\Webtrees\Tree;
41use Fisharebest\Webtrees\Validator;
42use Illuminate\Database\Capsule\Manager as DB;
43use Illuminate\Database\Query\Expression;
44use Illuminate\Support\Collection;
45use Psr\Http\Message\ResponseInterface;
46use Psr\Http\Message\ServerRequestInterface;
47use Psr\Http\Server\RequestHandlerInterface;
48
49use function app;
50use function assert;
51use function date;
52use function redirect;
53use function response;
54use function route;
55use function view;
56
/**
 * Class SiteMapModule
 *
 * Serves three routes (registered in boot()):
 *  - /sitemap.xsl                       the stylesheet used to render the XML,
 *  - /sitemap.xml                       the sitemap index,
 *  - /sitemap-{tree}-{type}-{page}.xml  one volume of records for one tree.
 *
 * A tree is only published when its "include_in_sitemap" preference is '1'.
 * The generated index and volumes are stored in the file cache.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.

    // Two weeks. The previous literal (209600) dropped the leading digit of 1209600.
    // Assumes the cache TTL is expressed in seconds.
    private const CACHE_LIFE = 14 * 24 * 60 * 60;

    // Priority handed to the sitemap-file view, keyed by record type.
    // The keys of this array also define which record types may be requested.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the stylesheet, index and volume routes, all handled by this object.
     *
     * @return void
     */
    public function boot(): void
    {
        Registry::routeFactory()->routeMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the sitemap URL and
     * links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        // NOTE(review): search engines have been retiring anonymous sitemap "ping"
        // endpoints - confirm that these URLs are still accepted.
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration page: one "sitemap<tree id>" field per tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $params = (array) $request->getParsedBody();

        foreach ($this->tree_service->all() as $tree) {
            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);

            // The string cast stores '1' for true and '' for false.
            // The readers of this preference compare against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request for any of the three routes registered in boot().
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'Content-Type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // The only remaining route is 'sitemap-file'.
        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the cache) the sitemap index.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            $count_families     = $this->countRecordsByTree('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecordsByTree('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecordsByTree('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecordsByTree('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecordsByTree('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecordsByTree('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecordsByTree('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            // Every tree is passed to the view, but the counts only cover
            // the trees that have sitemaps enabled.
            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Count the records in one table, for each of the given trees.
     *
     * @param string               $table       Table holding the records
     * @param string               $tree_column Column of $table holding the tree ID
     * @param Collection<int,int>  $tree_ids    Trees to count
     * @param string|null          $record_type Restrict to this "o_type" (only for the "other" table)
     *
     * @return Collection<string,mixed> Record totals, keyed by tree name
     */
    private function countRecordsByTree(string $table, string $tree_column, Collection $tree_ids, ?string $record_type = null): Collection
    {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
            ->pluck('total', 'gedcom_name');
    }

    /**
     * Generate (or fetch from the cache) one volume of a sitemap.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not published, or the type or page is invalid
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // Reject bad URL components before they are used to build a cache key.
        if (!isset(self::PRIORITY[$type])) {
            throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // A negative page would give a negative offset, and each distinct
        // negative number would be cached under its own key.
        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one volume of records of a given type, omitting private records.
     *
     * @param Tree   $tree
     * @param string $type   One of the RECORD_TYPE constants listed in self::PRIORITY
     * @param int    $limit  Maximum number of rows to read
     * @param int    $offset Number of rows to skip
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not a supported record type
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records. The limit is applied before this filter,
        // so a volume may hold fewer than $limit records.
        return $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });
    }

    /**
     * Fetch one volume of families, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * Fetch one volume of individuals, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * Fetch one volume of media objects, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * Fetch one volume of notes, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapOtherRows($tree, Note::RECORD_TYPE, $limit, $offset)
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * Fetch one volume of repositories, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapOtherRows($tree, Repository::RECORD_TYPE, $limit, $offset)
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * Fetch one volume of sources, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * Fetch one volume of submitters, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapOtherRows($tree, Submitter::RECORD_TYPE, $limit, $offset)
            ->map(Registry::submitterFactory()->mapper($tree));
    }

    /**
     * Fetch one volume of raw rows of a single type from the "other" table,
     * which notes, repositories and submitters all share.
     *
     * @param Tree   $tree
     * @param string $type   Value to match against the "o_type" column
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<int,object>
     */
    private function sitemapOtherRows(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', $type)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get();
    }
}
511