xref: /webtrees/app/Module/SiteMapModule.php (revision acb90a3897a0c2932393544961e7311805adb49a)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Fig\Http\Message\StatusCodeInterface;
23use Fisharebest\Webtrees\Auth;
24use Fisharebest\Webtrees\Family;
25use Fisharebest\Webtrees\FlashMessages;
26use Fisharebest\Webtrees\GedcomRecord;
27use Fisharebest\Webtrees\Html;
28use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
29use Fisharebest\Webtrees\I18N;
30use Fisharebest\Webtrees\Individual;
31use Fisharebest\Webtrees\Media;
32use Fisharebest\Webtrees\Note;
33use Fisharebest\Webtrees\Registry;
34use Fisharebest\Webtrees\Repository;
35use Fisharebest\Webtrees\Services\TreeService;
36use Fisharebest\Webtrees\Source;
37use Fisharebest\Webtrees\Submitter;
38use Fisharebest\Webtrees\Tree;
39use Fisharebest\Webtrees\Validator;
40use Illuminate\Database\Capsule\Manager as DB;
41use Illuminate\Database\Query\Expression;
42use Illuminate\Support\Collection;
43use Psr\Http\Message\ResponseInterface;
44use Psr\Http\Message\ServerRequestInterface;
45use Psr\Http\Server\RequestHandlerInterface;
46
47use function date;
48use function redirect;
49use function response;
50use function route;
51use function view;
52
53/**
54 * Class SiteMapModule
55 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const CACHE_LIFE         = 14 * 24 * 60 * 60; // Two weeks, in seconds.

    // Value passed to the sitemap-file view as "priority", per record type.
    // This table also defines which record types may be requested.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers this module as the handler for the stylesheet, the sitemap
     * index and the individual sitemap files.
     *
     * @return void
     */
    public function boot(): void
    {
        Registry::routeFactory()->routeMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the sitemap URL, the list of trees and
     * links for submitting the sitemap to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration: one "include_in_sitemap" preference per tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $params = (array) $request->getParsedBody();

        foreach ($this->tree_service->all() as $tree) {
            // The form field for each tree is named "sitemap" followed by the tree ID.
            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);

            // Casting a bool to string gives '1' or ''. The readers of this preference compare against '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request to the stylesheet, the sitemap index or a sitemap
     * file, according to the name of the matched route.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'content-type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // Any other route handled by this module is a sitemap file.
        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(/** @scrutinizer ignore-unused */ ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            $count_families     = $this->countRecordsByTree('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecordsByTree('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecordsByTree('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecordsByTree('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecordsByTree('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecordsByTree('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecordsByTree('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Count the rows of one table for each of the given trees.
     *
     * @param string              $table       Table containing the records
     * @param string              $tree_column Column of $table that is joined to gedcom.gedcom_id
     * @param Collection<int,int> $tree_ids    IDs of the trees to count
     * @param string|null         $record_type If given, count only rows whose o_type has this value
     *
     * @return Collection<array-key,mixed> Row counts, keyed by gedcom_name
     */
    private function countRecordsByTree(
        string $table,
        string $tree_column,
        Collection $tree_ids,
        ?string $record_type = null
    ): Collection {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
            ->pluck('total', 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one page of the sitemap for
     * one record type of one tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap,
     *                               the record type is unknown or the page is negative
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // Both values come from the URL and become part of the cache key,
        // so reject invalid ones before touching the cache.
        if (!isset(self::PRIORITY[$type])) {
            throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // Pages are numbered from zero: page N starts at record N * RECORDS_PER_VOLUME.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, omitting private records.
     *
     * The privacy filter is applied after paging, so a page may contain
     * fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type   One of the RECORD_TYPE constants listed in self::PRIORITY
     * @param int    $limit  Maximum number of rows to read
     * @param int    $offset Number of rows to skip
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not a supported record type
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        return $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });
    }

    /**
     * One page of the tree's families, ordered by f_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('families')
            ->where('f_file', '=', $tree->id())
            ->orderBy('f_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * One page of the tree's individuals, ordered by i_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * One page of the tree's media objects, ordered by m_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * One page of the tree's notes, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Note::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * One page of the tree's repositories, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Repository::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * One page of the tree's sources, ordered by s_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * One page of the tree's submitters, ordered by o_id.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', Submitter::RECORD_TYPE)
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}
506}
507