xref: /webtrees/app/Module/SiteMapModule.php (revision e873f434551745f888937263ff89e80db3b0f785)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Fig\Http\Message\StatusCodeInterface;
23use Fisharebest\Webtrees\Auth;
24use Fisharebest\Webtrees\DB;
25use Fisharebest\Webtrees\Family;
26use Fisharebest\Webtrees\FlashMessages;
27use Fisharebest\Webtrees\GedcomRecord;
28use Fisharebest\Webtrees\Html;
29use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
30use Fisharebest\Webtrees\I18N;
31use Fisharebest\Webtrees\Individual;
32use Fisharebest\Webtrees\Media;
33use Fisharebest\Webtrees\Note;
34use Fisharebest\Webtrees\Registry;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\Services\TreeService;
37use Fisharebest\Webtrees\Source;
38use Fisharebest\Webtrees\Submitter;
39use Fisharebest\Webtrees\Tree;
40use Fisharebest\Webtrees\Validator;
41use Illuminate\Database\Query\Expression;
42use Illuminate\Support\Collection;
43use Psr\Http\Message\ResponseInterface;
44use Psr\Http\Message\ServerRequestInterface;
45use Psr\Http\Server\RequestHandlerInterface;
46
47use function date;
48use function redirect;
49use function response;
50use function route;
51use function view;
52
/**
 * Class SiteMapModule - serves an XML sitemap index, paginated per-tree sitemap files and their XSL stylesheet.
 */
56class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
57{
58    use ModuleConfigTrait;
59
60    private const int RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
61    private const int CACHE_LIFE         = 209600; // Two weeks
62
63    private const array PRIORITY = [
64        Family::RECORD_TYPE     => 0.7,
65        Individual::RECORD_TYPE => 0.9,
66        Media::RECORD_TYPE      => 0.5,
67        Note::RECORD_TYPE       => 0.3,
68        Repository::RECORD_TYPE => 0.5,
69        Source::RECORD_TYPE     => 0.5,
70        Submitter::RECORD_TYPE  => 0.3,
71    ];
72
73    private TreeService $tree_service;
74
75    /**
76     * @param TreeService $tree_service
77     */
78    public function __construct(TreeService $tree_service)
79    {
80        $this->tree_service = $tree_service;
81    }
82
83    /**
84     * Initialization.
85     *
86     * @return void
87     */
88    public function boot(): void
89    {
90        Registry::routeFactory()->routeMap()
91            ->get('sitemap-style', '/sitemap.xsl', $this);
92
93        Registry::routeFactory()->routeMap()
94            ->get('sitemap-index', '/sitemap.xml', $this);
95
96        Registry::routeFactory()->routeMap()
97            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
98    }
99
100    public function description(): string
101    {
102        /* I18N: Description of the “Sitemaps” module */
103        return I18N::translate('Generate sitemap files for search engines.');
104    }
105
106    /**
107     * Should this module be enabled when it is first installed?
108     *
109     * @return bool
110     */
111    public function isEnabledByDefault(): bool
112    {
113        return false;
114    }
115
116    /**
117     * @param ServerRequestInterface $request
118     *
119     * @return ResponseInterface
120     */
121    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
122    {
123        $this->layout = 'layouts/administration';
124
125        $sitemap_url = route('sitemap-index');
126
127        return $this->viewResponse('modules/sitemap/config', [
128            'all_trees'   => $this->tree_service->all(),
129            'sitemap_url' => $sitemap_url,
130            'title'       => $this->title(),
131        ]);
132    }
133
134    /**
135     * How should this module be identified in the control panel, etc.?
136     *
137     * @return string
138     */
139    public function title(): string
140    {
141        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
142        return I18N::translate('Sitemaps');
143    }
144
145    /**
146     * @param ServerRequestInterface $request
147     *
148     * @return ResponseInterface
149     */
150    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
151    {
152        foreach ($this->tree_service->all() as $tree) {
153            $include_in_sitemap = Validator::parsedBody($request)->boolean('sitemap' . $tree->id(), false);
154            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
155        }
156
157        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');
158
159        return redirect($this->getConfigLink());
160    }
161
162    /**
163     * @param ServerRequestInterface $request
164     *
165     * @return ResponseInterface
166     */
167    public function handle(ServerRequestInterface $request): ResponseInterface
168    {
169        $route = Validator::attributes($request)->route();
170
171        if ($route->name === 'sitemap-style') {
172            $content = view('modules/sitemap/sitemap-xsl');
173
174            return response($content, StatusCodeInterface::STATUS_OK, [
175                'content-type' => 'application/xml',
176            ]);
177        }
178
179        if ($route->name === 'sitemap-index') {
180            return $this->siteMapIndex($request);
181        }
182
183        return $this->siteMapFile($request);
184    }
185
186    /**
187     * @param ServerRequestInterface $request
188     *
189     * @return ResponseInterface
190     */
191    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
192    {
193        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
194            // Which trees have sitemaps enabled?
195            $tree_ids = $this->tree_service->all()
196                ->filter(static fn (Tree $tree): bool => $tree->getPreference('include_in_sitemap') === '1')
197                ->map(static fn (Tree $tree): int => $tree->id());
198
199            $count_families = DB::table('families')
200                ->join('gedcom', 'f_file', '=', 'gedcom_id')
201                ->whereIn('gedcom_id', $tree_ids)
202                ->groupBy(['gedcom_id'])
203                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
204
205            $count_individuals = DB::table('individuals')
206                ->join('gedcom', 'i_file', '=', 'gedcom_id')
207                ->whereIn('gedcom_id', $tree_ids)
208                ->groupBy(['gedcom_id'])
209                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
210
211            $count_media = DB::table('media')
212                ->join('gedcom', 'm_file', '=', 'gedcom_id')
213                ->whereIn('gedcom_id', $tree_ids)
214                ->groupBy(['gedcom_id'])
215                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
216
217            $count_notes = DB::table('other')
218                ->join('gedcom', 'o_file', '=', 'gedcom_id')
219                ->whereIn('gedcom_id', $tree_ids)
220                ->where('o_type', '=', Note::RECORD_TYPE)
221                ->groupBy(['gedcom_id'])
222                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
223
224            $count_repositories = DB::table('other')
225                ->join('gedcom', 'o_file', '=', 'gedcom_id')
226                ->whereIn('gedcom_id', $tree_ids)
227                ->where('o_type', '=', Repository::RECORD_TYPE)
228                ->groupBy(['gedcom_id'])
229                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
230
231            $count_sources = DB::table('sources')
232                ->join('gedcom', 's_file', '=', 'gedcom_id')
233                ->whereIn('gedcom_id', $tree_ids)
234                ->groupBy(['gedcom_id'])
235                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
236
237            $count_submitters = DB::table('other')
238                ->join('gedcom', 'o_file', '=', 'gedcom_id')
239                ->whereIn('gedcom_id', $tree_ids)
240                ->where('o_type', '=', Submitter::RECORD_TYPE)
241                ->groupBy(['gedcom_id'])
242                ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
243
244            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
245            DB::table('module_setting')
246                ->where('module_name', '=', $this->name())
247                ->delete();
248
249            return view('modules/sitemap/sitemap-index-xml', [
250                'all_trees'          => $this->tree_service->all(),
251                'count_families'     => $count_families,
252                'count_individuals'  => $count_individuals,
253                'count_media'        => $count_media,
254                'count_notes'        => $count_notes,
255                'count_repositories' => $count_repositories,
256                'count_sources'      => $count_sources,
257                'count_submitters'   => $count_submitters,
258                'last_mod'           => date('Y-m-d'),
259                'records_per_volume' => self::RECORDS_PER_VOLUME,
260                'sitemap_xsl'        => route('sitemap-style'),
261            ]);
262        }, self::CACHE_LIFE);
263
264        return response($content, StatusCodeInterface::STATUS_OK, [
265            'content-type' => 'application/xml',
266        ]);
267    }
268
269    /**
270     * @param ServerRequestInterface $request
271     *
272     * @return ResponseInterface
273     */
274    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
275    {
276        $tree = Validator::attributes($request)->tree('tree');
277        $type = Validator::attributes($request)->string('type');
278        $page = Validator::attributes($request)->integer('page');
279
280        if ($tree->getPreference('include_in_sitemap') !== '1') {
281            throw new HttpNotFoundException();
282        }
283
284        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';
285
286        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
287            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);
288
289            return view('modules/sitemap/sitemap-file-xml', [
290                'priority'    => self::PRIORITY[$type],
291                'records'     => $records,
292                'sitemap_xsl' => route('sitemap-style'),
293                'tree'        => $tree,
294            ]);
295        }, self::CACHE_LIFE);
296
297        return response($content, StatusCodeInterface::STATUS_OK, [
298            'content-type' => 'application/xml',
299        ]);
300    }
301
302    /**
303     * @param Tree   $tree
304     * @param string $type
305     * @param int    $limit
306     * @param int    $offset
307     *
308     * @return Collection<int,GedcomRecord>
309     */
310    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
311    {
312        switch ($type) {
313            case Family::RECORD_TYPE:
314                $records = $this->sitemapFamilies($tree, $limit, $offset);
315                break;
316
317            case Individual::RECORD_TYPE:
318                $records = $this->sitemapIndividuals($tree, $limit, $offset);
319                break;
320
321            case Media::RECORD_TYPE:
322                $records = $this->sitemapMedia($tree, $limit, $offset);
323                break;
324
325            case Note::RECORD_TYPE:
326                $records = $this->sitemapNotes($tree, $limit, $offset);
327                break;
328
329            case Repository::RECORD_TYPE:
330                $records = $this->sitemapRepositories($tree, $limit, $offset);
331                break;
332
333            case Source::RECORD_TYPE:
334                $records = $this->sitemapSources($tree, $limit, $offset);
335                break;
336
337            case Submitter::RECORD_TYPE:
338                $records = $this->sitemapSubmitters($tree, $limit, $offset);
339                break;
340
341            default:
342                throw new HttpNotFoundException('Invalid record type: ' . $type);
343        }
344
345        // Skip private records.
346        $records = $records->filter(static fn (GedcomRecord $record): bool => $record->canShow(Auth::PRIV_PRIVATE));
347
348        return $records;
349    }
350
351    /**
352     * @param Tree $tree
353     * @param int  $limit
354     * @param int  $offset
355     *
356     * @return Collection<int,Family>
357     */
358    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
359    {
360        return DB::table('families')
361            ->where('f_file', '=', $tree->id())
362            ->orderBy('f_id')
363            ->skip($offset)
364            ->take($limit)
365            ->get()
366            ->map(Registry::familyFactory()->mapper($tree));
367    }
368
369    /**
370     * @param Tree $tree
371     * @param int  $limit
372     * @param int  $offset
373     *
374     * @return Collection<int,Individual>
375     */
376    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
377    {
378        return DB::table('individuals')
379            ->where('i_file', '=', $tree->id())
380            ->orderBy('i_id')
381            ->skip($offset)
382            ->take($limit)
383            ->get()
384            ->map(Registry::individualFactory()->mapper($tree));
385    }
386
387    /**
388     * @param Tree $tree
389     * @param int  $limit
390     * @param int  $offset
391     *
392     * @return Collection<int,Media>
393     */
394    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
395    {
396        return DB::table('media')
397            ->where('m_file', '=', $tree->id())
398            ->orderBy('m_id')
399            ->skip($offset)
400            ->take($limit)
401            ->get()
402            ->map(Registry::mediaFactory()->mapper($tree));
403    }
404
405    /**
406     * @param Tree $tree
407     * @param int  $limit
408     * @param int  $offset
409     *
410     * @return Collection<int,Note>
411     */
412    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
413    {
414        return DB::table('other')
415            ->where('o_file', '=', $tree->id())
416            ->where('o_type', '=', Note::RECORD_TYPE)
417            ->orderBy('o_id')
418            ->skip($offset)
419            ->take($limit)
420            ->get()
421            ->map(Registry::noteFactory()->mapper($tree));
422    }
423
424    /**
425     * @param Tree $tree
426     * @param int  $limit
427     * @param int  $offset
428     *
429     * @return Collection<int,Repository>
430     */
431    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
432    {
433        return DB::table('other')
434            ->where('o_file', '=', $tree->id())
435            ->where('o_type', '=', Repository::RECORD_TYPE)
436            ->orderBy('o_id')
437            ->skip($offset)
438            ->take($limit)
439            ->get()
440            ->map(Registry::repositoryFactory()->mapper($tree));
441    }
442
443    /**
444     * @param Tree $tree
445     * @param int  $limit
446     * @param int  $offset
447     *
448     * @return Collection<int,Source>
449     */
450    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
451    {
452        return DB::table('sources')
453            ->where('s_file', '=', $tree->id())
454            ->orderBy('s_id')
455            ->skip($offset)
456            ->take($limit)
457            ->get()
458            ->map(Registry::sourceFactory()->mapper($tree));
459    }
460
461    /**
462     * @param Tree $tree
463     * @param int  $limit
464     * @param int  $offset
465     *
466     * @return Collection<int,Submitter>
467     */
468    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
469    {
470        return DB::table('other')
471            ->where('o_file', '=', $tree->id())
472            ->where('o_type', '=', Submitter::RECORD_TYPE)
473            ->orderBy('o_id')
474            ->skip($offset)
475            ->take($limit)
476            ->get()
477            ->map(Registry::submitterFactory()->mapper($tree));
478    }
479}
480