xref: /webtrees/app/Module/SiteMapModule.php (revision c7facbf794b4789bdbe2dfb7ff09b3f19f496179)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2023 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Fig\Http\Message\StatusCodeInterface;
23use Fisharebest\Webtrees\Auth;
24use Fisharebest\Webtrees\DB;
25use Fisharebest\Webtrees\Family;
26use Fisharebest\Webtrees\FlashMessages;
27use Fisharebest\Webtrees\GedcomRecord;
28use Fisharebest\Webtrees\Html;
29use Fisharebest\Webtrees\Http\Exceptions\HttpNotFoundException;
30use Fisharebest\Webtrees\I18N;
31use Fisharebest\Webtrees\Individual;
32use Fisharebest\Webtrees\Media;
33use Fisharebest\Webtrees\Note;
34use Fisharebest\Webtrees\Registry;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\Services\TreeService;
37use Fisharebest\Webtrees\Source;
38use Fisharebest\Webtrees\Submitter;
39use Fisharebest\Webtrees\Tree;
40use Fisharebest\Webtrees\Validator;
41use Illuminate\Database\Query\Expression;
42use Illuminate\Support\Collection;
43use Psr\Http\Message\ResponseInterface;
44use Psr\Http\Message\ServerRequestInterface;
45use Psr\Http\Server\RequestHandlerInterface;
46
47use function date;
48use function redirect;
49use function response;
50use function route;
51use function view;
52
/**
 * Class SiteMapModule
 *
 * Serves three kinds of response: the sitemap index (/sitemap.xml), the
 * individual sitemap volumes (/sitemap-{tree}-{type}-{page}.xml) and the XSL
 * stylesheet (/sitemap.xsl).  It also provides the administration page where
 * each tree can be included in, or excluded from, the sitemap.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    private const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.

    // Two weeks (14 * 24 * 60 * 60).  The previous value, 209600, was under 2.5 days.
    // NOTE(review): assumes the cache TTL is expressed in seconds - confirm against the cache implementation.
    private const CACHE_LIFE = 1209600;

    // The value passed to the sitemap-file view as "priority", keyed by record type.
    // The keys also define the set of record types for which a sitemap volume can be requested.
    private const PRIORITY = [
        Family::RECORD_TYPE     => 0.7,
        Individual::RECORD_TYPE => 0.9,
        Media::RECORD_TYPE      => 0.5,
        Note::RECORD_TYPE       => 0.3,
        Repository::RECORD_TYPE => 0.5,
        Source::RECORD_TYPE     => 0.5,
        Submitter::RECORD_TYPE  => 0.3,
    ];

    private TreeService $tree_service;

    /**
     * @param TreeService $tree_service Used to list the trees
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the stylesheet, index and volume routes, with this module
     * as the handler for all three.  See handle() for the dispatching.
     *
     * @return void
     */
    public function boot(): void
    {
        Registry::routeFactory()->routeMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        Registry::routeFactory()->routeMap()
            ->get('sitemap-file', '/sitemap-{tree}-{type}-{page}.xml', $this);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the URL of the sitemap
     * index, and links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see https://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration page.
     *
     * For every tree, the boolean field "sitemap<tree id>" is read from the
     * request body (defaulting to false) and stored in the tree preference
     * "include_in_sitemap".
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface A redirect back to the configuration page
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        foreach ($this->tree_service->all() as $tree) {
            $include_in_sitemap = Validator::parsedBody($request)->boolean('sitemap' . $tree->id(), false);

            // The string cast stores true as '1' and false as ''.  Elsewhere, only '1' counts as enabled.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request for any of the routes registered in boot().
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface An XML response: the stylesheet, the index or one volume
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = Validator::attributes($request)->route();

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'content-type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // The only other route that is registered with this handler is "sitemap-file".
        return $this->siteMapFile($request);
    }

    /**
     * Generate the sitemap index.
     *
     * The XML is built from the number of records of each type in each
     * enabled tree, and is kept in the file cache for CACHE_LIFE.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $content = Registry::cache()->file()->remember('sitemap.xml', function (): string {
            $all_trees = $this->tree_service->all();

            // Which trees have sitemaps enabled?
            $tree_ids = $all_trees
                ->filter(static fn (Tree $tree): bool => $tree->getPreference('include_in_sitemap') === '1')
                ->map(static fn (Tree $tree): int => $tree->id());

            // Notes, repositories and submitters share the "other" table, and are told apart by o_type.
            $count_families     = $this->countRecordsByTree('families', 'f_file', $tree_ids);
            $count_individuals  = $this->countRecordsByTree('individuals', 'i_file', $tree_ids);
            $count_media        = $this->countRecordsByTree('media', 'm_file', $tree_ids);
            $count_notes        = $this->countRecordsByTree('other', 'o_file', $tree_ids, Note::RECORD_TYPE);
            $count_repositories = $this->countRecordsByTree('other', 'o_file', $tree_ids, Repository::RECORD_TYPE);
            $count_sources      = $this->countRecordsByTree('sources', 's_file', $tree_ids);
            $count_submitters   = $this->countRecordsByTree('other', 'o_file', $tree_ids, Submitter::RECORD_TYPE);

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $all_trees,
                'count_families'     => $count_families,
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'count_submitters'   => $count_submitters,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Count the records in one table, grouped by tree.
     *
     * @param string              $table       The table holding the records
     * @param string              $file_column The column in $table that holds the tree ID
     * @param Collection<int,int> $tree_ids    Only count records in these trees
     * @param string|null         $record_type If given, only count rows with this o_type
     *
     * @return Collection<string,mixed> The record count, keyed by tree name (gedcom_name)
     */
    private function countRecordsByTree(string $table, string $file_column, Collection $tree_ids, ?string $record_type = null): Collection
    {
        $query = DB::table($table)
            ->join('gedcom', $file_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query = $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->pluck(new Expression('COUNT(*) AS total'), 'gedcom_name');
    }

    /**
     * Generate one volume of the sitemap: one page of one record type in one tree.
     *
     * The XML is kept in the file cache for CACHE_LIFE, using a key built
     * from the tree ID, the record type and the page number.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap,
     *                               the record type is unknown, or the page is negative
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = Validator::attributes($request)->tree('tree');
        $type = Validator::attributes($request)->string('type');
        $page = Validator::attributes($request)->integer('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // The type and page come from the URL, and become part of the cache key.
        // Reject values that cannot match a volume before the cache is consulted.
        if (!isset(self::PRIORITY[$type])) {
            throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // A negative page would give a negative offset.  There is no such volume.
        if ($page < 0) {
            throw new HttpNotFoundException();
        }

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = Registry::cache()->file()->remember($cache_key, function () use ($tree, $type, $page): string {
            // Pages are numbered from zero: page N starts at record N * RECORDS_PER_VOLUME.
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'priority'    => self::PRIORITY[$type],
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'content-type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, without the private ones.
     *
     * The privacy filter is applied after the page has been fetched, so the
     * result can contain fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type   One of the RECORD_TYPE constants listed in PRIORITY
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<int,GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not a supported record type
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Family::RECORD_TYPE:
                $records = $this->sitemapFamilies($tree, $limit, $offset);
                break;

            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            case Submitter::RECORD_TYPE:
                $records = $this->sitemapSubmitters($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        // NOTE(review): Auth::PRIV_PRIVATE is presumably the access level of a visitor - confirm in Auth.
        return $records->filter(static fn (GedcomRecord $record): bool => $record->canShow(Auth::PRIV_PRIVATE));
    }

    /**
     * Fetch one page of database rows for a tree, ordered by record ID.
     *
     * The column names are built from the prefix: <prefix>_file holds the
     * tree ID, <prefix>_id the record ID and <prefix>_type the record type.
     *
     * @param Tree        $tree
     * @param string      $table       The table holding the records
     * @param string      $prefix      The column prefix used by $table, e.g. "f" for f_file and f_id
     * @param int         $limit
     * @param int         $offset
     * @param string|null $record_type If given, only fetch rows with this <prefix>_type
     *
     * @return Collection<int,object>
     */
    private function sitemapRows(Tree $tree, string $table, string $prefix, int $limit, int $offset, ?string $record_type = null): Collection
    {
        $query = DB::table($table)
            ->where($prefix . '_file', '=', $tree->id());

        if ($record_type !== null) {
            $query = $query->where($prefix . '_type', '=', $record_type);
        }

        // A fixed order is needed, so that consecutive pages neither overlap nor leave gaps.
        return $query
            ->orderBy($prefix . '_id')
            ->skip($offset)
            ->take($limit)
            ->get();
    }

    /**
     * Fetch one page of families.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Family>
     */
    private function sitemapFamilies(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'families', 'f', $limit, $offset)
            ->map(Registry::familyFactory()->mapper($tree));
    }

    /**
     * Fetch one page of individuals.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'individuals', 'i', $limit, $offset)
            ->map(Registry::individualFactory()->mapper($tree));
    }

    /**
     * Fetch one page of media objects.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'media', 'm', $limit, $offset)
            ->map(Registry::mediaFactory()->mapper($tree));
    }

    /**
     * Fetch one page of notes.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'other', 'o', $limit, $offset, Note::RECORD_TYPE)
            ->map(Registry::noteFactory()->mapper($tree));
    }

    /**
     * Fetch one page of repositories.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'other', 'o', $limit, $offset, Repository::RECORD_TYPE)
            ->map(Registry::repositoryFactory()->mapper($tree));
    }

    /**
     * Fetch one page of sources.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'sources', 's', $limit, $offset)
            ->map(Registry::sourceFactory()->mapper($tree));
    }

    /**
     * Fetch one page of submitters.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<int,Submitter>
     */
    private function sitemapSubmitters(Tree $tree, int $limit, int $offset): Collection
    {
        return $this->sitemapRows($tree, 'other', 'o', $limit, $offset, Submitter::RECORD_TYPE)
            ->map(Registry::submitterFactory()->mapper($tree));
    }
}
492