xref: /webtrees/app/Module/SiteMapModule.php (revision c82761e34f1c58f6aa906b2ff55fa73a937de867)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2019 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Aura\Router\Route;
23use Aura\Router\RouterContainer;
24use Fig\Http\Message\StatusCodeInterface;
25use Fisharebest\Webtrees\Auth;
26use Fisharebest\Webtrees\Cache;
27use Fisharebest\Webtrees\Exceptions\HttpNotFoundException;
28use Fisharebest\Webtrees\FlashMessages;
29use Fisharebest\Webtrees\GedcomRecord;
30use Fisharebest\Webtrees\Html;
31use Fisharebest\Webtrees\I18N;
32use Fisharebest\Webtrees\Individual;
33use Fisharebest\Webtrees\Media;
34use Fisharebest\Webtrees\Note;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\Services\TreeService;
37use Fisharebest\Webtrees\Source;
38use Fisharebest\Webtrees\Tree;
39use Illuminate\Database\Capsule\Manager as DB;
40use Illuminate\Database\Query\Expression;
41use Illuminate\Support\Collection;
42use Psr\Http\Message\ResponseInterface;
43use Psr\Http\Message\ServerRequestInterface;
44use Psr\Http\Server\RequestHandlerInterface;
45
46use function app;
47use function assert;
48use function date;
49use function redirect;
50use function response;
51use function route;
52use function view;
53
/**
 * Class SiteMapModule
 *
 * Serves three kinds of document for search engines:
 *  - a sitemap index (/sitemap.xml),
 *  - paged sitemap files (/sitemap-{tree}-{records}-{page}.xml),
 *  - an XSL stylesheet (/sitemap.xsl),
 * and provides a control-panel page for choosing which trees are included.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const RECORDS_PER_VOLUME = 500;

    // Two weeks. This is passed as the lifetime argument of Cache::remember(),
    // which is assumed to be measured in seconds - TODO confirm against Cache.
    // NOTE(review): the previous literal was 209600, which looks like 1209600
    // (14 days in seconds) with the leading digit dropped.
    private const CACHE_LIFE = 14 * 24 * 60 * 60;

    /** @var TreeService */
    private $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the GET routes for the stylesheet, the index and the
     * individual sitemap files, all of which are handled by this object.
     *
     * @return void
     */
    public function boot(): void
    {
        $router_container = app(RouterContainer::class);
        assert($router_container instanceof RouterContainer);

        $router_container->getMap()
            ->get('sitemap-style', '/sitemap.xsl', $this);

        $router_container->getMap()
            ->get('sitemap-index', '/sitemap.xml', $this);

        // Restrict the record type and page number, so that other values do not match this route.
        $router_container->getMap()
            ->get('sitemap-file', '/sitemap-{tree}-{records}-{page}.xml', $this)
            ->tokens([
                'records' => 'INDI|NOTE|OBJE|REPO|SOUR',
                'page'    => '\d+',
            ]);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the URL of the sitemap
     * index, and links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save, for every tree, whether it should be included in the sitemap,
     * then redirect back to the configuration page.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $params = (array) $request->getParsedBody();

        foreach ($this->tree_service->all() as $tree) {
            // A missing parameter means that the tree is not included.
            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);

            // Stored as '1' (included) or '' (not included). The sitemap handlers test for '1'.
            $tree->setPreference('include_in_sitemap', (string) $include_in_sitemap);
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request to the stylesheet, the index or a sitemap file,
     * according to the name of the matched route.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = $request->getAttribute('route');
        assert($route instanceof Route);

        if ($route->name === 'sitemap-style') {
            $content = view('modules/sitemap/sitemap-xsl');

            return response($content, StatusCodeInterface::STATUS_OK, [
                'Content-Type' => 'application/xml',
            ]);
        }

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        // The only other route registered in boot() is 'sitemap-file'.
        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index.
     *
     * The index is built from the number of records of each type in each
     * tree that has sitemaps enabled.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $cache = app('cache.files');
        assert($cache instanceof Cache);

        $content = $cache->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            // Each query below gives a list of record counts, indexed by tree name.
            $count_individuals = DB::table('individuals')
                ->join('gedcom', 'i_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            $count_media = DB::table('media')
                ->join('gedcom', 'm_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            // Notes and repositories share the "other" table, and are distinguished by o_type.
            $count_notes = DB::table('other')
                ->join('gedcom', 'o_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->where('o_type', '=', 'NOTE')
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            $count_repositories = DB::table('other')
                ->join('gedcom', 'o_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->where('o_type', '=', 'REPO')
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            $count_sources = DB::table('sources')
                ->join('gedcom', 's_file', '=', 'gedcom_id')
                ->whereIn('gedcom_id', $tree_ids)
                ->groupBy(['gedcom_id'])
                ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
                ->pluck('total', 'gedcom_name');

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index-xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
                'sitemap_xsl'        => route('sitemap-style'),
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Generate (or fetch from the file cache) one page of the sitemap for
     * one type of record in one tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap.
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        $records = $request->getAttribute('records');

        // The route only matches digits. Converting to an integer makes the offset
        // calculation explicit, and means that "1", "01", etc. share one cache entry.
        $page = (int) $request->getAttribute('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        $cache = app('cache.files');
        assert($cache instanceof Cache);

        $cache_key = 'sitemap/' . $tree->id() . '/' . $records . '/' . $page . '.xml';

        $content = $cache->remember($cache_key, function () use ($tree, $records, $page): string {
            // Pages are numbered from zero: page N starts at record N * RECORDS_PER_VOLUME.
            $records = $this->sitemapRecords($tree, $records, self::RECORDS_PER_VOLUME, self::RECORDS_PER_VOLUME * $page);

            return view('modules/sitemap/sitemap-file-xml', [
                'records'     => $records,
                'sitemap_xsl' => route('sitemap-style'),
                'tree'        => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, without any private records.
     *
     * Private records are removed after the page has been fetched, so the
     * result may contain fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type   One of the RECORD_TYPE constants handled below.
     * @param int    $limit  The maximum number of records to fetch.
     * @param int    $offset The number of records to skip.
     *
     * @return Collection<GedcomRecord>
     *
     * @throws HttpNotFoundException If the record type is not supported.
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        // The access level is given explicitly, rather than taken from the current user.
        return $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });
    }

    /**
     * Fetch one page of individuals, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Individual::rowMapper($tree));
    }

    /**
     * Fetch one page of media objects, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Media::rowMapper($tree));
    }

    /**
     * Fetch one page of notes, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', 'NOTE')
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Note::rowMapper($tree));
    }

    /**
     * Fetch one page of repositories, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', 'REPO')
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Repository::rowMapper($tree));
    }

    /**
     * Fetch one page of sources, ordered by XREF.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Source::rowMapper($tree));
    }
}
452