xref: /webtrees/app/Module/SiteMapModule.php (revision 16a40a66d9d5430ce8d1b0a796eecf4876efb58e)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2019 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Module;
21
22use Aura\Router\Route;
23use Aura\Router\RouterContainer;
24use Fig\Http\Message\StatusCodeInterface;
25use Fisharebest\Webtrees\Auth;
26use Fisharebest\Webtrees\Cache;
27use Fisharebest\Webtrees\Exceptions\HttpNotFoundException;
28use Fisharebest\Webtrees\FlashMessages;
29use Fisharebest\Webtrees\GedcomRecord;
30use Fisharebest\Webtrees\Html;
31use Fisharebest\Webtrees\I18N;
32use Fisharebest\Webtrees\Individual;
33use Fisharebest\Webtrees\Media;
34use Fisharebest\Webtrees\Note;
35use Fisharebest\Webtrees\Repository;
36use Fisharebest\Webtrees\Services\TreeService;
37use Fisharebest\Webtrees\Source;
38use Fisharebest\Webtrees\Tree;
39use Illuminate\Database\Capsule\Manager as DB;
40use Illuminate\Database\Query\Expression;
41use Illuminate\Support\Collection;
42use Psr\Http\Message\ResponseInterface;
43use Psr\Http\Message\ServerRequestInterface;
44use Psr\Http\Server\RequestHandlerInterface;
45
46use function app;
47use function assert;
48use function date;
49use function redirect;
50use function response;
51use function route;
52use function view;
53
/**
 * Class SiteMapModule
 *
 * Serves XML sitemaps for search engines: an index at "/sitemap.xml" and
 * paginated files at "/sitemap-{tree}-{records}-{page}.xml". Only trees whose
 * "include_in_sitemap" preference is "1" are published.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface, RequestHandlerInterface
{
    use ModuleConfigTrait;

    // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    private const RECORDS_PER_VOLUME = 500;

    // Two weeks (14 * 24 * 60 * 60). NOTE(review): assumes the cache TTL is expressed in seconds.
    private const CACHE_LIFE = 1209600;

    /** @var TreeService */
    private $tree_service;

    /**
     * SiteMapModule constructor.
     *
     * @param TreeService $tree_service
     */
    public function __construct(TreeService $tree_service)
    {
        $this->tree_service = $tree_service;
    }

    /**
     * Initialization.
     *
     * Registers the sitemap index route and the per-tree sitemap file route,
     * both handled by this module (see handle()).
     *
     * @return void
     */
    public function boot(): void
    {
        $router_container = app(RouterContainer::class);
        assert($router_container instanceof RouterContainer);

        $map = $router_container->getMap();

        $map->get('sitemap-index', '/sitemap.xml', $this);

        // Only the record types we can generate, and numeric page numbers, match this route.
        $map->get('sitemap-file', '/sitemap-{tree}-{records}-{page}.xml', $this)
            ->tokens([
                'records' => 'INDI|NOTE|OBJE|REPO|SOUR',
                'page'    => '\d+',
            ]);
    }

    /**
     * A sentence describing what this module does.
     *
     * @return string
     */
    public function description(): string
    {
        /* I18N: Description of the “Sitemaps” module */
        return I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * Should this module be enabled when it is first installed?
     *
     * @return bool
     */
    public function isEnabledByDefault(): bool
    {
        return false;
    }

    /**
     * Show the configuration page: the list of trees, the URL of the sitemap
     * index, and links for submitting that URL to search engines.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function getAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $this->layout = 'layouts/administration';

        $sitemap_url = route('sitemap-index');

        // This list comes from https://en.wikipedia.org/wiki/Sitemaps
        $submit_urls = [
            'Bing/Yahoo' => Html::url('https://www.bing.com/webmaster/ping.aspx', ['siteMap' => $sitemap_url]),
            'Google'     => Html::url('https://www.google.com/webmasters/tools/ping', ['sitemap' => $sitemap_url]),
        ];

        return $this->viewResponse('modules/sitemap/config', [
            'all_trees'   => $this->tree_service->all(),
            'sitemap_url' => $sitemap_url,
            'submit_urls' => $submit_urls,
            'title'       => $this->title(),
        ]);
    }

    /**
     * How should this module be identified in the control panel, etc.?
     *
     * @return string
     */
    public function title(): string
    {
        /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */
        return I18N::translate('Sitemaps');
    }

    /**
     * Save the configuration: for every tree, store whether it should be
     * included in the sitemap, then return to the configuration page.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function postAdminAction(ServerRequestInterface $request): ResponseInterface
    {
        $params = (array) $request->getParsedBody();

        foreach ($this->tree_service->all() as $tree) {
            // The form field for a tree is named "sitemap" followed by the tree ID.
            $include_in_sitemap = (bool) ($params['sitemap' . $tree->id()] ?? false);

            // Stored as '1' or '' - the same values that a bool-to-string cast produces.
            $tree->setPreference('include_in_sitemap', $include_in_sitemap ? '1' : '');
        }

        FlashMessages::addMessage(I18N::translate('The preferences for the module “%s” have been updated.', $this->title()), 'success');

        return redirect($this->getConfigLink());
    }

    /**
     * Dispatch a request for either of the routes registered in boot().
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    public function handle(ServerRequestInterface $request): ResponseInterface
    {
        $route = $request->getAttribute('route');
        assert($route instanceof Route);

        if ($route->name === 'sitemap-index') {
            return $this->siteMapIndex($request);
        }

        return $this->siteMapFile($request);
    }

    /**
     * Generate (or fetch from the file cache) the sitemap index.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     */
    private function siteMapIndex(ServerRequestInterface $request): ResponseInterface
    {
        $cache = app('cache.files');
        assert($cache instanceof Cache);

        $content = $cache->remember('sitemap.xml', function (): string {
            // Which trees have sitemaps enabled?
            $tree_ids = $this->tree_service->all()->filter(static function (Tree $tree): bool {
                return $tree->getPreference('include_in_sitemap') === '1';
            })->map(static function (Tree $tree): int {
                return $tree->id();
            });

            $count_individuals  = $this->countRecordsByTree($tree_ids, 'individuals', 'i_file');
            $count_media        = $this->countRecordsByTree($tree_ids, 'media', 'm_file');
            $count_notes        = $this->countRecordsByTree($tree_ids, 'other', 'o_file', 'NOTE');
            $count_repositories = $this->countRecordsByTree($tree_ids, 'other', 'o_file', 'REPO');
            $count_sources      = $this->countRecordsByTree($tree_ids, 'sources', 's_file');

            // Versions 2.0.1 and earlier of this module stored large amounts of data in the settings.
            DB::table('module_setting')
                ->where('module_name', '=', $this->name())
                ->delete();

            return view('modules/sitemap/sitemap-index.xml', [
                'all_trees'          => $this->tree_service->all(),
                'count_individuals'  => $count_individuals,
                'count_media'        => $count_media,
                'count_notes'        => $count_notes,
                'count_repositories' => $count_repositories,
                'count_sources'      => $count_sources,
                'last_mod'           => date('Y-m-d'),
                'records_per_volume' => self::RECORDS_PER_VOLUME,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Count the rows of one record table, grouped by tree.
     *
     * @param Collection  $tree_ids    IDs of the trees to count
     * @param string      $table       Table holding the records
     * @param string      $tree_column Column of $table holding the tree ID
     * @param string|null $record_type If given, only count rows whose "o_type" column has this value
     *
     * @return Collection Totals, keyed by tree name
     */
    private function countRecordsByTree(
        Collection $tree_ids,
        string $table,
        string $tree_column,
        ?string $record_type = null
    ): Collection {
        $query = DB::table($table)
            ->join('gedcom', $tree_column, '=', 'gedcom_id')
            ->whereIn('gedcom_id', $tree_ids);

        if ($record_type !== null) {
            $query->where('o_type', '=', $record_type);
        }

        return $query
            ->groupBy(['gedcom_id'])
            ->select([new Expression('COUNT(*) AS total'), 'gedcom_name'])
            ->pluck('total', 'gedcom_name');
    }

    /**
     * Generate (or fetch from the file cache) one page of the sitemap for
     * one record type of one tree.
     *
     * @param ServerRequestInterface $request
     *
     * @return ResponseInterface
     *
     * @throws HttpNotFoundException If the tree is not included in the sitemap, or the page number is out of range
     */
    private function siteMapFile(ServerRequestInterface $request): ResponseInterface
    {
        $tree = $request->getAttribute('tree');
        assert($tree instanceof Tree);

        $type = $request->getAttribute('records');

        // The route only guarantees a string of digits. Casting gives a canonical value
        // ("007" and "7" share one cache entry) and an integer for the offset calculation.
        $page = (int) $request->getAttribute('page');

        if ($tree->getPreference('include_in_sitemap') !== '1') {
            throw new HttpNotFoundException();
        }

        // An offset beyond PHP_INT_MAX would become a float, which the int-typed
        // $offset parameter rejects with a TypeError. No such page can exist.
        if ($page > intdiv(PHP_INT_MAX, self::RECORDS_PER_VOLUME)) {
            throw new HttpNotFoundException();
        }

        $cache = app('cache.files');
        assert($cache instanceof Cache);

        $cache_key = 'sitemap/' . $tree->id() . '/' . $type . '/' . $page . '.xml';

        $content = $cache->remember($cache_key, function () use ($tree, $type, $page): string {
            $offset  = self::RECORDS_PER_VOLUME * $page;
            $records = $this->sitemapRecords($tree, $type, self::RECORDS_PER_VOLUME, $offset);

            return view('modules/sitemap/sitemap-file.xml', [
                'records' => $records,
                'tree'    => $tree,
            ]);
        }, self::CACHE_LIFE);

        return response($content, StatusCodeInterface::STATUS_OK, [
            'Content-Type' => 'application/xml',
        ]);
    }

    /**
     * Fetch one page of records of the given type, omitting any that fail
     * the canShow(Auth::PRIV_PRIVATE) check.
     *
     * The privacy filter runs after the page has been fetched, so a page may
     * contain fewer than $limit records.
     *
     * @param Tree   $tree
     * @param string $type
     * @param int    $limit
     * @param int    $offset
     *
     * @return Collection<GedcomRecord>
     *
     * @throws HttpNotFoundException If $type is not a supported record type
     */
    private function sitemapRecords(Tree $tree, string $type, int $limit, int $offset): Collection
    {
        switch ($type) {
            case Individual::RECORD_TYPE:
                $records = $this->sitemapIndividuals($tree, $limit, $offset);
                break;

            case Media::RECORD_TYPE:
                $records = $this->sitemapMedia($tree, $limit, $offset);
                break;

            case Note::RECORD_TYPE:
                $records = $this->sitemapNotes($tree, $limit, $offset);
                break;

            case Repository::RECORD_TYPE:
                $records = $this->sitemapRepositories($tree, $limit, $offset);
                break;

            case Source::RECORD_TYPE:
                $records = $this->sitemapSources($tree, $limit, $offset);
                break;

            default:
                throw new HttpNotFoundException('Invalid record type: ' . $type);
        }

        // Skip private records.
        return $records->filter(static function (GedcomRecord $record): bool {
            return $record->canShow(Auth::PRIV_PRIVATE);
        });
    }

    /**
     * One page of individuals from a tree, ordered by record ID.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Individual>
     */
    private function sitemapIndividuals(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('individuals')
            ->where('i_file', '=', $tree->id())
            ->orderBy('i_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Individual::rowMapper($tree));
    }

    /**
     * One page of media objects from a tree, ordered by record ID.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Media>
     */
    private function sitemapMedia(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('media')
            ->where('m_file', '=', $tree->id())
            ->orderBy('m_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Media::rowMapper($tree));
    }

    /**
     * One page of notes from a tree, ordered by record ID.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Note>
     */
    private function sitemapNotes(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', 'NOTE')
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Note::rowMapper($tree));
    }

    /**
     * One page of repositories from a tree, ordered by record ID.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Repository>
     */
    private function sitemapRepositories(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('other')
            ->where('o_file', '=', $tree->id())
            ->where('o_type', '=', 'REPO')
            ->orderBy('o_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Repository::rowMapper($tree));
    }

    /**
     * One page of sources from a tree, ordered by record ID.
     *
     * @param Tree $tree
     * @param int  $limit
     * @param int  $offset
     *
     * @return Collection<Source>
     */
    private function sitemapSources(Tree $tree, int $limit, int $offset): Collection
    {
        return DB::table('sources')
            ->where('s_file', '=', $tree->id())
            ->orderBy('s_id')
            ->skip($offset)
            ->take($limit)
            ->get()
            ->map(Source::rowMapper($tree));
    }
}
439