xref: /webtrees/app/Module/SiteMapModule.php (revision 8d68cabe4cf02d6d8507faf4f53889852be0b6aa)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2017 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16namespace Fisharebest\Webtrees\Module;
17
18use Fisharebest\Webtrees\Auth;
19use Fisharebest\Webtrees\Bootstrap4;
20use Fisharebest\Webtrees\Controller\PageController;
21use Fisharebest\Webtrees\Database;
22use Fisharebest\Webtrees\Filter;
23use Fisharebest\Webtrees\Html;
24use Fisharebest\Webtrees\I18N;
25use Fisharebest\Webtrees\Individual;
26use Fisharebest\Webtrees\Media;
27use Fisharebest\Webtrees\Note;
28use Fisharebest\Webtrees\Repository;
29use Fisharebest\Webtrees\Source;
30use Fisharebest\Webtrees\Tree;
31
32/**
33 * Class SiteMapModule
34 */
35class SiteMapModule extends AbstractModule implements ModuleConfigInterface {
36	const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
37	const CACHE_LIFE         = 1209600; // Two weeks
38
39	/** {@inheritdoc} */
40	public function getTitle() {
41		return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
42	}
43
44	/** {@inheritdoc} */
45	public function getDescription() {
46		return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
47	}
48
49	/**
50	 * This is a general purpose hook, allowing modules to respond to routes
51	 * of the form module.php?mod=FOO&mod_action=BAR
52	 *
53	 * @param string $mod_action
54	 */
55	public function modAction($mod_action) {
56		switch ($mod_action) {
57		case 'admin':
58			$this->admin();
59			break;
60		case 'generate':
61			$this->generate(Filter::get('file'));
62			break;
63		default:
64			http_response_code(404);
65		}
66	}
67
68	/**
69	 * Generate an XML file.
70	 *
71	 * @param string $file
72	 */
73	private function generate($file) {
74		if ($file == 'sitemap.xml') {
75			$this->generateIndex();
76		} elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+).xml$/', $file, $match)) {
77			$this->generateFile($match[1], $match[2], $match[3]);
78		} else {
79			http_response_code(404);
80		}
81	}
82
83	/**
84	 * The index file contains references to all the other files.
85	 * These files are the same for visitors/users/admins.
86	 */
87	private function generateIndex() {
88		// Check the cache
89		$timestamp = (int) $this->getPreference('sitemap.timestamp');
90		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
91			$data = $this->getPreference('sitemap.xml');
92		} else {
93			$data    = '';
94			$lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
95			foreach (Tree::getAll() as $tree) {
96				if ($tree->getPreference('include_in_sitemap')) {
97					$n = Database::prepare(
98						"SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id"
99					)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();
100					for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
101						$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-i-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
102					}
103					$n = Database::prepare(
104						"SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id"
105					)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();
106					if ($n) {
107						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
108							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-s-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
109						}
110					}
111					$n = Database::prepare(
112						"SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'"
113					)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();
114					if ($n) {
115						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
116							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-r-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
117						}
118					}
119					$n = Database::prepare(
120						"SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'"
121					)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();
122					if ($n) {
123						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
124							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-n-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
125						}
126					}
127					$n = Database::prepare(
128						"SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id"
129					)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();
130					if ($n) {
131						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
132							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-m-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
133						}
134					}
135				}
136			}
137			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
138			// Cache this data.
139			$this->setPreference('sitemap.xml', $data);
140			$this->setPreference('sitemap.timestamp', WT_TIMESTAMP);
141		}
142		header('Content-Type: application/xml');
143		header('Content-Length: ' . strlen($data));
144		echo $data;
145	}
146
147	/**
148	 * A separate file for each family tree and each record type.
149	 * These files depend on access levels, so only cache for visitors.
150	 *
151	 * @param int    $ged_id
152	 * @param string $rec_type
153	 * @param string $volume
154	 */
155	private function generateFile($ged_id, $rec_type, $volume) {
156		$tree = Tree::findById($ged_id);
157		// Check the cache
158		$timestamp = (int) $this->getPreference('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.timestamp');
159		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && !Auth::check()) {
160			$data = $this->getPreference('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.xml');
161		} else {
162			$data    = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
163			$records = [];
164			switch ($rec_type) {
165			case 'i':
166				$rows = Database::prepare(
167					"SELECT i_id AS xref, i_gedcom AS gedcom" .
168					" FROM `##individuals`" .
169					" WHERE i_file = :tree_id" .
170					" ORDER BY i_id" .
171					" LIMIT :limit OFFSET :offset"
172				)->execute([
173					'tree_id' => $ged_id,
174					'limit'   => self::RECORDS_PER_VOLUME,
175					'offset'  => self::RECORDS_PER_VOLUME * $volume,
176				])->fetchAll();
177				foreach ($rows as $row) {
178					$records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
179				}
180				break;
181			case 's':
182				$rows = Database::prepare(
183					"SELECT s_id AS xref, s_gedcom AS gedcom" .
184					" FROM `##sources`" .
185					" WHERE s_file = :tree_id" .
186					" ORDER BY s_id" .
187					" LIMIT :limit OFFSET :offset"
188				)->execute([
189					'tree_id' => $ged_id,
190					'limit'   => self::RECORDS_PER_VOLUME,
191					'offset'  => self::RECORDS_PER_VOLUME * $volume,
192				])->fetchAll();
193				foreach ($rows as $row) {
194					$records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
195				}
196				break;
197			case 'r':
198				$rows = Database::prepare(
199					"SELECT o_id AS xref, o_gedcom AS gedcom" .
200					" FROM `##other`" .
201					" WHERE o_file = :tree_id AND o_type = 'REPO'" .
202					" ORDER BY o_id" .
203					" LIMIT :limit OFFSET :offset"
204				)->execute([
205					'tree_id' => $ged_id,
206					'limit'   => self::RECORDS_PER_VOLUME,
207					'offset'  => self::RECORDS_PER_VOLUME * $volume,
208				])->fetchAll();
209				foreach ($rows as $row) {
210					$records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
211				}
212				break;
213			case 'n':
214				$rows = Database::prepare(
215					"SELECT o_id AS xref, o_gedcom AS gedcom" .
216					" FROM `##other`" .
217					" WHERE o_file = :tree_id AND o_type = 'NOTE'" .
218					" ORDER BY o_id" .
219					" LIMIT :limit OFFSET :offset"
220				)->execute([
221					'tree_id' => $ged_id,
222					'limit'   => self::RECORDS_PER_VOLUME,
223					'offset'  => self::RECORDS_PER_VOLUME * $volume,
224				])->fetchAll();
225				foreach ($rows as $row) {
226					$records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
227				}
228				break;
229			case 'm':
230				$rows = Database::prepare(
231					"SELECT m_id AS xref, m_gedcom AS gedcom" .
232					" FROM `##media`" .
233					" WHERE m_file = :tree_id" .
234					" ORDER BY m_id" .
235					" LIMIT :limit OFFSET :offset"
236				)->execute([
237					'tree_id' => $ged_id,
238					'limit'   => self::RECORDS_PER_VOLUME,
239					'offset'  => self::RECORDS_PER_VOLUME * $volume,
240				])->fetchAll();
241				foreach ($rows as $row) {
242					$records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
243				}
244				break;
245			}
246			foreach ($records as $record) {
247				if ($record->canShowName()) {
248					$data .= '<url>';
249					$data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
250					$chan = $record->getFirstFact('CHAN');
251					if ($chan) {
252						$date = $chan->getDate();
253						if ($date->isOK()) {
254							$data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
255						}
256					}
257					$data .= '</url>' . PHP_EOL;
258				}
259			}
260			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
261			// Cache this data - but only for visitors, as we don’t want
262			// visitors to see data created by signed-in users.
263			if (!Auth::check()) {
264				$this->setPreference('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.xml', $data);
265				$this->setPreference('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.timestamp', WT_TIMESTAMP);
266			}
267		}
268		header('Content-Type: application/xml');
269		header('Content-Length: ' . strlen($data));
270		echo $data;
271	}
272
273	/**
274	 * Edit the configuration
275	 */
276	private function admin() {
277		$controller = new PageController;
278		$controller
279			->restrictAccess(Auth::isAdmin())
280			->setPageTitle($this->getTitle())
281			->pageHeader();
282
283		// Save the updated preferences
284		if (Filter::post('action') == 'save') {
285			foreach (Tree::getAll() as $tree) {
286				$tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
287			}
288			// Clear cache and force files to be regenerated
289			Database::prepare(
290				"DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
291			)->execute();
292		}
293
294		$include_any = false;
295
296		echo Bootstrap4::breadcrumbs([
297			'admin.php'         => I18N::translate('Control panel'),
298			'admin_modules.php' => I18N::translate('Module administration'),
299		], $controller->getPageTitle());
300		?>
301
302		<h1><?= $controller->getPageTitle() ?></h1>
303		<?php
304
305		echo
306		'<p>',
307			/* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
308			I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
309			'</p>',
310		'<p>', /* I18N: Label for a configuration option */ I18N::translate('Which family trees should be included in the sitemaps'), '</p>',
311			'<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
312		'<input type="hidden" name="action" value="save">';
313		foreach (Tree::getAll() as $tree) {
314			echo '<div class="checkbox"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
315			if ($tree->getPreference('include_in_sitemap')) {
316				echo 'checked';
317				$include_any = true;
318			}
319			echo '>', $tree->getTitleHtml(), '</label></div>';
320		}
321		echo
322		'<input type="submit" value="', I18N::translate('save'), '">',
323		'</form>',
324		'<hr>';
325
326		if ($include_any) {
327			$site_map_url1 = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap.xml';
328			$site_map_url2 = rawurlencode(WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml');
329			echo
330				'<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
331				'<pre>Sitemap: ', $site_map_url1, '</pre>',
332				'<hr>',
333				'<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
334				'<ul>',
335				// This list comes from http://en.wikipedia.org/wiki/Sitemaps
336				'<li><a href="https://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
337				'<li><a href="https://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
338				'</ul>';
339		}
340	}
341
342	/** {@inheritdoc} */
343	public function getConfigLink() {
344		return Html::url('module.php', [
345			'mod'        => $this->getName(),
346			'mod_action' => 'admin',
347		]);
348	}
349}
350