xref: /webtrees/app/Module/SiteMapModule.php (revision 15d603e7c7c15d20f055d3d9c38d6b133453c5be)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2017 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16namespace Fisharebest\Webtrees\Module;
17
18use Fisharebest\Webtrees\Auth;
19use Fisharebest\Webtrees\Bootstrap4;
20use Fisharebest\Webtrees\Controller\PageController;
21use Fisharebest\Webtrees\Database;
22use Fisharebest\Webtrees\Filter;
23use Fisharebest\Webtrees\I18N;
24use Fisharebest\Webtrees\Individual;
25use Fisharebest\Webtrees\Media;
26use Fisharebest\Webtrees\Note;
27use Fisharebest\Webtrees\Repository;
28use Fisharebest\Webtrees\Source;
29use Fisharebest\Webtrees\Tree;
30
/**
 * Class SiteMapModule
 *
 * Serves XML sitemaps (see http://www.sitemaps.org/) for the family trees
 * that an administrator has flagged with the "include_in_sitemap" tree
 * preference:
 *
 * - "sitemap.xml" is an index that lists the volume files;
 * - "sitemap-{tree}-{type}-{volume}.xml" lists up to RECORDS_PER_VOLUME
 *   records of one type (i=individual, s=source, r=repository, n=note,
 *   m=media) from one tree.
 *
 * Generated documents are cached in this module's preferences.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface {
	const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
	const CACHE_LIFE         = 1209600; // Two weeks

	/** {@inheritdoc} */
	public function getTitle() {
		return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
	}

	/** {@inheritdoc} */
	public function getDescription() {
		return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
	}

	/**
	 * This is a general purpose hook, allowing modules to respond to routes
	 * of the form module.php?mod=FOO&mod_action=BAR
	 *
	 * "admin" shows the configuration page, "generate" outputs the sitemap
	 * named by the "file" query parameter. Anything else is a 404.
	 *
	 * @param string $mod_action
	 */
	public function modAction($mod_action) {
		switch ($mod_action) {
		case 'admin':
			$this->admin();
			break;
		case 'generate':
			$this->generate(Filter::get('file'));
			break;
		default:
			http_response_code(404);
		}
	}

	/**
	 * Generate an XML file.
	 *
	 * The file name comes straight from the query string, so it is matched
	 * strictly: the dot is literal, "D" stops "$" from accepting a trailing
	 * newline, and the digit runs are bounded so that the tree ID and
	 * RECORDS_PER_VOLUME * volume both fit in a 32 bit integer.
	 * Anything that does not match is a 404.
	 *
	 * @param string|null $file
	 */
	private function generate($file) {
		$file = (string) $file;

		if ($file === 'sitemap.xml') {
			$this->generateIndex();
		} elseif (preg_match('/^sitemap-(\d{1,9})-([isrmn])-(\d{1,6})\.xml$/D', $file, $match)) {
			$this->generateFile((int) $match[1], $match[2], (int) $match[3]);
		} else {
			http_response_code(404);
		}
	}

	/**
	 * The index file contains references to all the other files.
	 * These files are the same for visitors/users/admins.
	 *
	 * The index is rebuilt when the cached copy is older than CACHE_LIFE
	 * or is missing/empty.
	 */
	private function generateIndex() {
		$data = '';

		// Check the cache
		$timestamp = (int) $this->getPreference('sitemap.timestamp');
		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
			$data = (string) $this->getPreference('sitemap.xml');
		}

		if ($data === '') {
			$lastmod  = '<lastmod>' . date('Y-m-d') . '</lastmod>';
			$url_base = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-';

			foreach (Tree::getAll() as $tree) {
				if (!$tree->getPreference('include_in_sitemap')) {
					continue;
				}
				foreach (['i', 's', 'r', 'n', 'm'] as $rec_type) {
					$volumes = $this->countVolumes($tree, $rec_type);
					for ($i = 0; $i < $volumes; ++$i) {
						$data .= '<sitemap><loc>' . $url_base . $tree->getTreeId() . '-' . $rec_type . '-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
					}
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;

			// Cache this data.
			$this->setPreference('sitemap.xml', $data);
			$this->setPreference('sitemap.timestamp', WT_TIMESTAMP);
		}

		$this->sendXml($data);
	}

	/**
	 * A separate file for each family tree and each record type.
	 * These files depend on access levels, so only cache for visitors.
	 *
	 * Responds with a 404 when the tree does not exist, is not included
	 * in the sitemaps, or the record type is unknown.
	 *
	 * @param int    $ged_id
	 * @param string $rec_type
	 * @param string $volume
	 */
	private function generateFile($ged_id, $rec_type, $volume) {
		// Normalise, so that "0", "00", "000", ... all share one cache entry.
		$ged_id = (int) $ged_id;
		$volume = (int) $volume;

		$tree = $this->findSitemapTree($ged_id);
		if ($tree === null || $this->recordSource($rec_type) === null || $volume < 0) {
			http_response_code(404);

			return;
		}

		$cache_key  = 'sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume;
		$is_visitor = !Auth::check();
		$data       = '';

		// Check the cache
		$timestamp = (int) $this->getPreference($cache_key . '.timestamp');
		if ($is_visitor && $timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
			$data = (string) $this->getPreference($cache_key . '.xml');
		}

		if ($data === '') {
			$records = $this->fetchRecords($tree, $rec_type, $volume);

			// Every volume starts with the home page of its tree.
			$data = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
			foreach ($records as $record) {
				if ($record->canShowName()) {
					$data .= '<url>';
					$data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
					$chan = $record->getFirstFact('CHAN');
					if ($chan) {
						$date = $chan->getDate();
						if ($date->isOK()) {
							$data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
						}
					}
					$data .= '</url>' . PHP_EOL;
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;

			// Cache this data - but only for visitors, as we don’t want
			// visitors to see data created by signed-in users.
			// Volumes beyond the end of the data are never cached: the volume
			// number is chosen by the client, and caching them would let
			// anybody create an unlimited number of settings.
			if ($is_visitor && ($volume === 0 || !empty($records))) {
				$this->setPreference($cache_key . '.xml', $data);
				$this->setPreference($cache_key . '.timestamp', WT_TIMESTAMP);
			}
		}

		$this->sendXml($data);
	}

	/**
	 * Find a tree by its ID, provided that it is included in the sitemaps.
	 *
	 * Uses the same list and the same preference as generateIndex(), so a
	 * volume file can only be requested for trees that the index advertises.
	 *
	 * @param int $tree_id
	 *
	 * @return Tree|null
	 */
	private function findSitemapTree($tree_id) {
		foreach (Tree::getAll() as $tree) {
			if ((int) $tree->getTreeId() === (int) $tree_id) {
				return $tree->getPreference('include_in_sitemap') ? $tree : null;
			}
		}

		return null;
	}

	/**
	 * Describe where the records of one type are stored.
	 *
	 * All values are fixed strings from this method - never request data -
	 * so they may safely be concatenated into SQL.
	 *
	 * @param string $rec_type One of i, s, r, n, m
	 *
	 * @return string[]|null [table name, column prefix, extra WHERE condition], or null for an unknown type
	 */
	private function recordSource($rec_type) {
		switch ($rec_type) {
		case 'i':
			return ['##individuals', 'i', ''];
		case 's':
			return ['##sources', 's', ''];
		case 'r':
			return ['##other', 'o', " AND o_type = 'REPO'"];
		case 'n':
			return ['##other', 'o', " AND o_type = 'NOTE'"];
		case 'm':
			return ['##media', 'm', ''];
		default:
			return null;
		}
	}

	/**
	 * How many volume files are needed for one record type of one tree?
	 *
	 * @param Tree   $tree
	 * @param string $rec_type One of i, s, r, n, m
	 *
	 * @return int
	 */
	private function countVolumes(Tree $tree, $rec_type) {
		list($table, $prefix, $extra) = $this->recordSource($rec_type);

		$n = (int) Database::prepare(
			"SELECT COUNT(*) FROM `" . $table . "` WHERE " . $prefix . "_file = :tree_id" . $extra
		)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();

		// Round up, so that an exact multiple of RECORDS_PER_VOLUME does not
		// produce a trailing volume with no records in it.
		$volumes = (int) ceil($n / self::RECORDS_PER_VOLUME);

		// Volume 0 of the individuals is listed even for an empty tree
		// (presumably so that the tree's home page is always in a sitemap).
		if ($rec_type === 'i') {
			return max(1, $volumes);
		}

		return $volumes;
	}

	/**
	 * Load the records that belong in one volume, ordered by XREF.
	 *
	 * @param Tree   $tree
	 * @param string $rec_type One of i, s, r, n, m
	 * @param int    $volume   Zero-based volume number
	 *
	 * @return array Individual, Source, Repository, Note or Media objects, depending on $rec_type
	 */
	private function fetchRecords(Tree $tree, $rec_type, $volume) {
		list($table, $prefix, $extra) = $this->recordSource($rec_type);

		$rows = Database::prepare(
			"SELECT " . $prefix . "_id AS xref, " . $prefix . "_gedcom AS gedcom" .
			" FROM `" . $table . "`" .
			" WHERE " . $prefix . "_file = :tree_id" . $extra .
			" ORDER BY " . $prefix . "_id" .
			" LIMIT :limit OFFSET :offset"
		)->execute([
			'tree_id' => (int) $tree->getTreeId(),
			'limit'   => self::RECORDS_PER_VOLUME,
			'offset'  => self::RECORDS_PER_VOLUME * $volume,
		])->fetchAll();

		$records = [];
		foreach ($rows as $row) {
			switch ($rec_type) {
			case 'i':
				$records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
				break;
			case 's':
				$records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
				break;
			case 'r':
				$records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
				break;
			case 'n':
				$records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
				break;
			case 'm':
				$records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
				break;
			}
		}

		return $records;
	}

	/**
	 * Send an XML document to the client.
	 *
	 * @param string $data
	 */
	private function sendXml($data) {
		$data = (string) $data;

		header('Content-Type: application/xml');
		// strlen() counts bytes, which is what Content-Length requires.
		header('Content-Length: ' . strlen($data));
		echo $data;
	}

	/**
	 * Edit the configuration
	 *
	 * Shows one checkbox per tree. On a POST with action=save, stores the
	 * "include_in_sitemap" preference of every tree and deletes the cached
	 * sitemaps, so that they are regenerated on the next request.
	 *
	 * NOTE(review): no CSRF token is checked here before saving - confirm
	 * whether the framework validates POST requests elsewhere.
	 */
	private function admin() {
		$controller = new PageController;
		$controller
			->restrictAccess(Auth::isAdmin())
			->setPageTitle($this->getTitle())
			->pageHeader();

		// Save the updated preferences
		if (Filter::post('action') == 'save') {
			foreach (Tree::getAll() as $tree) {
				$tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
			}
			// Clear cache and force files to be regenerated
			// NOTE(review): this matches on the setting name only, so it is not
			// restricted to settings that belong to this module.
			Database::prepare(
				"DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
			)->execute();
		}

		// Becomes true when at least one tree is included in the sitemaps.
		$include_any = false;

		echo Bootstrap4::breadcrumbs([
			'admin.php'         => I18N::translate('Control panel'),
			'admin_modules.php' => I18N::translate('Module administration'),
		], $controller->getPageTitle());
		?>

		<h1><?= $controller->getPageTitle() ?></h1>
		<?php

		echo
		'<p>',
			/* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
			I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
			'</p>',
		'<p>', /* I18N: Label for a configuration option */ I18N::translate('Which family trees should be included in the sitemaps'), '</p>',
			'<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
		'<input type="hidden" name="action" value="save">';
		foreach (Tree::getAll() as $tree) {
			echo '<div class="checkbox"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
			if ($tree->getPreference('include_in_sitemap')) {
				echo 'checked';
				$include_any = true;
			}
			echo '>', $tree->getTitleHtml(), '</label></div>';
		}
		echo
		'<input type="submit" value="', I18N::translate('save'), '">',
		'</form>',
		'<hr>';

		if ($include_any) {
			// URL 1 is shown as HTML text (so "&" is written as "&amp;").
			// URL 2 is passed as a query parameter to the search engines' "ping" URLs.
			$site_map_url1 = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap.xml';
			$site_map_url2 = rawurlencode(WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml');
			echo
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
				'<pre>Sitemap: ', $site_map_url1, '</pre>',
				'<hr>',
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
				'<ul>',
				// This list comes from http://en.wikipedia.org/wiki/Sitemaps
				'<li><a href="https://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
				'<li><a href="https://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
				'</ul>';

		}
	}

	/** {@inheritdoc} */
	public function getConfigLink() {
		return 'module.php?mod=' . $this->getName() . '&amp;mod_action=admin';
	}
}
346}
347