xref: /webtrees/app/Module/SiteMapModule.php (revision 13abd6f3a37322f885d85df150e105d27ad81f8d)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2016 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16namespace Fisharebest\Webtrees\Module;
17
18use Fisharebest\Webtrees\Auth;
19use Fisharebest\Webtrees\Controller\PageController;
20use Fisharebest\Webtrees\Database;
21use Fisharebest\Webtrees\Filter;
22use Fisharebest\Webtrees\I18N;
23use Fisharebest\Webtrees\Individual;
24use Fisharebest\Webtrees\Media;
25use Fisharebest\Webtrees\Note;
26use Fisharebest\Webtrees\Repository;
27use Fisharebest\Webtrees\Source;
28use Fisharebest\Webtrees\Tree;
29
/**
 * Class SiteMapModule - generate a sitemap index, and the sitemap files it lists, for search engines.
 */
33class SiteMapModule extends AbstractModule implements ModuleConfigInterface {
34	const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
35	const CACHE_LIFE         = 1209600; // Two weeks
36
37	/** {@inheritdoc} */
38	public function getTitle() {
39		return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
40	}
41
42	/** {@inheritdoc} */
43	public function getDescription() {
44		return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
45	}
46
47	/**
48	 * This is a general purpose hook, allowing modules to respond to routes
49	 * of the form module.php?mod=FOO&mod_action=BAR
50	 *
51	 * @param string $mod_action
52	 */
53	public function modAction($mod_action) {
54		switch ($mod_action) {
55		case 'admin':
56			$this->admin();
57			break;
58		case 'generate':
59			$this->generate(Filter::get('file'));
60			break;
61		default:
62			http_response_code(404);
63		}
64	}
65
66	/**
67	 * Generate an XML file.
68	 *
69	 * @param string $file
70	 */
71	private function generate($file) {
72		if ($file == 'sitemap.xml') {
73			$this->generateIndex();
74		} elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+).xml$/', $file, $match)) {
75			$this->generateFile($match[1], $match[2], $match[3]);
76		} else {
77			http_response_code(404);
78		}
79	}
80
81	/**
82	 * The index file contains references to all the other files.
83	 * These files are the same for visitors/users/admins.
84	 */
85	private function generateIndex() {
86		// Check the cache
87		$timestamp = $this->getSetting('sitemap.timestamp');
88		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
89			$data = $this->getSetting('sitemap.xml');
90		} else {
91			$data    = '';
92			$lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
93			foreach (Tree::getAll() as $tree) {
94				if ($tree->getPreference('include_in_sitemap')) {
95					$n = Database::prepare(
96						"SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id"
97					)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();
98					for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
99						$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-i-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
100					}
101					$n = Database::prepare(
102						"SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id"
103					)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();
104					if ($n) {
105						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
106							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-s-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
107						}
108					}
109					$n = Database::prepare(
110						"SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'"
111					)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();
112					if ($n) {
113						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
114							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-r-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
115						}
116					}
117					$n = Database::prepare(
118						"SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'"
119					)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();
120					if ($n) {
121						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
122							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-n-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
123						}
124					}
125					$n = Database::prepare(
126						"SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id"
127					)->execute(['tree_id' => $tree->getTreeId()])->fetchOne();
128					if ($n) {
129						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
130							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-m-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
131						}
132					}
133				}
134			}
135			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
136			// Cache this data.
137			$this->setSetting('sitemap.xml', $data);
138			$this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
139		}
140		header('Content-Type: application/xml');
141		header('Content-Length: ' . strlen($data));
142		echo $data;
143	}
144
145	/**
146	 * A separate file for each family tree and each record type.
147	 * These files depend on access levels, so only cache for visitors.
148	 *
149	 * @param int    $ged_id
150	 * @param string $rec_type
151	 * @param string $volume
152	 */
153	private function generateFile($ged_id, $rec_type, $volume) {
154		$tree = Tree::findById($ged_id);
155		// Check the cache
156		$timestamp = $this->getSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.timestamp');
157		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && !Auth::check()) {
158			$data = $this->getSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.xml');
159		} else {
160			$data    = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
161			$records = [];
162			switch ($rec_type) {
163			case 'i':
164				$rows = Database::prepare(
165					"SELECT i_id AS xref, i_gedcom AS gedcom" .
166					" FROM `##individuals`" .
167					" WHERE i_file = :tree_id" .
168					" ORDER BY i_id" .
169					" LIMIT :limit OFFSET :offset"
170				)->execute([
171					'tree_id' => $ged_id,
172					'limit'   => self::RECORDS_PER_VOLUME,
173					'offset'  => self::RECORDS_PER_VOLUME * $volume,
174				])->fetchAll();
175				foreach ($rows as $row) {
176					$records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
177				}
178				break;
179			case 's':
180				$rows = Database::prepare(
181					"SELECT s_id AS xref, s_gedcom AS gedcom" .
182					" FROM `##sources`" .
183					" WHERE s_file = :tree_id" .
184					" ORDER BY s_id" .
185					" LIMIT :limit OFFSET :offset"
186				)->execute([
187					'tree_id' => $ged_id,
188					'limit'   => self::RECORDS_PER_VOLUME,
189					'offset'  => self::RECORDS_PER_VOLUME * $volume,
190				])->fetchAll();
191				foreach ($rows as $row) {
192					$records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
193				}
194				break;
195			case 'r':
196				$rows = Database::prepare(
197					"SELECT o_id AS xref, o_gedcom AS gedcom" .
198					" FROM `##other`" .
199					" WHERE o_file = :tree_id AND o_type = 'REPO'" .
200					" ORDER BY o_id" .
201					" LIMIT :limit OFFSET :offset"
202				)->execute([
203					'tree_id' => $ged_id,
204					'limit'   => self::RECORDS_PER_VOLUME,
205					'offset'  => self::RECORDS_PER_VOLUME * $volume,
206				])->fetchAll();
207				foreach ($rows as $row) {
208					$records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
209				}
210				break;
211			case 'n':
212				$rows = Database::prepare(
213					"SELECT o_id AS xref, o_gedcom AS gedcom" .
214					" FROM `##other`" .
215					" WHERE o_file = :tree_id AND o_type = 'NOTE'" .
216					" ORDER BY o_id" .
217					" LIMIT :limit OFFSET :offset"
218				)->execute([
219					'tree_id' => $ged_id,
220					'limit'   => self::RECORDS_PER_VOLUME,
221					'offset'  => self::RECORDS_PER_VOLUME * $volume,
222				])->fetchAll();
223				foreach ($rows as $row) {
224					$records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
225				}
226				break;
227			case 'm':
228				$rows = Database::prepare(
229					"SELECT m_id AS xref, m_gedcom AS gedcom" .
230					" FROM `##media`" .
231					" WHERE m_file = :tree_id" .
232					" ORDER BY m_id" .
233					" LIMIT :limit OFFSET :offset"
234				)->execute([
235					'tree_id' => $ged_id,
236					'limit'   => self::RECORDS_PER_VOLUME,
237					'offset'  => self::RECORDS_PER_VOLUME * $volume,
238				])->fetchAll();
239				foreach ($rows as $row) {
240					$records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
241				}
242				break;
243			}
244			foreach ($records as $record) {
245				if ($record->canShowName()) {
246					$data .= '<url>';
247					$data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
248					$chan = $record->getFirstFact('CHAN');
249					if ($chan) {
250						$date = $chan->getDate();
251						if ($date->isOK()) {
252							$data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
253						}
254					}
255					$data .= '</url>' . PHP_EOL;
256				}
257			}
258			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
259			// Cache this data - but only for visitors, as we don’t want
260			// visitors to see data created by signed-in users.
261			if (!Auth::check()) {
262				$this->setSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.xml', $data);
263				$this->setSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.timestamp', WT_TIMESTAMP);
264			}
265		}
266		header('Content-Type: application/xml');
267		header('Content-Length: ' . strlen($data));
268		echo $data;
269	}
270
271	/**
272	 * Edit the configuration
273	 */
274	private function admin() {
275		$controller = new PageController;
276		$controller
277			->restrictAccess(Auth::isAdmin())
278			->setPageTitle($this->getTitle())
279			->pageHeader();
280
281		// Save the updated preferences
282		if (Filter::post('action') == 'save') {
283			foreach (Tree::getAll() as $tree) {
284				$tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
285			}
286			// Clear cache and force files to be regenerated
287			Database::prepare(
288				"DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
289			)->execute();
290		}
291
292		$include_any = false;
293
294		?>
295		<ol class="breadcrumb small">
296			<li><a href="admin.php"><?php echo I18N::translate('Control panel'); ?></a></li>
297			<li><a href="admin_modules.php"><?php echo I18N::translate('Module administration'); ?></a></li>
298			<li class="active"><?php echo $controller->getPageTitle(); ?></li>
299		</ol>
300		<h1><?php echo $controller->getPageTitle(); ?></h1>
301		<?php
302
303		echo
304		'<p>',
305			/* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
306			I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
307			'</p>',
308		'<p>', /* I18N: Label for a configuration option */ I18N::translate('Which family trees should be included in the sitemaps'), '</p>',
309			'<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
310		'<input type="hidden" name="action" value="save">';
311		foreach (Tree::getAll() as $tree) {
312			echo '<div class="checkbox"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
313			if ($tree->getPreference('include_in_sitemap')) {
314				echo 'checked';
315				$include_any = true;
316			}
317			echo '>', $tree->getTitleHtml(), '</label></div>';
318		}
319		echo
320		'<input type="submit" value="', I18N::translate('save'), '">',
321		'</form>',
322		'<hr>';
323
324		if ($include_any) {
325			$site_map_url1 = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap.xml';
326			$site_map_url2 = rawurlencode(WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml');
327			echo
328				'<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
329				'<pre>Sitemap: ', $site_map_url1, '</pre>',
330				'<hr>',
331				'<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
332				'<ul>',
333				// This list comes from http://en.wikipedia.org/wiki/Sitemaps
334				'<li><a href="http://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
335				'<li><a href="http://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
336				'</ul>';
337
338		}
339	}
340
341	/** {@inheritdoc} */
342	public function getConfigLink() {
343		return 'module.php?mod=' . $this->getName() . '&amp;mod_action=admin';
344	}
345}
346