xref: /webtrees/app/Module/SiteMapModule.php (revision 24ec66ce7e77188cd2495b0f8d4dd0ae6e8c9c52)
1<?php
2namespace Fisharebest\Webtrees;
3
4/**
5 * webtrees: online genealogy
6 * Copyright (C) 2015 webtrees development team
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19use Zend_Session;
20
21/**
22 * Class SiteMapModule
23 */
24class SiteMapModule extends Module implements ModuleConfigInterface {
25	const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
26	const CACHE_LIFE = 1209600; // Two weeks
27
28	/** {@inheritdoc} */
29	public function getTitle() {
30		return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
31	}
32
33	/** {@inheritdoc} */
34	public function getDescription() {
35		return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
36	}
37
38	/** {@inheritdoc} */
39	public function modAction($mod_action) {
40		switch ($mod_action) {
41		case 'admin':
42			$this->admin();
43			break;
44		case 'generate':
45			Zend_Session::writeClose();
46			$this->generate(Filter::get('file'));
47			break;
48		default:
49			http_response_code(404);
50		}
51	}
52
53	/**
54	 * @param string $file
55	 */
56	private function generate($file) {
57		if ($file == 'sitemap.xml') {
58			$this->generateIndex();
59		} elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+).xml$/', $file, $match)) {
60			$this->generateFile($match[1], $match[2], $match[3]);
61		} else {
62			http_response_code(404);
63		}
64	}
65
66	/**
67	 * The index file contains references to all the other files.
68	 * These files are the same for visitors/users/admins.
69	 */
70	private function generateIndex() {
71		// Check the cache
72		$timestamp = $this->getSetting('sitemap.timestamp');
73		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
74			$data = $this->getSetting('sitemap.xml');
75		} else {
76			$data = '';
77			$lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
78			foreach (Tree::getAll() as $tree) {
79				if ($tree->getPreference('include_in_sitemap')) {
80					$n = Database::prepare(
81						"SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id"
82					)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
83					for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
84						$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-i-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
85					}
86					$n = Database::prepare(
87						"SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id"
88					)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
89					if ($n) {
90						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
91							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-s-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
92						}
93					}
94					$n = Database::prepare(
95						"SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'"
96					)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
97					if ($n) {
98						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
99							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-r-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
100						}
101					}
102					$n = Database::prepare(
103						"SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'"
104					)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
105					if ($n) {
106						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
107							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-n-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
108						}
109					}
110					$n = Database::prepare(
111						"SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id"
112					)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
113					if ($n) {
114						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
115							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-m-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
116						}
117					}
118				}
119			}
120			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
121			// Cache this data.
122			$this->setSetting('sitemap.xml', $data);
123			$this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
124		}
125		header('Content-Type: application/xml');
126		header('Content-Length: ' . strlen($data));
127		echo $data;
128	}
129
130	/**
131	 * A separate file for each family tree and each record type.
132	 * These files depend on access levels, so only cache for visitors.
133	 *
134	 * @param integer $ged_id
135	 * @param string $rec_type
136	 * @param string $volume
137	 */
138	private function generateFile($ged_id, $rec_type, $volume) {
139		$tree = Tree::findById($ged_id);
140		// Check the cache
141		$timestamp = $this->getSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.timestamp');
142		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && !Auth::check()) {
143			$data = $this->getSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.xml');
144		} else {
145			$tree = Tree::findById($ged_id);
146			$data = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
147			$records = array();
148			switch ($rec_type) {
149			case 'i':
150				$rows = Database::prepare(
151					"SELECT i_id AS xref, i_gedcom AS gedcom" .
152					" FROM `##individuals`" .
153					" WHERE i_file = :tree_id" .
154					" ORDER BY i_id" .
155					" LIMIT :limit OFFSET :offset"
156				)->execute(array(
157					'tree_id' => $ged_id,
158					'limit'   => self::RECORDS_PER_VOLUME,
159					'offset'  => self::RECORDS_PER_VOLUME * $volume,
160				))->fetchAll();
161				foreach ($rows as $row) {
162					$records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
163				}
164				break;
165			case 's':
166				$rows = Database::prepare(
167					"SELECT s_id AS xref, s_gedcom AS gedcom" .
168					" FROM `##sources`" .
169					" WHERE s_file = :tree_id" .
170					" ORDER BY s_id" .
171					" LIMIT :limit OFFSET :offset"
172				)->execute(array(
173					'tree_id' => $ged_id,
174					'limit'   => self::RECORDS_PER_VOLUME,
175					'offset'  => self::RECORDS_PER_VOLUME * $volume,
176				))->fetchAll();
177				foreach ($rows as $row) {
178					$records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
179				}
180				break;
181			case 'r':
182				$rows = Database::prepare(
183					"SELECT o_id AS xref, o_gedcom AS gedcom" .
184					" FROM `##other`" .
185					" WHERE o_file = :tree_id AND o_type = 'REPO'" .
186					" ORDER BY o_id" .
187					" LIMIT :limit OFFSET :offset"
188				)->execute(array(
189					'tree_id' => $ged_id,
190					'limit'   => self::RECORDS_PER_VOLUME,
191					'offset'  => self::RECORDS_PER_VOLUME * $volume,
192				))->fetchAll();
193				foreach ($rows as $row) {
194					$records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
195				}
196				break;
197			case 'n':
198				$rows = Database::prepare(
199					"SELECT o_id AS xref, o_gedcom AS gedcom" .
200					" FROM `##other`" .
201					" WHERE o_file = :tree_id AND o_type = 'NOTE'" .
202					" ORDER BY o_id" .
203					" LIMIT :limit OFFSET :offset"
204				)->execute(array(
205					'tree_id' => $ged_id,
206					'limit'   => self::RECORDS_PER_VOLUME,
207					'offset'  => self::RECORDS_PER_VOLUME * $volume,
208				))->fetchAll();
209				foreach ($rows as $row) {
210					$records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
211				}
212				break;
213			case 'm':
214				$rows = Database::prepare(
215					"SELECT m_id AS xref, m_gedcom AS gedcom" .
216					" FROM `##media`" .
217					" WHERE m_file = :tree_id" .
218					" ORDER BY m_id" .
219					" LIMIT :limit OFFSET :offset"
220				)->execute(array(
221					'tree_id' => $ged_id,
222					'limit'   => self::RECORDS_PER_VOLUME,
223					'offset'  => self::RECORDS_PER_VOLUME * $volume,
224				))->fetchAll();
225				foreach ($rows as $row) {
226					$records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
227				}
228				break;
229			}
230			foreach ($records as $record) {
231				if ($record->canShowName()) {
232					$data .= '<url>';
233					$data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
234					$chan = $record->getFirstFact('CHAN');
235					if ($chan) {
236						$date = $chan->getDate();
237						if ($date->isOK()) {
238							$data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
239						}
240					}
241					$data .= '</url>' . PHP_EOL;
242				}
243			}
244			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
245			// Cache this data - but only for visitors, as we don’t want
246			// visitors to see data created by logged-in users.
247			if (!Auth::check()) {
248				$this->setSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.xml', $data);
249				$this->setSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.timestamp', WT_TIMESTAMP);
250			}
251		}
252		header('Content-Type: application/xml');
253		header('Content-Length: ' . strlen($data));
254		echo $data;
255	}
256
257	/**
258	 * Edit the configuration
259	 */
260	private function admin() {
261		$controller = new PageController;
262		$controller
263			->restrictAccess(Auth::isAdmin())
264			->setPageTitle($this->getTitle())
265			->pageHeader();
266
267		// Save the updated preferences
268		if (Filter::post('action') == 'save') {
269			foreach (Tree::getAll() as $tree) {
270				$tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
271			}
272			// Clear cache and force files to be regenerated
273			Database::prepare(
274				"DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
275			)->execute();
276		}
277
278		$include_any = false;
279
280		?>
281		<ol class="breadcrumb small">
282			<li><a href="admin.php"><?php echo I18N::translate('Control panel'); ?></a></li>
283			<li><a href="admin_modules.php"><?php echo I18N::translate('Module administration'); ?></a></li>
284			<li class="active"><?php echo $controller->getPageTitle(); ?></li>
285		</ol>
286		<h2><?php echo $controller->getPageTitle(); ?></h2>
287		<?php
288
289		echo
290		'<p>',
291			/* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
292			I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling.  All major search engines support sitemaps.  For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
293			'</p>',
294		'<p>', I18N::translate('Which family trees should be included in the sitemaps?'), '</p>',
295			'<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
296		'<input type="hidden" name="action" value="save">';
297		foreach (Tree::getAll() as $tree) {
298			echo '<p><input type="checkbox" name="include', $tree->getTreeId(), '" ';
299			if ($tree->getPreference('include_in_sitemap')) {
300				echo 'checked';
301				$include_any = true;
302			}
303			echo '>', $tree->getTitleHtml(), '</p>';
304		}
305		echo
306		'<input type="submit" value="', I18N::translate('save'), '">',
307		'</form>',
308		'<hr>';
309
310		if ($include_any) {
311			$site_map_url1 = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap.xml';
312			$site_map_url2 = rawurlencode(WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml');
313			echo
314				'<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
315				'<pre>Sitemap: ', $site_map_url1, '</pre>',
316				'<hr>',
317				'<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
318				'<ul>',
319				// This list comes from http://en.wikipedia.org/wiki/Sitemaps
320				'<li><a target="_blank" href="http://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
321				'<li><a target="_blank" href="http://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
322				'</ul>';
323
324		}
325	}
326
327	/** {@inheritdoc} */
328	public function getConfigLink() {
329		return 'module.php?mod=' . $this->getName() . '&amp;mod_action=admin';
330	}
331}
332