xref: /webtrees/app/Module/SiteMapModule.php (revision 4b9ff166b3342695f2a94855b7a33368e6d55c35)
1<?php
2namespace Fisharebest\Webtrees;
3
4/**
5 * webtrees: online genealogy
6 * Copyright (C) 2015 webtrees development team
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19use Zend_Session;
20
/**
 * Class SiteMapModule
 *
 * Serves XML sitemaps (see http://www.sitemaps.org/) for the family trees
 * that an administrator has opted in, and provides the admin page used to
 * choose those trees.
 *
 * One index file ("sitemap.xml") references a series of "volume" files named
 * "sitemap-{tree id}-{record type}-{volume}.xml", each listing at most
 * RECORDS_PER_VOLUME records.  Generated XML is cached in the module settings
 * for CACHE_LIFE seconds.
 */
class SiteMapModule extends Module implements ModuleConfigInterface {
	const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
	const CACHE_LIFE = 1209600; // Two weeks

	/** {@inheritdoc} */
	public function getTitle() {
		return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
	}

	/** {@inheritdoc} */
	public function getDescription() {
		return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
	}

	/** {@inheritdoc} */
	public function modAction($mod_action) {
		switch ($mod_action) {
		case 'admin':
			$this->admin();
			break;
		case 'generate':
			// The session is not needed while generating XML, so release it first.
			Zend_Session::writeClose();
			$this->generate(Filter::get('file'));
			break;
		default:
			http_response_code(404);
		}
	}

	/**
	 * Dispatch a request for a sitemap file to the matching generator.
	 *
	 * Responds with HTTP 404 when the name is neither the index nor a
	 * well-formed volume file name.
	 *
	 * @param string $file The requested file name, e.g. "sitemap.xml" or "sitemap-1-i-0.xml"
	 */
	private function generate($file) {
		$file = (string) $file;

		if ($file === 'sitemap.xml') {
			$this->generateIndex();
		} elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+)\.xml$/D', $file, $match)) {
			// The dot is escaped and "D" stops "$" from matching before a trailing
			// newline, so only exact file names are accepted.
			$this->generateFile($match[1], $match[2], $match[3]);
		} else {
			http_response_code(404);
		}
	}

	/**
	 * Describe every record type that can appear in a sitemap.
	 *
	 * The array is keyed by the one-letter code used in the file names, in the
	 * order in which the types are listed in the index.  Each entry holds:
	 *  - "class":  fully qualified class whose static getInstance() builds the record;
	 *  - "count":  SQL counting the records of this type in one tree;
	 *  - "select": SQL selecting xref/gedcom_id/gedcom for one tree, ordered by
	 *              xref, to which a LIMIT/OFFSET clause is appended.
	 *
	 * @return array[]
	 */
	private function recordTypes() {
		return array(
			'i' => array(
				'class'  => __NAMESPACE__ . '\\Individual',
				'count'  => "SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id",
				'select' =>
					"SELECT i_id AS xref, i_file AS gedcom_id, i_gedcom AS gedcom" .
					" FROM `##individuals`" .
					" WHERE i_file = :tree_id" .
					" ORDER BY i_id",
			),
			's' => array(
				'class'  => __NAMESPACE__ . '\\Source',
				'count'  => "SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id",
				'select' =>
					"SELECT s_id AS xref, s_file AS gedcom_id, s_gedcom AS gedcom" .
					" FROM `##sources`" .
					" WHERE s_file = :tree_id" .
					" ORDER BY s_id",
			),
			'r' => array(
				'class'  => __NAMESPACE__ . '\\Repository',
				'count'  => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'",
				'select' =>
					"SELECT o_id AS xref, o_file AS gedcom_id, o_gedcom AS gedcom" .
					" FROM `##other`" .
					" WHERE o_file = :tree_id AND o_type = 'REPO'" .
					" ORDER BY o_id",
			),
			'n' => array(
				'class'  => __NAMESPACE__ . '\\Note',
				'count'  => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'",
				'select' =>
					"SELECT o_id AS xref, o_file AS gedcom_id, o_gedcom AS gedcom" .
					" FROM `##other`" .
					" WHERE o_file = :tree_id AND o_type = 'NOTE'" .
					" ORDER BY o_id",
			),
			'm' => array(
				'class'  => __NAMESPACE__ . '\\Media',
				'count'  => "SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id",
				'select' =>
					"SELECT m_id AS xref, m_file AS gedcom_id, m_gedcom AS gedcom" .
					" FROM `##media`" .
					" WHERE m_file = :tree_id" .
					" ORDER BY m_id",
			),
		);
	}

	/**
	 * Find a tree by ID, but only if it has been opted in to the sitemaps.
	 *
	 * @param string $tree_id
	 *
	 * @return Tree|null The tree, or null if it does not exist or is not included
	 */
	private function findSitemapTree($tree_id) {
		foreach (Tree::getAll() as $tree) {
			if ((string) $tree->getTreeId() === (string) $tree_id) {
				return $tree->getPreference('include_in_sitemap') ? $tree : null;
			}
		}

		return null;
	}

	/**
	 * Send an XML document as the response body.
	 *
	 * @param string $data
	 */
	private function sendXml($data) {
		header('Content-Type: application/xml');
		// Content-Length is measured in bytes, which is what strlen() returns.
		header('Content-Length: ' . strlen($data));
		echo $data;
	}

	/**
	 * The index file contains references to all the other files.
	 * These files are the same for visitors/users/admins.
	 */
	private function generateIndex() {
		// Check the cache.  A fresh timestamp without a stored document is
		// treated as a miss, so that an empty response is never served.
		$timestamp = $this->getSetting('sitemap.timestamp');
		$data      = null;
		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
			$data = $this->getSetting('sitemap.xml');
		}

		if (!$data) {
			$data    = '';
			$lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
			$types   = $this->recordTypes();
			foreach (Tree::getAll() as $tree) {
				if (!$tree->getPreference('include_in_sitemap')) {
					continue;
				}
				$tree_id = $tree->getTreeId();
				foreach ($types as $rec_type => $type) {
					$n = (int) Database::prepare(
						$type['count']
					)->execute(array('tree_id' => $tree_id))->fetchOne();
					// Round up, so that an exact multiple of RECORDS_PER_VOLUME
					// does not produce a trailing, empty volume.
					$volumes = (int) ceil($n / self::RECORDS_PER_VOLUME);
					if ($rec_type === 'i') {
						// Every volume also lists the tree’s home page, so each tree
						// gets at least one file - even if it has no individuals.
						$volumes = max(1, $volumes);
					}
					for ($i = 0; $i < $volumes; ++$i) {
						$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree_id . '-' . $rec_type . '-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
					}
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
			// Cache this data.
			$this->setSetting('sitemap.xml', $data);
			$this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
		}

		$this->sendXml($data);
	}

	/**
	 * A separate file for each family tree and each record type.
	 * These files depend on access levels, so only cache for visitors.
	 *
	 * Responds with HTTP 404 if the tree does not exist, has not been opted in
	 * to the sitemaps, or if the record type is unknown.
	 *
	 * @param integer $ged_id
	 * @param string $rec_type One of the keys of recordTypes()
	 * @param string $volume   Zero-based volume number
	 */
	private function generateFile($ged_id, $rec_type, $volume) {
		$types = $this->recordTypes();
		$tree  = $this->findSitemapTree($ged_id);
		if ($tree === null || !isset($types[$rec_type])) {
			http_response_code(404);

			return;
		}

		// Normalise the volume, so that "0" and "00" share one cache entry.
		$volume     = (int) $volume;
		$cache_key  = 'sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume;
		$is_visitor = !Auth::check();

		// Check the cache
		$data = null;
		if ($is_visitor && $this->getSetting($cache_key . '.timestamp') > WT_TIMESTAMP - self::CACHE_LIFE) {
			$data = $this->getSetting($cache_key . '.xml');
		}

		if (!$data) {
			// Every volume starts with a link to the tree’s home page.
			$data = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;

			$rows = Database::prepare(
				$types[$rec_type]['select'] .
				" LIMIT :limit OFFSET :offset"
			)->execute(array(
				'tree_id' => $ged_id,
				'limit'   => self::RECORDS_PER_VOLUME,
				'offset'  => self::RECORDS_PER_VOLUME * $volume,
			))->fetchAll();

			$class = $types[$rec_type]['class'];
			foreach ($rows as $row) {
				$record = $class::getInstance($row->xref, $row->gedcom_id, $row->gedcom);
				// Only list records whose name the current user is allowed to see.
				if ($record->canShowName()) {
					$data .= '<url>';
					$data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
					// Use the record’s CHAN (last change) fact, if it has a usable date.
					$chan = $record->getFirstFact('CHAN');
					if ($chan) {
						$date = $chan->getDate();
						if ($date->isOK()) {
							$data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
						}
					}
					$data .= '</url>' . PHP_EOL;
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
			// Cache this data - but only for visitors, as we don’t want
			// visitors to see data created by logged-in users.
			if ($is_visitor) {
				$this->setSetting($cache_key . '.xml', $data);
				$this->setSetting($cache_key . '.timestamp', WT_TIMESTAMP);
			}
		}

		$this->sendXml($data);
	}

	/**
	 * Edit the configuration
	 *
	 * Shows one checkbox per tree.  On save, stores each tree’s
	 * "include_in_sitemap" preference and deletes all cached sitemap data, so
	 * that the files are regenerated on their next request.
	 */
	private function admin() {
		$controller = new PageController;
		$controller
			->restrictAccess(Auth::isAdmin())
			->setPageTitle($this->getTitle())
			->pageHeader();

		// Save the updated preferences
		// NOTE(review): no CSRF token is verified here - confirm whether the
		// framework checks one before this method is reached.
		if (Filter::post('action') === 'save') {
			foreach (Tree::getAll() as $tree) {
				$tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
			}
			// Clear cache and force files to be regenerated
			Database::prepare(
				"DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
			)->execute();
		}

		$include_any = false;

		?>
		<ol class="breadcrumb small">
			<li><a href="admin.php"><?php echo I18N::translate('Control panel'); ?></a></li>
			<li><a href="admin_modules.php"><?php echo I18N::translate('Module administration'); ?></a></li>
			<li class="active"><?php echo $controller->getPageTitle(); ?></li>
		</ol>
		<h2><?php echo $controller->getPageTitle(); ?></h2>
		<?php

		echo
		'<p>',
			/* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
			I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling.  All major search engines support sitemaps.  For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
			'</p>',
		'<p>', I18N::translate('Which family trees should be included in the sitemaps?'), '</p>',
			'<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
		'<input type="hidden" name="action" value="save">';
		foreach (Tree::getAll() as $tree) {
			echo '<p><input type="checkbox" name="include', $tree->getTreeId(), '" ';
			if ($tree->getPreference('include_in_sitemap')) {
				echo 'checked';
				$include_any = true;
			}
			echo '>', $tree->getTitleHtml(), '</p>';
		}
		echo
		'<input type="submit" value="', I18N::translate('save'), '">',
		'</form>',
		'<hr>';

		// The submission hints are only useful once at least one tree is included.
		if ($include_any) {
			// HTML-escaped form, for display in the page.
			$site_map_url1 = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap.xml';
			// URL-encoded form, for use as a query parameter of the ping links.
			$site_map_url2 = rawurlencode(WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml');
			echo
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
				'<pre>Sitemap: ', $site_map_url1, '</pre>',
				'<hr>',
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
				'<ul>',
				// This list comes from http://en.wikipedia.org/wiki/Sitemaps
				'<li><a target="_blank" href="http://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
				'<li><a target="_blank" href="http://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
				'</ul>';

		}
	}

	/** {@inheritdoc} */
	public function getConfigLink() {
		return 'module.php?mod=' . $this->getName() . '&amp;mod_action=admin';
	}
}
331