xref: /webtrees/app/Module/SiteMapModule.php (revision 31bc7874190e33336a5ff742eac1246d7473e530)
1<?php
2namespace Fisharebest\Webtrees;
3
4/**
5 * webtrees: online genealogy
6 * Copyright (C) 2015 webtrees development team
7 * This program is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
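/**
 * Class SiteMapModule - serve sitemap index and volume files for search engines,
 * and provide the control-panel page that chooses which family trees they cover.
 */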
22class SiteMapModule extends AbstractModule implements ModuleConfigInterface {
23	const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
24	const CACHE_LIFE = 1209600; // Two weeks
25
26	/** {@inheritdoc} */
27	public function getTitle() {
28		return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
29	}
30
31	/** {@inheritdoc} */
32	public function getDescription() {
33		return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
34	}
35
36	/** {@inheritdoc} */
37	public function modAction($mod_action) {
38		switch ($mod_action) {
39		case 'admin':
40			$this->admin();
41			break;
42		case 'generate':
43			$this->generate(Filter::get('file'));
44			break;
45		default:
46			http_response_code(404);
47		}
48	}
49
50	/**
51	 * @param string $file
52	 */
53	private function generate($file) {
54		if ($file == 'sitemap.xml') {
55			$this->generateIndex();
56		} elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+).xml$/', $file, $match)) {
57			$this->generateFile($match[1], $match[2], $match[3]);
58		} else {
59			http_response_code(404);
60		}
61	}
62
63	/**
64	 * The index file contains references to all the other files.
65	 * These files are the same for visitors/users/admins.
66	 */
67	private function generateIndex() {
68		// Check the cache
69		$timestamp = $this->getSetting('sitemap.timestamp');
70		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
71			$data = $this->getSetting('sitemap.xml');
72		} else {
73			$data = '';
74			$lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
75			foreach (Tree::getAll() as $tree) {
76				if ($tree->getPreference('include_in_sitemap')) {
77					$n = Database::prepare(
78						"SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id"
79					)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
80					for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
81						$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-i-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
82					}
83					$n = Database::prepare(
84						"SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id"
85					)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
86					if ($n) {
87						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
88							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-s-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
89						}
90					}
91					$n = Database::prepare(
92						"SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'"
93					)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
94					if ($n) {
95						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
96							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-r-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
97						}
98					}
99					$n = Database::prepare(
100						"SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'"
101					)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
102					if ($n) {
103						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
104							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-n-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
105						}
106					}
107					$n = Database::prepare(
108						"SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id"
109					)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
110					if ($n) {
111						for ($i = 0; $i <= $n / self::RECORDS_PER_VOLUME; ++$i) {
112							$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-m-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
113						}
114					}
115				}
116			}
117			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
118			// Cache this data.
119			$this->setSetting('sitemap.xml', $data);
120			$this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
121		}
122		header('Content-Type: application/xml');
123		header('Content-Length: ' . strlen($data));
124		echo $data;
125	}
126
127	/**
128	 * A separate file for each family tree and each record type.
129	 * These files depend on access levels, so only cache for visitors.
130	 *
131	 * @param integer $ged_id
132	 * @param string $rec_type
133	 * @param string $volume
134	 */
135	private function generateFile($ged_id, $rec_type, $volume) {
136		$tree = Tree::findById($ged_id);
137		// Check the cache
138		$timestamp = $this->getSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.timestamp');
139		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && !Auth::check()) {
140			$data = $this->getSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.xml');
141		} else {
142			$data = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
143			$records = array();
144			switch ($rec_type) {
145			case 'i':
146				$rows = Database::prepare(
147					"SELECT i_id AS xref, i_gedcom AS gedcom" .
148					" FROM `##individuals`" .
149					" WHERE i_file = :tree_id" .
150					" ORDER BY i_id" .
151					" LIMIT :limit OFFSET :offset"
152				)->execute(array(
153					'tree_id' => $ged_id,
154					'limit'   => self::RECORDS_PER_VOLUME,
155					'offset'  => self::RECORDS_PER_VOLUME * $volume,
156				))->fetchAll();
157				foreach ($rows as $row) {
158					$records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
159				}
160				break;
161			case 's':
162				$rows = Database::prepare(
163					"SELECT s_id AS xref, s_gedcom AS gedcom" .
164					" FROM `##sources`" .
165					" WHERE s_file = :tree_id" .
166					" ORDER BY s_id" .
167					" LIMIT :limit OFFSET :offset"
168				)->execute(array(
169					'tree_id' => $ged_id,
170					'limit'   => self::RECORDS_PER_VOLUME,
171					'offset'  => self::RECORDS_PER_VOLUME * $volume,
172				))->fetchAll();
173				foreach ($rows as $row) {
174					$records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
175				}
176				break;
177			case 'r':
178				$rows = Database::prepare(
179					"SELECT o_id AS xref, o_gedcom AS gedcom" .
180					" FROM `##other`" .
181					" WHERE o_file = :tree_id AND o_type = 'REPO'" .
182					" ORDER BY o_id" .
183					" LIMIT :limit OFFSET :offset"
184				)->execute(array(
185					'tree_id' => $ged_id,
186					'limit'   => self::RECORDS_PER_VOLUME,
187					'offset'  => self::RECORDS_PER_VOLUME * $volume,
188				))->fetchAll();
189				foreach ($rows as $row) {
190					$records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
191				}
192				break;
193			case 'n':
194				$rows = Database::prepare(
195					"SELECT o_id AS xref, o_gedcom AS gedcom" .
196					" FROM `##other`" .
197					" WHERE o_file = :tree_id AND o_type = 'NOTE'" .
198					" ORDER BY o_id" .
199					" LIMIT :limit OFFSET :offset"
200				)->execute(array(
201					'tree_id' => $ged_id,
202					'limit'   => self::RECORDS_PER_VOLUME,
203					'offset'  => self::RECORDS_PER_VOLUME * $volume,
204				))->fetchAll();
205				foreach ($rows as $row) {
206					$records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
207				}
208				break;
209			case 'm':
210				$rows = Database::prepare(
211					"SELECT m_id AS xref, m_gedcom AS gedcom" .
212					" FROM `##media`" .
213					" WHERE m_file = :tree_id" .
214					" ORDER BY m_id" .
215					" LIMIT :limit OFFSET :offset"
216				)->execute(array(
217					'tree_id' => $ged_id,
218					'limit'   => self::RECORDS_PER_VOLUME,
219					'offset'  => self::RECORDS_PER_VOLUME * $volume,
220				))->fetchAll();
221				foreach ($rows as $row) {
222					$records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
223				}
224				break;
225			}
226			foreach ($records as $record) {
227				if ($record->canShowName()) {
228					$data .= '<url>';
229					$data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
230					$chan = $record->getFirstFact('CHAN');
231					if ($chan) {
232						$date = $chan->getDate();
233						if ($date->isOK()) {
234							$data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
235						}
236					}
237					$data .= '</url>' . PHP_EOL;
238				}
239			}
240			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
241			// Cache this data - but only for visitors, as we don’t want
242			// visitors to see data created by logged-in users.
243			if (!Auth::check()) {
244				$this->setSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.xml', $data);
245				$this->setSetting('sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume . '.timestamp', WT_TIMESTAMP);
246			}
247		}
248		header('Content-Type: application/xml');
249		header('Content-Length: ' . strlen($data));
250		echo $data;
251	}
252
253	/**
254	 * Edit the configuration
255	 */
256	private function admin() {
257		$controller = new PageController;
258		$controller
259			->restrictAccess(Auth::isAdmin())
260			->setPageTitle($this->getTitle())
261			->pageHeader();
262
263		// Save the updated preferences
264		if (Filter::post('action') == 'save') {
265			foreach (Tree::getAll() as $tree) {
266				$tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
267			}
268			// Clear cache and force files to be regenerated
269			Database::prepare(
270				"DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
271			)->execute();
272		}
273
274		$include_any = false;
275
276		?>
277		<ol class="breadcrumb small">
278			<li><a href="admin.php"><?php echo I18N::translate('Control panel'); ?></a></li>
279			<li><a href="admin_modules.php"><?php echo I18N::translate('Module administration'); ?></a></li>
280			<li class="active"><?php echo $controller->getPageTitle(); ?></li>
281		</ol>
282		<h1><?php echo $controller->getPageTitle(); ?></h1>
283		<?php
284
285		echo
286		'<p>',
287			/* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
288			I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling.  All major search engines support sitemaps.  For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
289			'</p>',
290		'<p>', I18N::translate('Which family trees should be included in the sitemaps?'), '</p>',
291			'<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
292		'<input type="hidden" name="action" value="save">';
293		foreach (Tree::getAll() as $tree) {
294			echo '<div class="checkbox"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
295			if ($tree->getPreference('include_in_sitemap')) {
296				echo 'checked';
297				$include_any = true;
298			}
299			echo '>', $tree->getTitleHtml(), '</label></div>';
300		}
301		echo
302		'<input type="submit" value="', I18N::translate('save'), '">',
303		'</form>',
304		'<hr>';
305
306		if ($include_any) {
307			$site_map_url1 = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap.xml';
308			$site_map_url2 = rawurlencode(WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml');
309			echo
310				'<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
311				'<pre>Sitemap: ', $site_map_url1, '</pre>',
312				'<hr>',
313				'<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
314				'<ul>',
315				// This list comes from http://en.wikipedia.org/wiki/Sitemaps
316				'<li><a target="_blank" href="http://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
317				'<li><a target="_blank" href="http://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
318				'</ul>';
319
320		}
321	}
322
323	/** {@inheritdoc} */
324	public function getConfigLink() {
325		return 'module.php?mod=' . $this->getName() . '&amp;mod_action=admin';
326	}
327}
328