<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2017 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Bootstrap4;
use Fisharebest\Webtrees\Controller\PageController;
use Fisharebest\Webtrees\Database;
use Fisharebest\Webtrees\Filter;
use Fisharebest\Webtrees\Html;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Tree;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index (sitemap.xml) plus one sitemap file per
 * tree / record type / volume, and provides the admin page that selects
 * which trees are included.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface {
	const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
	const CACHE_LIFE         = 1209600; // Two weeks

	/** {@inheritdoc} */
	public function getTitle() {
		return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
	}

	/** {@inheritdoc} */
	public function getDescription() {
		return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
	}

	/**
	 * This is a general purpose hook, allowing modules to respond to routes
	 * of the form module.php?mod=FOO&mod_action=BAR
	 *
	 * Unknown actions produce a 404 response code.
	 *
	 * @param string $mod_action
	 */
	public function modAction($mod_action) {
		switch ($mod_action) {
			case 'admin':
				$this->admin();
				break;
			case 'generate':
				$this->generate(Filter::get('file'));
				break;
			default:
				http_response_code(404);
		}
	}

	/**
	 * Generate an XML file.
	 *
	 * Accepts "sitemap.xml" (the index) or "sitemap-<tree>-<type>-<volume>.xml",
	 * where <type> is one of i, s, r, m, n. Anything else produces a 404.
	 *
	 * @param string $file
	 */
	private function generate($file) {
		if ($file === 'sitemap.xml') {
			$this->generateIndex();
		} elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+)\.xml$/', $file, $match)) {
			// The dot is escaped so that only a literal ".xml" suffix is accepted.
			$this->generateFile($match[1], $match[2], $match[3]);
		} else {
			http_response_code(404);
		}
	}

	/**
	 * How many volumes are needed to hold a number of records.
	 *
	 * @param int $records Number of records
	 * @param int $minimum Smallest number of volumes to report
	 *
	 * @return int
	 */
	private function countVolumes($records, $minimum = 0) {
		return max($minimum, (int) ceil($records / self::RECORDS_PER_VOLUME));
	}

	/**
	 * The index file contains references to all the other files.
	 * These files are the same for visitors/users/admins.
	 *
	 * The generated XML is cached in the module preferences for CACHE_LIFE seconds.
	 */
	private function generateIndex() {
		// Check the cache
		$timestamp = (int) $this->getPreference('sitemap.timestamp');
		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
			$data = $this->getPreference('sitemap.xml');
		} else {
			// One counting query per record type, in the order the volumes are listed.
			$count_queries = [
				'i' => "SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id",
				's' => "SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id",
				'r' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'",
				'n' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'",
				'm' => "SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id",
			];

			$data    = '';
			$lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
			foreach (Tree::getAll() as $tree) {
				if (!$tree->getPreference('include_in_sitemap')) {
					continue;
				}
				$tree_id = $tree->getTreeId();
				foreach ($count_queries as $rec_type => $sql) {
					$n = (int) Database::prepare($sql)->execute(['tree_id' => $tree_id])->fetchOne();
					// Individuals always get at least one volume, so every included tree
					// appears in the index. Other record types are listed only if present.
					// ceil() avoids listing an empty trailing volume when the count is an
					// exact multiple of RECORDS_PER_VOLUME.
					$volumes = $this->countVolumes($n, $rec_type === 'i' ? 1 : 0);
					for ($i = 0; $i < $volumes; ++$i) {
						// Ampersands must be entity-escaped inside XML.
						$data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree_id . '-' . $rec_type . '-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
					}
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
			// Cache this data.
			$this->setPreference('sitemap.xml', $data);
			$this->setPreference('sitemap.timestamp', WT_TIMESTAMP);
		}
		header('Content-Type: application/xml');
		header('Content-Length: ' . strlen($data));
		echo $data;
	}

	/**
	 * Fetch the rows belonging to one volume of a record listing.
	 *
	 * @param string $sql    A query using the :tree_id, :limit and :offset placeholders
	 * @param int    $ged_id
	 * @param string $volume Zero-based volume number
	 *
	 * @return object[] Rows exposing "xref" and "gedcom" properties
	 */
	private function fetchVolume($sql, $ged_id, $volume) {
		return Database::prepare($sql)->execute([
			'tree_id' => $ged_id,
			'limit'   => self::RECORDS_PER_VOLUME,
			'offset'  => self::RECORDS_PER_VOLUME * $volume,
		])->fetchAll();
	}

	/**
	 * A separate file for each family tree and each record type.
	 * These files depend on access levels, so only cache for visitors.
	 *
	 * @param int    $ged_id
	 * @param string $rec_type One of i, s, r, n, m
	 * @param string $volume   Zero-based volume number
	 */
	private function generateFile($ged_id, $rec_type, $volume) {
		$tree      = Tree::findById($ged_id);
		$cache_key = 'sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume;
		// Check the cache
		$timestamp = (int) $this->getPreference($cache_key . '.timestamp');
		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && !Auth::check()) {
			$data = $this->getPreference($cache_key . '.xml');
		} else {
			// Every volume starts with the tree's home page.
			// Ampersands must be entity-escaped inside XML.
			$data    = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
			$records = [];
			switch ($rec_type) {
				case 'i':
					$rows = $this->fetchVolume(
						"SELECT i_id AS xref, i_gedcom AS gedcom" .
						" FROM `##individuals`" .
						" WHERE i_file = :tree_id" .
						" ORDER BY i_id" .
						" LIMIT :limit OFFSET :offset",
						$ged_id,
						$volume
					);
					foreach ($rows as $row) {
						$records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
					}
					break;
				case 's':
					$rows = $this->fetchVolume(
						"SELECT s_id AS xref, s_gedcom AS gedcom" .
						" FROM `##sources`" .
						" WHERE s_file = :tree_id" .
						" ORDER BY s_id" .
						" LIMIT :limit OFFSET :offset",
						$ged_id,
						$volume
					);
					foreach ($rows as $row) {
						$records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
					}
					break;
				case 'r':
					$rows = $this->fetchVolume(
						"SELECT o_id AS xref, o_gedcom AS gedcom" .
						" FROM `##other`" .
						" WHERE o_file = :tree_id AND o_type = 'REPO'" .
						" ORDER BY o_id" .
						" LIMIT :limit OFFSET :offset",
						$ged_id,
						$volume
					);
					foreach ($rows as $row) {
						$records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
					}
					break;
				case 'n':
					$rows = $this->fetchVolume(
						"SELECT o_id AS xref, o_gedcom AS gedcom" .
						" FROM `##other`" .
						" WHERE o_file = :tree_id AND o_type = 'NOTE'" .
						" ORDER BY o_id" .
						" LIMIT :limit OFFSET :offset",
						$ged_id,
						$volume
					);
					foreach ($rows as $row) {
						$records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
					}
					break;
				case 'm':
					$rows = $this->fetchVolume(
						"SELECT m_id AS xref, m_gedcom AS gedcom" .
						" FROM `##media`" .
						" WHERE m_file = :tree_id" .
						" ORDER BY m_id" .
						" LIMIT :limit OFFSET :offset",
						$ged_id,
						$volume
					);
					foreach ($rows as $row) {
						$records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
					}
					break;
			}
			foreach ($records as $record) {
				// Only list records whose name the current user may see.
				if ($record->canShowName()) {
					$data .= '<url>';
					$data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
					// Use the date of the CHAN fact, when there is a valid one, as <lastmod>.
					$chan = $record->getFirstFact('CHAN');
					if ($chan) {
						$date = $chan->getDate();
						if ($date->isOK()) {
							$data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
						}
					}
					$data .= '</url>' . PHP_EOL;
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
			// Cache this data - but only for visitors, as we don’t want
			// visitors to see data created by signed-in users.
			if (!Auth::check()) {
				$this->setPreference($cache_key . '.xml', $data);
				$this->setPreference($cache_key . '.timestamp', WT_TIMESTAMP);
			}
		}
		header('Content-Type: application/xml');
		header('Content-Length: ' . strlen($data));
		echo $data;
	}

	/**
	 * Edit the configuration
	 *
	 * Shows one checkbox per tree. Saving stores the "include_in_sitemap"
	 * preference of every tree and deletes the cached sitemap data.
	 */
	private function admin() {
		$controller = new PageController;
		$controller
			->restrictAccess(Auth::isAdmin())
			->setPageTitle($this->getTitle())
			->pageHeader();

		// Save the updated preferences
		if (Filter::post('action') === 'save') {
			foreach (Tree::getAll() as $tree) {
				$tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
			}
			// Clear cache and force files to be regenerated
			Database::prepare(
				"DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
			)->execute();
		}

		$include_any = false;

		echo Bootstrap4::breadcrumbs([
			route('admin-control-panel') => I18N::translate('Control panel'),
			route('admin-modules')       => I18N::translate('Module administration'),
		], $controller->getPageTitle());
		?>

		<h1><?= $controller->getPageTitle() ?></h1>
		<?php

		echo
			'<p>',
			/* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
			I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
			'</p>',
			'<p>', /* I18N: Label for a configuration option */ I18N::translate('Which family trees should be included in the sitemaps'), '</p>',
			// Ampersands are entity-escaped inside HTML attributes.
			'<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
			'<input type="hidden" name="action" value="save">';
		foreach (Tree::getAll() as $tree) {
			echo '<div class="form-check"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
			if ($tree->getPreference('include_in_sitemap')) {
				echo 'checked';
				$include_any = true;
			}
			echo '>', $tree->getTitleHtml(), '</label></div>';
		}
		echo
			'<input type="submit" value="', I18N::translate('save'), '">',
			'</form>',
			'<hr>';

		if ($include_any) {
			// Build the raw URL once, then escape it for each output context:
			// HTML text for display, and a URL query parameter for the ping links.
			$site_map_url  = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml';
			$site_map_url1 = htmlspecialchars($site_map_url);
			$site_map_url2 = rawurlencode($site_map_url);
			echo
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
				'<pre>Sitemap: ', $site_map_url1, '</pre>',
				'<hr>',
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
				'<ul>',
				// This list comes from http://en.wikipedia.org/wiki/Sitemaps
				'<li><a href="https://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
				'<li><a href="https://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
				'</ul>';
		}
	}

	/** {@inheritdoc} */
	public function getConfigLink() {
		return Html::url('module.php', [
			'mod'        => $this->getName(),
			'mod_action' => 'admin',
		]);
	}
}