<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2015 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Controller\PageController;
use Fisharebest\Webtrees\Database;
use Fisharebest\Webtrees\Filter;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Tree;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index ("sitemap.xml") plus one sitemap file per
 * tree / record type / volume, and provides an admin page for choosing
 * which trees are listed in the index. Generated XML is cached in the
 * module settings for CACHE_LIFE seconds.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface {
    const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    const CACHE_LIFE         = 1209600; // Two weeks

    /** {@inheritdoc} */
    public function getTitle() {
        return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
    }

    /** {@inheritdoc} */
    public function getDescription() {
        return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
    }

    /**
     * This is a general purpose hook, allowing modules to respond to routes
     * of the form module.php?mod=FOO&mod_action=BAR
     *
     * Unknown actions produce an HTTP 404 status.
     *
     * @param string $mod_action
     */
    public function modAction($mod_action) {
        switch ($mod_action) {
        case 'admin':
            $this->admin();
            break;
        case 'generate':
            $this->generate(Filter::get('file'));
            break;
        default:
            http_response_code(404);
        }
    }

    /**
     * Generate an XML file.
     *
     * Accepts either the index file name ("sitemap.xml") or a volume file
     * name of the form "sitemap-{tree id}-{i|s|r|m|n}-{volume}.xml".
     * Anything else produces an HTTP 404 status.
     *
     * @param string $file
     */
    private function generate($file) {
        if ($file === 'sitemap.xml') {
            $this->generateIndex();
        } elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+)\.xml$/', $file, $match)) {
            // The dot is escaped so that only a literal ".xml" suffix is accepted.
            $this->generateFile($match[1], $match[2], $match[3]);
        } else {
            http_response_code(404);
        }
    }

    /**
     * The index file contains references to all the other files.
     * These files are the same for visitors/users/admins.
     *
     * For every tree with the "include_in_sitemap" preference set, one
     * <sitemap> entry is written per volume of RECORDS_PER_VOLUME records
     * of each record type. The result is cached in the module settings.
     */
    private function generateIndex() {
        // Check the cache
        $timestamp = $this->getSetting('sitemap.timestamp');
        if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
            $data = $this->getSetting('sitemap.xml');
        } else {
            // Record-type letter (as used in the volume file names) => query counting those records.
            $count_queries = array(
                'i' => "SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id",
                's' => "SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id",
                'r' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'",
                'n' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'",
                'm' => "SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id",
            );

            $data    = '';
            $lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
            foreach (Tree::getAll() as $tree) {
                if (!$tree->getPreference('include_in_sitemap')) {
                    continue;
                }
                $tree_id = $tree->getTreeId();
                foreach ($count_queries as $rec_type => $sql) {
                    $count = (int) Database::prepare($sql)->execute(array('tree_id' => $tree_id))->fetchOne();
                    // Round up, so that an exact multiple of RECORDS_PER_VOLUME
                    // does not produce a trailing volume with no records.
                    $volumes = (int) ceil($count / self::RECORDS_PER_VOLUME);
                    if ($rec_type === 'i') {
                        // Always list one individuals volume per tree: every volume
                        // also carries the URL of the tree's home page.
                        $volumes = max(1, $volumes);
                    }
                    for ($i = 0; $i < $volumes; ++$i) {
                        // Ampersands must be entity-escaped inside XML.
                        $data .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree_id . '-' . $rec_type . '-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
                    }
                }
            }
            $data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
            // Cache this data.
            $this->setSetting('sitemap.xml', $data);
            $this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
        }
        header('Content-Type: application/xml');
        header('Content-Length: ' . strlen($data));
        echo $data;
    }

    /**
     * Fetch one volume (page) of rows for a sitemap file.
     *
     * @param string $sql    SELECT statement using the :tree_id placeholder, without a LIMIT clause
     * @param int    $ged_id
     * @param string $volume Zero-based volume number
     *
     * @return array Rows, each exposing whatever columns $sql selects
     */
    private function fetchVolumeRows($sql, $ged_id, $volume) {
        return Database::prepare(
            $sql . " LIMIT :limit OFFSET :offset"
        )->execute(array(
            'tree_id' => $ged_id,
            'limit'   => self::RECORDS_PER_VOLUME,
            'offset'  => self::RECORDS_PER_VOLUME * $volume,
        ))->fetchAll();
    }

    /**
     * A separate file for each family tree and each record type.
     * These files depend on access levels, so only cache for visitors.
     *
     * @param int    $ged_id
     * @param string $rec_type One of i, s, r, n, m
     * @param string $volume   Zero-based volume number
     */
    private function generateFile($ged_id, $rec_type, $volume) {
        // NOTE(review): behaviour of Tree::findById() for an unknown ID is not
        // visible here - confirm that it cannot return null.
        $tree      = Tree::findById($ged_id);
        $cache_key = 'sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume;
        // Check the cache
        $timestamp = $this->getSetting($cache_key . '.timestamp');
        if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && !Auth::check()) {
            $data = $this->getSetting($cache_key . '.xml');
        } else {
            // Every volume starts with the tree's home page. Ampersands must be entity-escaped inside XML.
            $data    = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
            $records = array();
            switch ($rec_type) {
            case 'i':
                $rows = $this->fetchVolumeRows(
                    "SELECT i_id AS xref, i_gedcom AS gedcom" .
                    " FROM `##individuals`" .
                    " WHERE i_file = :tree_id" .
                    " ORDER BY i_id",
                    $ged_id,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 's':
                $rows = $this->fetchVolumeRows(
                    "SELECT s_id AS xref, s_gedcom AS gedcom" .
                    " FROM `##sources`" .
                    " WHERE s_file = :tree_id" .
                    " ORDER BY s_id",
                    $ged_id,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 'r':
                $rows = $this->fetchVolumeRows(
                    "SELECT o_id AS xref, o_gedcom AS gedcom" .
                    " FROM `##other`" .
                    " WHERE o_file = :tree_id AND o_type = 'REPO'" .
                    " ORDER BY o_id",
                    $ged_id,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 'n':
                $rows = $this->fetchVolumeRows(
                    "SELECT o_id AS xref, o_gedcom AS gedcom" .
                    " FROM `##other`" .
                    " WHERE o_file = :tree_id AND o_type = 'NOTE'" .
                    " ORDER BY o_id",
                    $ged_id,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 'm':
                $rows = $this->fetchVolumeRows(
                    "SELECT m_id AS xref, m_gedcom AS gedcom" .
                    " FROM `##media`" .
                    " WHERE m_file = :tree_id" .
                    " ORDER BY m_id",
                    $ged_id,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            }
            foreach ($records as $record) {
                // Only list records that the current user is allowed to see.
                if ($record->canShowName()) {
                    $data .= '<url>';
                    $data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
                    $chan = $record->getFirstFact('CHAN');
                    if ($chan) {
                        $date = $chan->getDate();
                        if ($date->isOK()) {
                            $data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
                        }
                    }
                    $data .= '</url>' . PHP_EOL;
                }
            }
            $data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
            // Cache this data - but only for visitors, as we don’t want
            // visitors to see data created by logged-in users.
            if (!Auth::check()) {
                $this->setSetting($cache_key . '.xml', $data);
                $this->setSetting($cache_key . '.timestamp', WT_TIMESTAMP);
            }
        }
        header('Content-Type: application/xml');
        header('Content-Length: ' . strlen($data));
        echo $data;
    }

    /**
     * Edit the configuration
     *
     * Shows one checkbox per tree; saving stores each tree's
     * "include_in_sitemap" preference and deletes all cached sitemap data.
     */
    private function admin() {
        $controller = new PageController;
        $controller
            ->restrictAccess(Auth::isAdmin())
            ->setPageTitle($this->getTitle())
            ->pageHeader();

        // Save the updated preferences
        // NOTE(review): no CSRF token is checked here - confirm whether the framework enforces one elsewhere.
        if (Filter::post('action') == 'save') {
            foreach (Tree::getAll() as $tree) {
                $tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
            }
            // Clear cache and force files to be regenerated
            Database::prepare(
                "DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
            )->execute();
        }

        $include_any = false;

        ?>
        <ol class="breadcrumb small">
            <li><a href="admin.php"><?php echo I18N::translate('Control panel'); ?></a></li>
            <li><a href="admin_modules.php"><?php echo I18N::translate('Module administration'); ?></a></li>
            <li class="active"><?php echo $controller->getPageTitle(); ?></li>
        </ol>
        <h1><?php echo $controller->getPageTitle(); ?></h1>
        <?php

        echo
            '<p>',
            /* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
            I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
            '</p>',
            '<p>', I18N::translate('Which family trees should be included in the sitemaps?'), '</p>',
            '<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
            '<input type="hidden" name="action" value="save">';
        foreach (Tree::getAll() as $tree) {
            echo '<div class="checkbox"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
            if ($tree->getPreference('include_in_sitemap')) {
                echo 'checked';
                $include_any = true;
            }
            echo '>', $tree->getTitleHtml(), '</label></div>';
        }
        echo
            '<input type="submit" value="', I18N::translate('save'), '">',
            '</form>',
            '<hr>';

        if ($include_any) {
            $site_map_url = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml';
            // Escaped for display as HTML text; the browser shows the plain URL for copying.
            $site_map_url1 = htmlspecialchars($site_map_url, ENT_QUOTES, 'UTF-8');
            // Encoded for use as a query-string value in the ping links.
            $site_map_url2 = rawurlencode($site_map_url);
            echo
                '<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
                '<pre>Sitemap: ', $site_map_url1, '</pre>',
                '<hr>',
                '<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
                '<ul>',
                // This list comes from http://en.wikipedia.org/wiki/Sitemaps
                '<li><a target="_blank" href="http://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
                '<li><a target="_blank" href="http://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
                '</ul>';

        }
    }

    /** {@inheritdoc} */
    public function getConfigLink() {
        return 'module.php?mod=' . $this->getName() . '&mod_action=admin';
    }
}