<?php
namespace Fisharebest\Webtrees;

/**
 * webtrees: online genealogy
 * Copyright (C) 2015 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

use Zend_Session;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index ("sitemap.xml") plus one sitemap file per family
 * tree, record type and volume ("sitemap-{tree}-{type}-{volume}.xml"), and
 * provides an admin page to choose which trees are included.
 *
 * Generated XML is cached in the module settings for CACHE_LIFE seconds.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface {
	const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
	const CACHE_LIFE         = 1209600; // Two weeks

	/** {@inheritdoc} */
	public function getTitle() {
		return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
	}

	/** {@inheritdoc} */
	public function getDescription() {
		return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
	}

	/** {@inheritdoc} */
	public function modAction($mod_action) {
		switch ($mod_action) {
		case 'admin':
			$this->admin();
			break;
		case 'generate':
			// The session is closed before the sitemap is generated.
			Zend_Session::writeClose();
			$this->generate(Filter::get('file'));
			break;
		default:
			http_response_code(404);
		}
	}

	/**
	 * Dispatch a request for a sitemap file to the matching generator.
	 * Unrecognised file names produce a 404 response.
	 *
	 * @param string $file Either "sitemap.xml" or "sitemap-{tree}-{type}-{volume}.xml"
	 */
	private function generate($file) {
		if ($file === 'sitemap.xml') {
			$this->generateIndex();
		} elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+)\.xml$/', $file, $match)) {
			// The dot is escaped so that only a literal ".xml" suffix is accepted.
			$this->generateFile($match[1], $match[2], $match[3]);
		} else {
			http_response_code(404);
		}
	}

	/**
	 * How many sitemap volumes are needed to hold a number of records.
	 *
	 * @param integer|string $n       Number of records (a numeric string from the database is accepted)
	 * @param integer        $minimum Smallest number of volumes to report
	 *
	 * @return integer
	 */
	private function volumeCount($n, $minimum = 0) {
		return max($minimum, (int) ceil($n / self::RECORDS_PER_VOLUME));
	}

	/**
	 * Build the <sitemap> index entries for one tree and one record type.
	 *
	 * @param object  $tree     The tree; only its getTreeId() method is used
	 * @param string  $rec_type One of i, s, r, n, m
	 * @param integer $volumes  Number of volumes to list (0 .. $volumes - 1)
	 * @param string  $lastmod  A ready-made <lastmod> element
	 *
	 * @return string
	 */
	private function indexEntries($tree, $rec_type, $volumes, $lastmod) {
		$xml = '';
		for ($i = 0; $i < $volumes; ++$i) {
			// Ampersands in the URL must be written as &amp; to keep the XML well-formed.
			$xml .=
				'<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() .
				'&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-' . $rec_type . '-' . $i .
				'.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
		}

		return $xml;
	}

	/**
	 * The index file contains references to all the other files.
	 * These files are the same for visitors/users/admins.
	 */
	private function generateIndex() {
		// Check the cache
		$timestamp = $this->getSetting('sitemap.timestamp');
		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
			$data = $this->getSetting('sitemap.xml');
		} else {
			// One COUNT(*) query per record type, in the order the volumes are listed.
			$count_queries = array(
				'i' => "SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id",
				's' => "SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id",
				'r' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'",
				'n' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'",
				'm' => "SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id",
			);

			$data    = '';
			$lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
			foreach (Tree::getAll() as $tree) {
				if ($tree->getPreference('include_in_sitemap')) {
					foreach ($count_queries as $rec_type => $sql) {
						$n = Database::prepare($sql)
							->execute(array('tree_id' => $tree->getTreeId()))
							->fetchOne();
						// Every volume also lists the tree's home page, so an included tree
						// always gets at least one "individuals" volume, even when empty.
						$minimum = $rec_type === 'i' ? 1 : 0;
						$data .= $this->indexEntries($tree, $rec_type, $this->volumeCount($n, $minimum), $lastmod);
					}
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
			// Cache this data.
			$this->setSetting('sitemap.xml', $data);
			$this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
		}
		header('Content-Type: application/xml');
		header('Content-Length: ' . strlen($data));
		echo $data;
	}

	/**
	 * Fetch one volume (page) of rows for a record query.
	 *
	 * @param string  $sql    A SELECT statement with a :tree_id placeholder and an ORDER BY clause,
	 *                        but without LIMIT/OFFSET
	 * @param integer $ged_id Tree ID
	 * @param string  $volume Zero-based volume number
	 *
	 * @return array Rows as returned by fetchAll()
	 */
	private function fetchVolume($sql, $ged_id, $volume) {
		return Database::prepare(
			$sql . " LIMIT :limit OFFSET :offset"
		)->execute(array(
			'tree_id' => $ged_id,
			'limit'   => self::RECORDS_PER_VOLUME,
			'offset'  => self::RECORDS_PER_VOLUME * $volume,
		))->fetchAll();
	}

	/**
	 * A separate file for each family tree and each record type.
	 * These files depend on access levels, so only cache for visitors.
	 *
	 * @param integer $ged_id   Tree ID
	 * @param string  $rec_type One of i (individuals), s (sources), r (repositories), n (notes), m (media)
	 * @param string  $volume   Zero-based volume number
	 */
	private function generateFile($ged_id, $rec_type, $volume) {
		$tree = Tree::findById($ged_id);
		// NOTE(review): guards against findById() returning no tree for an unknown ID;
		// harmless if findById() throws instead - confirm against the Tree class.
		if (!$tree instanceof Tree) {
			http_response_code(404);

			return;
		}

		$setting_name = 'sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume;

		// Check the cache
		$timestamp = $this->getSetting($setting_name . '.timestamp');
		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && !Auth::check()) {
			$data = $this->getSetting($setting_name . '.xml');
		} else {
			// Ampersands in the URL must be written as &amp; to keep the XML well-formed.
			$data    = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
			$records = array();
			switch ($rec_type) {
			case 'i':
				$rows = $this->fetchVolume(
					"SELECT i_id AS xref, i_gedcom AS gedcom" .
					" FROM `##individuals`" .
					" WHERE i_file = :tree_id" .
					" ORDER BY i_id",
					$ged_id,
					$volume
				);
				foreach ($rows as $row) {
					$records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
				}
				break;
			case 's':
				$rows = $this->fetchVolume(
					"SELECT s_id AS xref, s_gedcom AS gedcom" .
					" FROM `##sources`" .
					" WHERE s_file = :tree_id" .
					" ORDER BY s_id",
					$ged_id,
					$volume
				);
				foreach ($rows as $row) {
					$records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
				}
				break;
			case 'r':
				$rows = $this->fetchVolume(
					"SELECT o_id AS xref, o_gedcom AS gedcom" .
					" FROM `##other`" .
					" WHERE o_file = :tree_id AND o_type = 'REPO'" .
					" ORDER BY o_id",
					$ged_id,
					$volume
				);
				foreach ($rows as $row) {
					$records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
				}
				break;
			case 'n':
				$rows = $this->fetchVolume(
					"SELECT o_id AS xref, o_gedcom AS gedcom" .
					" FROM `##other`" .
					" WHERE o_file = :tree_id AND o_type = 'NOTE'" .
					" ORDER BY o_id",
					$ged_id,
					$volume
				);
				foreach ($rows as $row) {
					$records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
				}
				break;
			case 'm':
				$rows = $this->fetchVolume(
					"SELECT m_id AS xref, m_gedcom AS gedcom" .
					" FROM `##media`" .
					" WHERE m_file = :tree_id" .
					" ORDER BY m_id",
					$ged_id,
					$volume
				);
				foreach ($rows as $row) {
					$records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
				}
				break;
			}
			foreach ($records as $record) {
				// Only list records whose name the current user is allowed to see.
				if ($record->canShowName()) {
					$data .= '<url>';
					$data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
					$chan = $record->getFirstFact('CHAN');
					if ($chan) {
						$date = $chan->getDate();
						if ($date->isOK()) {
							$data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
						}
					}
					$data .= '</url>' . PHP_EOL;
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
			// Cache this data - but only for visitors, as we don’t want
			// visitors to see data created by logged-in users.
			if (!Auth::check()) {
				$this->setSetting($setting_name . '.xml', $data);
				$this->setSetting($setting_name . '.timestamp', WT_TIMESTAMP);
			}
		}
		header('Content-Type: application/xml');
		header('Content-Length: ' . strlen($data));
		echo $data;
	}

	/**
	 * Edit the configuration
	 *
	 * Shows one checkbox per family tree. On save, stores the
	 * "include_in_sitemap" preference for every tree and deletes the
	 * cached sitemap data so that it is regenerated.
	 */
	private function admin() {
		$controller = new PageController;
		$controller
			->restrictAccess(Auth::isAdmin())
			->setPageTitle($this->getTitle())
			->pageHeader();

		// Save the updated preferences
		if (Filter::post('action') === 'save') {
			foreach (Tree::getAll() as $tree) {
				$tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
			}
			// Clear cache and force files to be regenerated
			Database::prepare(
				"DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
			)->execute();
		}

		$include_any = false;

		?>
		<ol class="breadcrumb small">
			<li><a href="admin.php"><?php echo I18N::translate('Control panel'); ?></a></li>
			<li><a href="admin_modules.php"><?php echo I18N::translate('Module administration'); ?></a></li>
			<li class="active"><?php echo $controller->getPageTitle(); ?></li>
		</ol>
		<h1><?php echo $controller->getPageTitle(); ?></h1>
		<?php

		echo
			'<p>',
			/* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
			I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
			'</p>',
			'<p>', I18N::translate('Which family trees should be included in the sitemaps?'), '</p>',
			'<form method="post" action="module.php?mod=' . $this->getName() . '&mod_action=admin">',
			'<input type="hidden" name="action" value="save">';
		foreach (Tree::getAll() as $tree) {
			echo '<div class="checkbox"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
			if ($tree->getPreference('include_in_sitemap')) {
				echo 'checked';
				$include_any = true;
			}
			echo '>', $tree->getTitleHtml(), '</label></div>';
		}
		echo
			'<input type="submit" value="', I18N::translate('save'), '">',
			'</form>',
			'<hr>';

		// The submission hints are only shown when at least one tree is included.
		if ($include_any) {
			$site_map_url1 = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml';
			$site_map_url2 = rawurlencode(WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml');
			echo
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
				'<pre>Sitemap: ', $site_map_url1, '</pre>',
				'<hr>',
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
				'<ul>',
				// This list comes from http://en.wikipedia.org/wiki/Sitemaps
				'<li><a target="_blank" href="http://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
				'<li><a target="_blank" href="http://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
				'</ul>';

		}
	}

	/** {@inheritdoc} */
	public function getConfigLink() {
		return 'module.php?mod=' . $this->getName() . '&mod_action=admin';
	}
}