<?php
namespace Fisharebest\Webtrees;

/**
 * webtrees: online genealogy
 * Copyright (C) 2015 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

use Zend_Session;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index ("sitemap.xml") plus one sitemap file per
 * tree / record type / volume, and provides an admin page for choosing
 * which trees are included. Generated XML is cached in module settings.
 */
class SiteMapModule extends Module implements ModuleConfigInterface {
    const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    const CACHE_LIFE         = 1209600; // Two weeks

    /** {@inheritdoc} */
    public function getTitle() {
        return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
    }

    /** {@inheritdoc} */
    public function getDescription() {
        return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
    }

    /** {@inheritdoc} */
    public function modAction($mod_action) {
        switch ($mod_action) {
        case 'admin':
            $this->admin();
            break;
        case 'generate':
            // The session is closed before the sitemap is generated.
            Zend_Session::writeClose();
            $this->generate(Filter::get('file'));
            break;
        default:
            http_response_code(404);
        }
    }

    /**
     * Dispatch a request for a sitemap file, by name.
     *
     * "sitemap.xml" is the index; "sitemap-{tree}-{type}-{volume}.xml" is a
     * single volume. Any other name gets a 404 response.
     *
     * @param string $file
     */
    private function generate($file) {
        if ($file === 'sitemap.xml') {
            $this->generateIndex();
        } elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+)\.xml$/', $file, $match)) {
            // The dot is escaped so that only a literal ".xml" suffix is accepted.
            $this->generateFile($match[1], $match[2], $match[3]);
        } else {
            http_response_code(404);
        }
    }

    /**
     * The index file contains references to all the other files.
     * These files are the same for visitors/users/admins.
     */
    private function generateIndex() {
        // Check the cache
        $timestamp = $this->getSetting('sitemap.timestamp');
        if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
            $data = $this->getSetting('sitemap.xml');
        } else {
            // One counting query per record type, in the order the volumes are listed.
            $count_queries = array(
                'i' => "SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id",
                's' => "SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id",
                'r' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'",
                'n' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'",
                'm' => "SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id",
            );

            $data    = '';
            $lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
            foreach (Tree::getAll() as $tree) {
                if ($tree->getPreference('include_in_sitemap')) {
                    foreach ($count_queries as $rec_type => $sql) {
                        $n = Database::prepare($sql)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
                        // Individuals always get at least one volume, so that a tree with
                        // no individuals is still listed (every volume starts with the
                        // tree's home page URL). Other record types are skipped when empty.
                        $minimum_volumes = $rec_type === 'i' ? 1 : 0;
                        $data .= $this->indexEntries($tree->getTreeId(), $rec_type, $n, $lastmod, $minimum_volumes);
                    }
                }
            }
            $data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
            // Cache this data.
            $this->setSetting('sitemap.xml', $data);
            $this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
        }
        header('Content-Type: application/xml');
        header('Content-Length: ' . strlen($data));
        echo $data;
    }

    /**
     * Build the <sitemap> index entries for one record type of one tree.
     *
     * The number of volumes is the record count divided by RECORDS_PER_VOLUME,
     * rounded up, so a count that is an exact multiple of the volume size does
     * not produce a trailing empty volume.
     *
     * @param integer $tree_id
     * @param string  $rec_type        One of the letters i, s, r, n, m
     * @param integer $record_count    Number of records of this type in the tree
     * @param string  $lastmod         A complete <lastmod> element
     * @param integer $minimum_volumes Lower bound on the number of volumes listed
     *
     * @return string XML fragment, one <sitemap> element per line
     */
    private function indexEntries($tree_id, $rec_type, $record_count, $lastmod, $minimum_volumes = 0) {
        $volumes = max($minimum_volumes, (int) ceil($record_count / self::RECORDS_PER_VOLUME));

        $xml = '';
        for ($i = 0; $i < $volumes; ++$i) {
            // Ampersands must be entity-escaped inside XML text.
            $xml .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree_id . '-' . $rec_type . '-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
        }

        return $xml;
    }

    /**
     * A separate file for each family tree and each record type.
     * These files depend on access levels, so only cache for visitors.
     *
     * @param integer $ged_id
     * @param string  $rec_type
     * @param string  $volume
     */
    private function generateFile($ged_id, $rec_type, $volume) {
        $tree = Tree::findById($ged_id);
        // Check the cache
        $cache_key = 'sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume;
        $timestamp = $this->getSetting($cache_key . '.timestamp');
        if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && !Auth::check()) {
            $data = $this->getSetting($cache_key . '.xml');
        } else {
            // Every volume starts with the tree's home page.
            // The ampersand is entity-escaped because this is XML text.
            $data    = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
            $records = array();
            switch ($rec_type) {
            case 'i':
                $rows = $this->fetchVolumeRows(
                    "SELECT i_id AS xref, i_gedcom AS gedcom" .
                    " FROM `##individuals`" .
                    " WHERE i_file = :tree_id" .
                    " ORDER BY i_id",
                    $ged_id,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 's':
                $rows = $this->fetchVolumeRows(
                    "SELECT s_id AS xref, s_gedcom AS gedcom" .
                    " FROM `##sources`" .
                    " WHERE s_file = :tree_id" .
                    " ORDER BY s_id",
                    $ged_id,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 'r':
                $rows = $this->fetchVolumeRows(
                    "SELECT o_id AS xref, o_gedcom AS gedcom" .
                    " FROM `##other`" .
                    " WHERE o_file = :tree_id AND o_type = 'REPO'" .
                    " ORDER BY o_id",
                    $ged_id,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 'n':
                $rows = $this->fetchVolumeRows(
                    "SELECT o_id AS xref, o_gedcom AS gedcom" .
                    " FROM `##other`" .
                    " WHERE o_file = :tree_id AND o_type = 'NOTE'" .
                    " ORDER BY o_id",
                    $ged_id,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            case 'm':
                $rows = $this->fetchVolumeRows(
                    "SELECT m_id AS xref, m_gedcom AS gedcom" .
                    " FROM `##media`" .
                    " WHERE m_file = :tree_id" .
                    " ORDER BY m_id",
                    $ged_id,
                    $volume
                );
                foreach ($rows as $row) {
                    $records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
                }
                break;
            }
            foreach ($records as $record) {
                // Only list records whose name may be shown to the current user.
                if ($record->canShowName()) {
                    $data .= '<url>';
                    $data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
                    $chan = $record->getFirstFact('CHAN');
                    if ($chan) {
                        $date = $chan->getDate();
                        if ($date->isOK()) {
                            $data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
                        }
                    }
                    $data .= '</url>' . PHP_EOL;
                }
            }
            $data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
            // Cache this data - but only for visitors, as we don’t want
            // visitors to see data created by logged-in users.
            if (!Auth::check()) {
                $this->setSetting($cache_key . '.xml', $data);
                $this->setSetting($cache_key . '.timestamp', WT_TIMESTAMP);
            }
        }
        header('Content-Type: application/xml');
        header('Content-Length: ' . strlen($data));
        echo $data;
    }

    /**
     * Fetch the rows belonging to one volume of a sitemap.
     *
     * Appends the LIMIT/OFFSET clause to the query and binds the tree ID,
     * the volume size and the offset of the requested volume.
     *
     * @param string  $sql    A query with a :tree_id placeholder and an ORDER BY clause
     * @param integer $ged_id
     * @param string  $volume Zero-based volume number
     *
     * @return \stdClass[] Rows with "xref" and "gedcom" properties
     */
    private function fetchVolumeRows($sql, $ged_id, $volume) {
        return Database::prepare(
            $sql .
            " LIMIT :limit OFFSET :offset"
        )->execute(array(
            'tree_id' => $ged_id,
            'limit'   => self::RECORDS_PER_VOLUME,
            'offset'  => self::RECORDS_PER_VOLUME * $volume,
        ))->fetchAll();
    }

    /**
     * Edit the configuration
     *
     * Shows one checkbox per tree. On save, stores each tree's
     * "include_in_sitemap" preference and deletes all cached sitemap data.
     */
    private function admin() {
        $controller = new PageController;
        $controller
            ->restrictAccess(Auth::isAdmin())
            ->setPageTitle($this->getTitle())
            ->pageHeader();

        // Save the updated preferences
        if (Filter::post('action') === 'save') {
            foreach (Tree::getAll() as $tree) {
                $tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
            }
            // Clear cache and force files to be regenerated
            Database::prepare(
                "DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
            )->execute();
        }

        $include_any = false;

        ?>
        <ol class="breadcrumb small">
            <li><a href="admin.php"><?php echo I18N::translate('Control panel'); ?></a></li>
            <li><a href="admin_modules.php"><?php echo I18N::translate('Module administration'); ?></a></li>
            <li class="active"><?php echo $controller->getPageTitle(); ?></li>
        </ol>
        <h2><?php echo $controller->getPageTitle(); ?></h2>
        <?php

        echo
            '<p>',
            /* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
            I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
            '</p>',
            '<p>', I18N::translate('Which family trees should be included in the sitemaps?'), '</p>',
            '<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
            '<input type="hidden" name="action" value="save">';
        foreach (Tree::getAll() as $tree) {
            echo '<p><input type="checkbox" name="include', $tree->getTreeId(), '" ';
            if ($tree->getPreference('include_in_sitemap')) {
                echo 'checked';
                $include_any = true;
            }
            echo '>', $tree->getTitleHtml(), '</p>';
        }
        echo
            '<input type="submit" value="', I18N::translate('save'), '">',
            '</form>',
            '<hr>';

        if ($include_any) {
            $site_map_url = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml';
            // HTML-escaped for display; the browser shows (and copies) the plain URL.
            $site_map_url1 = htmlspecialchars($site_map_url, ENT_QUOTES, 'UTF-8');
            // URL-encoded for use as a query parameter of the ping services.
            $site_map_url2 = rawurlencode($site_map_url);
            echo
                '<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
                '<pre>Sitemap: ', $site_map_url1, '</pre>',
                '<hr>',
                '<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
                '<ul>',
                // This list comes from http://en.wikipedia.org/wiki/Sitemaps
                '<li><a target="_blank" href="http://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
                '<li><a target="_blank" href="http://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
                '</ul>';

        }
    }

    /** {@inheritdoc} */
    public function getConfigLink() {
        return 'module.php?mod=' . $this->getName() . '&mod_action=admin';
    }
}