<?php
namespace Fisharebest\Webtrees;

/**
 * webtrees: online genealogy
 * Copyright (C) 2015 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index ("sitemap.xml") plus one sitemap file per
 * tree / record type / volume, and provides an admin page to choose
 * which family trees are included.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface {
	const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
	const CACHE_LIFE         = 1209600; // Two weeks

	/** {@inheritdoc} */
	public function getTitle() {
		return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
	}

	/** {@inheritdoc} */
	public function getDescription() {
		return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
	}

	/** {@inheritdoc} */
	public function modAction($mod_action) {
		switch ($mod_action) {
		case 'admin':
			$this->admin();
			break;
		case 'generate':
			$this->generate(Filter::get('file'));
			break;
		default:
			http_response_code(404);
		}
	}

	/**
	 * Dispatch a request for a sitemap file.
	 *
	 * Accepts "sitemap.xml" (the index) or "sitemap-<tree>-<type>-<volume>.xml",
	 * where <type> is one of i/s/r/m/n. Anything else yields a 404.
	 *
	 * @param string $file
	 */
	private function generate($file) {
		if ($file === 'sitemap.xml') {
			$this->generateIndex();
		} elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+)\.xml$/D', $file, $match)) {
			// The dot is escaped and "D" stops "$" matching before a trailing newline.
			// Casting to int normalises "007" and "7" to the same cache key and
			// guarantees integers reach the LIMIT/OFFSET arithmetic.
			$this->generateFile((int) $match[1], $match[2], (int) $match[3]);
		} else {
			http_response_code(404);
		}
	}

	/**
	 * The index file contains references to all the other files.
	 * These files are the same for visitors/users/admins.
	 */
	private function generateIndex() {
		// Check the cache
		$timestamp = $this->getSetting('sitemap.timestamp');
		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
			$data = $this->getSetting('sitemap.xml');
		} else {
			// One COUNT query per record type, in the order the volumes are listed.
			$count_queries = array(
				'i' => "SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id",
				's' => "SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id",
				'r' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'",
				'n' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'",
				'm' => "SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id",
			);

			$data    = '';
			$lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
			foreach (Tree::getAll() as $tree) {
				if (!$tree->getPreference('include_in_sitemap')) {
					continue;
				}
				foreach ($count_queries as $rec_type => $sql) {
					$n = Database::prepare($sql)->execute(array('tree_id' => $tree->getTreeId()))->fetchOne();
					// Individuals are always listed (even when empty); other
					// record types are only listed when at least one record exists.
					if ($rec_type === 'i' || $n) {
						$data .= $this->indexEntries($tree, $rec_type, $n, $lastmod);
					}
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
			// Cache this data.
			$this->setSetting('sitemap.xml', $data);
			$this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
		}
		header('Content-Type: application/xml');
		header('Content-Length: ' . strlen($data));
		echo $data;
	}

	/**
	 * Build the <sitemap> index entries for one tree and one record type.
	 *
	 * @param Tree    $tree
	 * @param string  $rec_type One of i/s/r/n/m
	 * @param integer $count    Number of records of this type in the tree
	 * @param string  $lastmod  Pre-rendered <lastmod> element
	 *
	 * @return string
	 */
	private function indexEntries(Tree $tree, $rec_type, $count, $lastmod) {
		// ceil() avoids listing an extra, empty volume when the record count is
		// an exact multiple of the volume size; max() keeps a single volume for
		// an empty set.
		$volumes = max(1, (int) ceil($count / self::RECORDS_PER_VOLUME));

		$entries = '';
		for ($i = 0; $i < $volumes; ++$i) {
			// Ampersands must be entity-escaped inside XML text nodes.
			$entries .= '<sitemap><loc>' . WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&amp;mod_action=generate&amp;file=sitemap-' . $tree->getTreeId() . '-' . $rec_type . '-' . $i . '.xml</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
		}

		return $entries;
	}

	/**
	 * Find a tree by ID, provided that it has been enabled for sitemaps.
	 *
	 * Uses the same enumeration and preference check as the index, so a
	 * volume is only served for trees that the index would advertise.
	 *
	 * @param integer $ged_id
	 *
	 * @return Tree|null
	 */
	private function findSitemapTree($ged_id) {
		foreach (Tree::getAll() as $tree) {
			if ((int) $tree->getTreeId() === (int) $ged_id) {
				return $tree->getPreference('include_in_sitemap') ? $tree : null;
			}
		}

		return null;
	}

	/**
	 * Fetch one volume (page) of rows for a sitemap file.
	 *
	 * @param string  $sql    SELECT statement, including ORDER BY, using the :tree_id placeholder
	 * @param integer $ged_id
	 * @param integer $volume Zero-based volume number
	 *
	 * @return object[] Rows with "xref" and "gedcom" properties
	 */
	private function fetchVolume($sql, $ged_id, $volume) {
		return Database::prepare(
			$sql . " LIMIT :limit OFFSET :offset"
		)->execute(array(
			'tree_id' => $ged_id,
			'limit'   => self::RECORDS_PER_VOLUME,
			'offset'  => self::RECORDS_PER_VOLUME * $volume,
		))->fetchAll();
	}

	/**
	 * A separate file for each family tree and each record type.
	 * These files depend on access levels, so only cache for visitors.
	 *
	 * Responds with a 404 when the tree does not exist or has not been
	 * enabled for sitemaps.
	 *
	 * @param integer $ged_id
	 * @param string  $rec_type
	 * @param string  $volume
	 */
	private function generateFile($ged_id, $rec_type, $volume) {
		$ged_id = (int) $ged_id;
		$volume = (int) $volume;

		// The tree ID comes from the request, so it may be unknown or excluded.
		$tree = $this->findSitemapTree($ged_id);
		if ($tree === null) {
			http_response_code(404);

			return;
		}

		$cache_key = 'sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume;

		// Check the cache
		$timestamp = $this->getSetting($cache_key . '.timestamp');
		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && !Auth::check()) {
			$data = $this->getSetting($cache_key . '.xml');
		} else {
			// Every volume starts with the tree's home page.
			// The ampersand is entity-escaped because this is XML.
			$data    = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
			$records = array();
			switch ($rec_type) {
			case 'i':
				$rows = $this->fetchVolume(
					"SELECT i_id AS xref, i_gedcom AS gedcom" .
					" FROM `##individuals`" .
					" WHERE i_file = :tree_id" .
					" ORDER BY i_id",
					$ged_id,
					$volume
				);
				foreach ($rows as $row) {
					$records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
				}
				break;
			case 's':
				$rows = $this->fetchVolume(
					"SELECT s_id AS xref, s_gedcom AS gedcom" .
					" FROM `##sources`" .
					" WHERE s_file = :tree_id" .
					" ORDER BY s_id",
					$ged_id,
					$volume
				);
				foreach ($rows as $row) {
					$records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
				}
				break;
			case 'r':
				$rows = $this->fetchVolume(
					"SELECT o_id AS xref, o_gedcom AS gedcom" .
					" FROM `##other`" .
					" WHERE o_file = :tree_id AND o_type = 'REPO'" .
					" ORDER BY o_id",
					$ged_id,
					$volume
				);
				foreach ($rows as $row) {
					$records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
				}
				break;
			case 'n':
				$rows = $this->fetchVolume(
					"SELECT o_id AS xref, o_gedcom AS gedcom" .
					" FROM `##other`" .
					" WHERE o_file = :tree_id AND o_type = 'NOTE'" .
					" ORDER BY o_id",
					$ged_id,
					$volume
				);
				foreach ($rows as $row) {
					$records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
				}
				break;
			case 'm':
				$rows = $this->fetchVolume(
					"SELECT m_id AS xref, m_gedcom AS gedcom" .
					" FROM `##media`" .
					" WHERE m_file = :tree_id" .
					" ORDER BY m_id",
					$ged_id,
					$volume
				);
				foreach ($rows as $row) {
					$records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
				}
				break;
			}
			foreach ($records as $record) {
				// Only list records whose name the current user may see.
				if ($record->canShowName()) {
					$data .= '<url>';
					$data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
					// Use the record's CHAN (change) fact, if it has a usable date, as <lastmod>.
					$chan = $record->getFirstFact('CHAN');
					if ($chan) {
						$date = $chan->getDate();
						if ($date->isOK()) {
							$data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
						}
					}
					$data .= '</url>' . PHP_EOL;
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
			// Cache this data - but only for visitors, as we don’t want
			// visitors to see data created by logged-in users.
			if (!Auth::check()) {
				$this->setSetting($cache_key . '.xml', $data);
				$this->setSetting($cache_key . '.timestamp', WT_TIMESTAMP);
			}
		}
		header('Content-Type: application/xml');
		header('Content-Length: ' . strlen($data));
		echo $data;
	}

	/**
	 * Edit the configuration
	 *
	 * Shows one checkbox per family tree; saving stores the choice in the
	 * tree preference "include_in_sitemap" and clears all cached sitemaps.
	 */
	private function admin() {
		$controller = new PageController;
		$controller
			->restrictAccess(Auth::isAdmin())
			->setPageTitle($this->getTitle())
			->pageHeader();

		// Save the updated preferences
		// NOTE(review): no CSRF token is checked here - confirm whether the framework provides one.
		if (Filter::post('action') == 'save') {
			foreach (Tree::getAll() as $tree) {
				$tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
			}
			// Clear cache and force files to be regenerated
			Database::prepare(
				"DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
			)->execute();
		}

		$include_any = false;

		?>
		<ol class="breadcrumb small">
			<li><a href="admin.php"><?php echo I18N::translate('Control panel'); ?></a></li>
			<li><a href="admin_modules.php"><?php echo I18N::translate('Module administration'); ?></a></li>
			<li class="active"><?php echo $controller->getPageTitle(); ?></li>
		</ol>
		<h1><?php echo $controller->getPageTitle(); ?></h1>
		<?php

		echo
			'<p>',
			/* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
			I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.') .
			'</p>',
			'<p>', I18N::translate('Which family trees should be included in the sitemaps?'), '</p>',
			// The ampersand is entity-escaped because this is an HTML attribute.
			'<form method="post" action="module.php?mod=' . $this->getName() . '&amp;mod_action=admin">',
			'<input type="hidden" name="action" value="save">';
		foreach (Tree::getAll() as $tree) {
			echo '<div class="checkbox"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
			if ($tree->getPreference('include_in_sitemap')) {
				echo 'checked';
				$include_any = true;
			}
			echo '>', $tree->getTitleHtml(), '</label></div>';
		}
		echo
			'<input type="submit" value="', I18N::translate('save'), '">',
			'</form>',
			'<hr>';

		if ($include_any) {
			// Raw URL: escaped for HTML when displayed, URL-encoded when used as a query parameter.
			$site_map_url1 = WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=sitemap.xml';
			$site_map_url2 = rawurlencode($site_map_url1);
			echo
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
				'<pre>Sitemap: ', htmlspecialchars($site_map_url1), '</pre>',
				'<hr>',
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
				'<ul>',
				// This list comes from http://en.wikipedia.org/wiki/Sitemaps
				'<li><a target="_blank" href="http://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
				'<li><a target="_blank" href="http://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
				'</ul>';

		}
	}

	/** {@inheritdoc} */
	public function getConfigLink() {
		return 'module.php?mod=' . $this->getName() . '&mod_action=admin';
	}
}