<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2017 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
namespace Fisharebest\Webtrees\Module;

use Fisharebest\Webtrees\Auth;
use Fisharebest\Webtrees\Bootstrap4;
use Fisharebest\Webtrees\Controller\PageController;
use Fisharebest\Webtrees\Database;
use Fisharebest\Webtrees\Filter;
use Fisharebest\Webtrees\I18N;
use Fisharebest\Webtrees\Individual;
use Fisharebest\Webtrees\Media;
use Fisharebest\Webtrees\Note;
use Fisharebest\Webtrees\Repository;
use Fisharebest\Webtrees\Source;
use Fisharebest\Webtrees\Tree;

/**
 * Class SiteMapModule
 *
 * Serves a sitemap index ("sitemap.xml") plus one sitemap file per family
 * tree, record type and volume, and provides an admin page for choosing
 * which trees are included. Generated XML is cached in module preferences.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface {
	const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
	const CACHE_LIFE         = 1209600; // Two weeks

	/** {@inheritdoc} */
	public function getTitle() {
		return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
	}

	/** {@inheritdoc} */
	public function getDescription() {
		return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
	}

	/**
	 * This is a general purpose hook, allowing modules to respond to routes
	 * of the form module.php?mod=FOO&mod_action=BAR
	 *
	 * Unknown actions produce a 404 response.
	 *
	 * @param string $mod_action
	 */
	public function modAction($mod_action) {
		switch ($mod_action) {
			case 'admin':
				$this->admin();
				break;
			case 'generate':
				$this->generate(Filter::get('file'));
				break;
			default:
				http_response_code(404);
		}
	}

	/**
	 * Generate an XML file.
	 *
	 * Accepts either "sitemap.xml" (the index) or a name of the form
	 * "sitemap-{tree id}-{record type}-{volume}.xml". Anything else
	 * produces a 404 response.
	 *
	 * @param string $file
	 */
	private function generate($file) {
		$file = (string) $file;

		if ($file === 'sitemap.xml') {
			$this->generateIndex();
		} elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+)\.xml$/D', $file, $match)) {
			// The dot is escaped so that only a literal ".xml" suffix matches, and
			// the D modifier stops "$" from matching before a trailing newline.
			$this->generateFile($match[1], $match[2], $match[3]);
		} else {
			http_response_code(404);
		}
	}

	/**
	 * The index file contains references to all the other files.
	 * These files are the same for visitors/users/admins.
	 *
	 * The index is served from the "sitemap.xml" preference while the cached
	 * copy is younger than CACHE_LIFE; otherwise it is rebuilt and re-cached.
	 */
	private function generateIndex() {
		// Check the cache
		$timestamp = (int) $this->getPreference('sitemap.timestamp');
		if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
			$data = $this->getPreference('sitemap.xml');
		} else {
			$data    = '';
			$lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
			foreach (Tree::getAll() as $tree) {
				if (!$tree->getPreference('include_in_sitemap')) {
					continue;
				}
				$tree_id = $tree->getTreeId();
				foreach ($this->countTreeRecords($tree_id) as $rec_type => $count) {
					// ceil() rather than "<= $count / size": an exact multiple of
					// RECORDS_PER_VOLUME must not produce an extra, empty volume.
					$volumes = (int) ceil($count / self::RECORDS_PER_VOLUME);
					if ($rec_type === 'i') {
						// Individuals always get at least one file, so that an included
						// tree is always listed (each file starts with the tree's home page).
						$volumes = max(1, $volumes);
					}
					for ($volume = 0; $volume < $volumes; ++$volume) {
						$url = $this->buildSitemapUrl('sitemap-' . $tree_id . '-' . $rec_type . '-' . $volume . '.xml');
						// The URL contains "&", which must be entity-escaped inside XML.
						$data .= '<sitemap><loc>' . $this->xmlEscape($url) . '</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
					}
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $data . '</sitemapindex>' . PHP_EOL;
			// Cache this data.
			$this->setPreference('sitemap.xml', $data);
			$this->setPreference('sitemap.timestamp', WT_TIMESTAMP);
		}
		$this->outputXml($data);
	}

	/**
	 * A separate file for each family tree and each record type.
	 * These files depend on access levels, so only cache for visitors.
	 *
	 * Responds with 404 when the tree cannot be found or is not included
	 * in the sitemaps.
	 *
	 * @param int    $ged_id
	 * @param string $rec_type One of i, s, r, n, m
	 * @param string $volume   Zero-based volume number
	 */
	private function generateFile($ged_id, $rec_type, $volume) {
		try {
			$tree = Tree::findById($ged_id);
		} catch (\DomainException $ex) {
			// NOTE(review): presumably thrown for an unknown tree ID - confirm against Tree::findById().
			$tree = null;
		}
		// The index only lists trees with this preference set, so the
		// individual files must not be served for other trees either.
		if ($tree === null || !$tree->getPreference('include_in_sitemap')) {
			http_response_code(404);

			return;
		}

		// Normalise the numeric parts, so that "007" and "7" share one cache entry.
		$tree_id    = $tree->getTreeId();
		$volume     = (int) $volume;
		$cache_key  = 'sitemap-' . $tree_id . '-' . $rec_type . '-' . $volume;
		$is_visitor = !Auth::check();

		// Check the cache
		$timestamp = (int) $this->getPreference($cache_key . '.timestamp');
		if ($is_visitor && $timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
			$data = $this->getPreference($cache_key . '.xml');
		} else {
			// Every file starts with the tree's home page. "&" must be written as "&amp;" in XML.
			$data = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
			foreach ($this->fetchVolumeRecords($tree, $rec_type, $volume) as $record) {
				if ($record->canShowName()) {
					$data .= '<url>';
					$data .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
					$chan = $record->getFirstFact('CHAN');
					if ($chan) {
						$date = $chan->getDate();
						if ($date->isOK()) {
							$data .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
						}
					}
					$data .= '</url>' . PHP_EOL;
				}
			}
			$data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">' . PHP_EOL . $data . '</urlset>' . PHP_EOL;
			// Cache this data - but only for visitors, as we don’t want
			// visitors to see data created by signed-in users.
			if ($is_visitor) {
				$this->setPreference($cache_key . '.xml', $data);
				$this->setPreference($cache_key . '.timestamp', WT_TIMESTAMP);
			}
		}
		$this->outputXml($data);
	}

	/**
	 * Count the records of each type in a tree.
	 *
	 * @param int $tree_id
	 *
	 * @return int[] Record counts, keyed by record type (i, s, r, n, m - in that order)
	 */
	private function countTreeRecords($tree_id) {
		$queries = [
			'i' => "SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id",
			's' => "SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id",
			'r' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'",
			'n' => "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'",
			'm' => "SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id",
		];

		$counts = [];
		foreach ($queries as $rec_type => $sql) {
			$counts[$rec_type] = (int) Database::prepare($sql)->execute(['tree_id' => $tree_id])->fetchOne();
		}

		return $counts;
	}

	/**
	 * Load one volume (page) of records of a given type, ordered by XREF.
	 *
	 * @param Tree   $tree
	 * @param string $rec_type One of i, s, r, n, m
	 * @param int    $volume   Zero-based volume number
	 *
	 * @return array The record objects for this volume; empty for an unknown record type
	 */
	private function fetchVolumeRecords(Tree $tree, $rec_type, $volume) {
		$queries = [
			'i' => "SELECT i_id AS xref, i_gedcom AS gedcom FROM `##individuals` WHERE i_file = :tree_id ORDER BY i_id",
			's' => "SELECT s_id AS xref, s_gedcom AS gedcom FROM `##sources` WHERE s_file = :tree_id ORDER BY s_id",
			'r' => "SELECT o_id AS xref, o_gedcom AS gedcom FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO' ORDER BY o_id",
			'n' => "SELECT o_id AS xref, o_gedcom AS gedcom FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE' ORDER BY o_id",
			'm' => "SELECT m_id AS xref, m_gedcom AS gedcom FROM `##media` WHERE m_file = :tree_id ORDER BY m_id",
		];

		if (!isset($queries[$rec_type])) {
			return [];
		}

		$rows = Database::prepare(
			$queries[$rec_type] . " LIMIT :limit OFFSET :offset"
		)->execute([
			'tree_id' => $tree->getTreeId(),
			'limit'   => self::RECORDS_PER_VOLUME,
			'offset'  => self::RECORDS_PER_VOLUME * $volume,
		])->fetchAll();

		$records = [];
		foreach ($rows as $row) {
			$record = $this->instantiateRecord($rec_type, $row->xref, $tree, $row->gedcom);
			// Skip anything that could not be instantiated, rather than failing later.
			if ($record !== null) {
				$records[] = $record;
			}
		}

		return $records;
	}

	/**
	 * Create the record object that corresponds to a sitemap record type.
	 *
	 * @param string $rec_type One of i, s, r, n, m
	 * @param string $xref
	 * @param Tree   $tree
	 * @param string $gedcom
	 *
	 * @return Individual|Source|Repository|Note|Media|null
	 */
	private function instantiateRecord($rec_type, $xref, Tree $tree, $gedcom) {
		switch ($rec_type) {
			case 'i':
				return Individual::getInstance($xref, $tree, $gedcom);
			case 's':
				return Source::getInstance($xref, $tree, $gedcom);
			case 'r':
				return Repository::getInstance($xref, $tree, $gedcom);
			case 'n':
				return Note::getInstance($xref, $tree, $gedcom);
			case 'm':
				return Media::getInstance($xref, $tree, $gedcom);
			default:
				return null;
		}
	}

	/**
	 * The absolute, unescaped URL from which this module serves a sitemap file.
	 * Callers must escape it for the context (XML, HTML, query string) they use it in.
	 *
	 * @param string $file e.g. "sitemap.xml"
	 *
	 * @return string
	 */
	private function buildSitemapUrl($file) {
		return WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=' . $file;
	}

	/**
	 * Escape text for use as XML character data.
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	private function xmlEscape($text) {
		return htmlspecialchars($text, ENT_QUOTES | ENT_XML1, 'UTF-8');
	}

	/**
	 * Send an XML document to the client.
	 *
	 * @param string $data
	 */
	private function outputXml($data) {
		header('Content-Type: application/xml');
		header('Content-Length: ' . strlen($data));
		echo $data;
	}

	/**
	 * Edit the configuration
	 */
	private function admin() {
		$controller = new PageController;
		$controller
			->restrictAccess(Auth::isAdmin())
			->setPageTitle($this->getTitle())
			->pageHeader();

		// Save the updated preferences
		// NOTE(review): no CSRF check is visible here - confirm whether one is applied elsewhere.
		if (Filter::post('action') === 'save') {
			foreach (Tree::getAll() as $tree) {
				$tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
			}
			// Clear cache and force files to be regenerated
			// NOTE(review): this is not restricted to this module's own settings - confirm that is intended.
			Database::prepare(
				"DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
			)->execute();
		}

		$include_any = false;

		echo Bootstrap4::breadcrumbs([
			'admin.php'         => I18N::translate('Control panel'),
			'admin_modules.php' => I18N::translate('Module administration'),
		], $controller->getPageTitle());

		echo '<h1>', $controller->getPageTitle(), '</h1>';

		echo
			'<p>',
			/* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
			I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see <a href="http://www.sitemaps.org/">www.sitemaps.org</a>.'),
			'</p>',
			'<p>', /* I18N: Label for a configuration option */ I18N::translate('Which family trees should be included in the sitemaps'), '</p>',
			'<form method="post" action="', htmlspecialchars($this->getConfigLink(), ENT_QUOTES, 'UTF-8'), '">',
			'<input type="hidden" name="action" value="save">';
		foreach (Tree::getAll() as $tree) {
			echo '<div class="checkbox"><label><input type="checkbox" name="include', $tree->getTreeId(), '" ';
			if ($tree->getPreference('include_in_sitemap')) {
				echo 'checked';
				$include_any = true;
			}
			echo '>', $tree->getTitleHtml(), '</label></div>';
		}
		echo
			'<input type="submit" value="', I18N::translate('save'), '">',
			'</form>',
			'<hr>';

		if ($include_any) {
			$site_map_url = $this->buildSitemapUrl('sitemap.xml');
			// The ping links take the sitemap URL as a query-string value.
			$ping_url = rawurlencode($site_map_url);
			echo
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
				'<pre>Sitemap: ', htmlspecialchars($site_map_url, ENT_QUOTES, 'UTF-8'), '</pre>',
				'<hr>',
				'<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
				'<ul>',
				// This list comes from http://en.wikipedia.org/wiki/Sitemaps
				'<li><a href="https://www.bing.com/webmaster/ping.aspx?siteMap=' . $ping_url . '">Bing</a></li>',
				'<li><a href="https://www.google.com/webmasters/tools/ping?sitemap=' . $ping_url . '">Google</a></li>',
				'</ul>';
		}
	}

	/** {@inheritdoc} */
	public function getConfigLink() {
		return 'module.php?mod=' . $this->getName() . '&mod_action=admin';
	}
}