. */

use Zend_Session;

/**
 * Class SiteMapModule
 *
 * Serves sitemap files (http://www.sitemaps.org/) for the family trees whose
 * "include_in_sitemap" preference is set, and provides the admin page on
 * which that preference is edited.
 *
 * Generated documents are cached in the module settings under names starting
 * with "sitemap"; saving the admin page deletes those settings.
 */
class SiteMapModule extends AbstractModule implements ModuleConfigInterface {
    const RECORDS_PER_VOLUME = 500; // Keep sitemap files small, for memory, CPU and max_allowed_packet limits.
    const CACHE_LIFE         = 1209600; // Two weeks

    /** {@inheritdoc} */
    public function getTitle() {
        return /* I18N: Name of a module - see http://en.wikipedia.org/wiki/Sitemaps */ I18N::translate('Sitemaps');
    }

    /** {@inheritdoc} */
    public function getDescription() {
        return /* I18N: Description of the “Sitemaps” module */ I18N::translate('Generate sitemap files for search engines.');
    }

    /** {@inheritdoc} */
    public function modAction($mod_action) {
        switch ($mod_action) {
        case 'admin':
            $this->admin();
            break;
        case 'generate':
            // Finish with the session before doing the (potentially slow) generation.
            Zend_Session::writeClose();
            $this->generate(Filter::get('file'));
            break;
        default:
            http_response_code(404);
        }
    }

    /**
     * Dispatch a request for a sitemap file, or respond with 404 when the
     * name is neither the index nor a well-formed volume name.
     *
     * @param string $file "sitemap.xml" or "sitemap-<tree id>-<i|s|r|m|n>-<volume>.xml"
     */
    private function generate($file) {
        if ($file === 'sitemap.xml') {
            $this->generateIndex();
        } elseif (preg_match('/^sitemap-(\d+)-([isrmn])-(\d+)\.xml$/D', (string) $file, $match)) {
            // The dot is escaped and "D" anchors "$" at the very end, so only exact file names match.
            $this->generateFile($match[1], $match[2], $match[3]);
        } else {
            http_response_code(404);
        }
    }

    /**
     * The index file contains references to all the other files.
     * These files are the same for visitors/users/admins.
     */
    private function generateIndex() {
        // Check the cache
        $timestamp = $this->getSetting('sitemap.timestamp');
        if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE) {
            $data = $this->getSetting('sitemap.xml');
        } else {
            $entries = '';
            $lastmod = '<lastmod>' . date('Y-m-d') . '</lastmod>';
            foreach (Tree::getAll() as $tree) {
                if (!$tree->getPreference('include_in_sitemap')) {
                    continue;
                }
                $tree_id = $tree->getTreeId();
                foreach (array('i', 's', 'r', 'n', 'm') as $rec_type) {
                    // ceil() avoids listing an empty trailing volume when the
                    // count is an exact multiple of RECORDS_PER_VOLUME.
                    $volumes = (int) ceil($this->countRecords($tree_id, $rec_type) / self::RECORDS_PER_VOLUME);
                    if ($rec_type === 'i') {
                        // Volume 0 of the individuals is always listed, even for an
                        // empty tree: generateFile() puts the tree's home page in it.
                        $volumes = max(1, $volumes);
                    }
                    for ($i = 0; $i < $volumes; ++$i) {
                        $url      = $this->sitemapUrl('sitemap-' . $tree_id . '-' . $rec_type . '-' . $i . '.xml');
                        $entries .= '<sitemap><loc>' . $this->escape($url) . '</loc>' . $lastmod . '</sitemap>' . PHP_EOL;
                    }
                }
            }
            $data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $entries . '</sitemapindex>' . PHP_EOL;
            // Cache this data.
            $this->setSetting('sitemap.xml', $data);
            $this->setSetting('sitemap.timestamp', WT_TIMESTAMP);
        }
        $this->sendXml($data);
    }

    /**
     * A separate file for each family tree and each record type.
     * These files depend on access levels, so only cache for visitors.
     *
     * Responds with 404 when the tree does not exist or is not included in
     * the sitemaps, and when a volume other than 0 contains no records.
     *
     * @param integer $ged_id
     * @param string  $rec_type One of i, s, r, n, m
     * @param string  $volume   Zero-based volume number (a string of digits)
     */
    private function generateFile($ged_id, $rec_type, $volume) {
        // Normalise the numbers, so that "0", "00", "000"... share one cache entry.
        $ged_id = (int) $ged_id;
        $volume = (int) $volume;
        $offset = self::RECORDS_PER_VOLUME * $volume;
        $tree   = $this->findSitemapTree($ged_id);

        // An absurdly large volume number overflows to a float; treat it as "no such file".
        if ($tree === null || !is_int($offset)) {
            http_response_code(404);

            return;
        }

        $cache_key  = 'sitemap-' . $ged_id . '-' . $rec_type . '-' . $volume;
        $is_visitor = !Auth::check();

        // Check the cache
        $timestamp = $this->getSetting($cache_key . '.timestamp');
        if ($timestamp > WT_TIMESTAMP - self::CACHE_LIFE && $is_visitor) {
            $data = $this->getSetting($cache_key . '.xml');
        } else {
            $records = $this->fetchRecords($tree, $rec_type, $offset);

            // Volumes beyond the end of the data are not listed in the index.
            // Do not generate (or cache) them for arbitrary requests.
            if ($volume > 0 && empty($records)) {
                http_response_code(404);

                return;
            }

            // Every file starts with the home page of the tree.
            $urls = '<url><loc>' . WT_BASE_URL . 'index.php?ctype=gedcom&amp;ged=' . $tree->getNameUrl() . '</loc></url>' . PHP_EOL;
            foreach ($records as $record) {
                if ($record && $record->canShowName()) {
                    $urls .= '<url>';
                    // NOTE(review): getHtmlUrl() is presumably already entity-escaped, so it is not escaped again here - confirm.
                    $urls .= '<loc>' . WT_BASE_URL . $record->getHtmlUrl() . '</loc>';
                    $chan = $record->getFirstFact('CHAN');
                    if ($chan) {
                        $date = $chan->getDate();
                        if ($date->isOK()) {
                            $urls .= '<lastmod>' . $date->minimumDate()->Format('%Y-%m-%d') . '</lastmod>';
                        }
                    }
                    $urls .= '</url>' . PHP_EOL;
                }
            }
            $data = '<' . '?xml version="1.0" encoding="UTF-8" ?' . '>' . PHP_EOL . '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . PHP_EOL . $urls . '</urlset>' . PHP_EOL;
            // Cache this data - but only for visitors, as we don’t want
            // visitors to see data created by logged-in users.
            if ($is_visitor) {
                $this->setSetting($cache_key . '.xml', $data);
                $this->setSetting($cache_key . '.timestamp', WT_TIMESTAMP);
            }
        }
        $this->sendXml($data);
    }

    /**
     * Edit the configuration
     */
    private function admin() {
        $controller = new PageController;
        $controller
            ->restrictAccess(Auth::isAdmin())
            ->setPageTitle($this->getTitle())
            ->pageHeader();

        // Save the updated preferences
        if (Filter::post('action') == 'save') {
            foreach (Tree::getAll() as $tree) {
                $tree->setPreference('include_in_sitemap', Filter::postBool('include' . $tree->getTreeId()));
            }
            // Clear cache and force files to be regenerated
            Database::prepare(
                "DELETE FROM `##module_setting` WHERE setting_name LIKE 'sitemap%'"
            )->execute();
        }

        $include_any = false;

        echo
            '<h1>', $controller->getPageTitle(), '</h1>',
            '<p>',
            /* I18N: The www.sitemaps.org site is translated into many languages (e.g. http://www.sitemaps.org/fr/) - choose an appropriate URL. */
            I18N::translate('Sitemaps are a way for webmasters to tell search engines about the pages on a website that are available for crawling. All major search engines support sitemaps. For more information, see www.sitemaps.org.'),
            '</p>',
            '<p>', I18N::translate('Which family trees should be included in the sitemaps?'), '</p>',
            // The field names below are the ones read by the "save" handler above.
            '<form method="post" action="', $this->escape($this->getConfigLink()), '">',
            '<input type="hidden" name="action" value="save">';
        foreach (Tree::getAll() as $tree) {
            echo '<p><label><input type="checkbox" value="1" name="include', $tree->getTreeId(), '" ';
            if ($tree->getPreference('include_in_sitemap')) {
                echo 'checked';
                $include_any = true;
            }
            echo '>', $tree->getTitleHtml(), '</label></p>';
        }
        echo
            '<input type="submit" value="', I18N::translate('save'), '">',
            '</form>',
            '<hr>';

        if ($include_any) {
            $site_map_url1 = $this->escape($this->sitemapUrl('sitemap.xml'));
            $site_map_url2 = rawurlencode($this->sitemapUrl('sitemap.xml'));
            echo
                '<p>', I18N::translate('To tell search engines that sitemaps are available, you should add the following line to your robots.txt file.'), '</p>',
                '<pre>Sitemap: ', $site_map_url1, '</pre>',
                '<hr>',
                '<p>', I18N::translate('To tell search engines that sitemaps are available, you can use the following links.'), '</p>',
                // NOTE(review): the search-engine submission links were reconstructed - confirm that these endpoints are the intended ones and are still supported.
                '<ul>',
                '<li><a target="_blank" href="https://www.bing.com/webmaster/ping.aspx?siteMap=' . $site_map_url2 . '">Bing</a></li>',
                '<li><a target="_blank" href="https://www.google.com/webmasters/tools/ping?sitemap=' . $site_map_url2 . '">Google</a></li>',
                '</ul>';
        }
    }

    /** {@inheritdoc} */
    public function getConfigLink() {
        return 'module.php?mod=' . $this->getName() . '&mod_action=admin';
    }

    /**
     * The absolute (unescaped) URL at which this module serves a sitemap file.
     *
     * @param string $file
     *
     * @return string
     */
    private function sitemapUrl($file) {
        return WT_BASE_URL . 'module.php?mod=' . $this->getName() . '&mod_action=generate&file=' . $file;
    }

    /**
     * Entity-escape text for use in XML/HTML content or attribute values.
     *
     * @param string $text
     *
     * @return string
     */
    private function escape($text) {
        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Send an XML document to the client.
     *
     * @param string $data
     */
    private function sendXml($data) {
        header('Content-Type: application/xml');
        // Content-Length is a number of bytes, so the byte-wise strlen() is the right function.
        header('Content-Length: ' . strlen($data));
        echo $data;
    }

    /**
     * Find a tree by its ID, provided that it is included in the sitemaps.
     *
     * @param integer $tree_id
     *
     * @return Tree|null
     */
    private function findSitemapTree($tree_id) {
        foreach (Tree::getAll() as $tree) {
            if ((int) $tree->getTreeId() === (int) $tree_id) {
                return $tree->getPreference('include_in_sitemap') ? $tree : null;
            }
        }

        return null;
    }

    /**
     * Count the records of one type in one tree.
     *
     * @param integer $tree_id
     * @param string  $rec_type One of i, s, r, n, m
     *
     * @return integer
     */
    private function countRecords($tree_id, $rec_type) {
        switch ($rec_type) {
        case 'i':
            $sql = "SELECT COUNT(*) FROM `##individuals` WHERE i_file = :tree_id";
            break;
        case 's':
            $sql = "SELECT COUNT(*) FROM `##sources` WHERE s_file = :tree_id";
            break;
        case 'r':
            $sql = "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'REPO'";
            break;
        case 'n':
            $sql = "SELECT COUNT(*) FROM `##other` WHERE o_file = :tree_id AND o_type = 'NOTE'";
            break;
        case 'm':
            $sql = "SELECT COUNT(*) FROM `##media` WHERE m_file = :tree_id";
            break;
        default:
            return 0;
        }

        return (int) Database::prepare($sql)->execute(array('tree_id' => $tree_id))->fetchOne();
    }

    /**
     * Load one volume of records of the given type.
     *
     * @param Tree    $tree
     * @param string  $rec_type One of i, s, r, n, m
     * @param integer $offset   Number of records to skip
     *
     * @return array The objects returned by the getInstance() factories
     */
    private function fetchRecords($tree, $rec_type, $offset) {
        $records = array();
        switch ($rec_type) {
        case 'i':
            $rows = $this->fetchRows(
                "SELECT i_id AS xref, i_gedcom AS gedcom" .
                " FROM `##individuals`" .
                " WHERE i_file = :tree_id" .
                " ORDER BY i_id",
                $tree,
                $offset
            );
            foreach ($rows as $row) {
                $records[] = Individual::getInstance($row->xref, $tree, $row->gedcom);
            }
            break;
        case 's':
            $rows = $this->fetchRows(
                "SELECT s_id AS xref, s_gedcom AS gedcom" .
                " FROM `##sources`" .
                " WHERE s_file = :tree_id" .
                " ORDER BY s_id",
                $tree,
                $offset
            );
            foreach ($rows as $row) {
                $records[] = Source::getInstance($row->xref, $tree, $row->gedcom);
            }
            break;
        case 'r':
            $rows = $this->fetchRows(
                "SELECT o_id AS xref, o_gedcom AS gedcom" .
                " FROM `##other`" .
                " WHERE o_file = :tree_id AND o_type = 'REPO'" .
                " ORDER BY o_id",
                $tree,
                $offset
            );
            foreach ($rows as $row) {
                $records[] = Repository::getInstance($row->xref, $tree, $row->gedcom);
            }
            break;
        case 'n':
            $rows = $this->fetchRows(
                "SELECT o_id AS xref, o_gedcom AS gedcom" .
                " FROM `##other`" .
                " WHERE o_file = :tree_id AND o_type = 'NOTE'" .
                " ORDER BY o_id",
                $tree,
                $offset
            );
            foreach ($rows as $row) {
                $records[] = Note::getInstance($row->xref, $tree, $row->gedcom);
            }
            break;
        case 'm':
            $rows = $this->fetchRows(
                "SELECT m_id AS xref, m_gedcom AS gedcom" .
                " FROM `##media`" .
                " WHERE m_file = :tree_id" .
                " ORDER BY m_id",
                $tree,
                $offset
            );
            foreach ($rows as $row) {
                $records[] = Media::getInstance($row->xref, $tree, $row->gedcom);
            }
            break;
        }

        return $records;
    }

    /**
     * Run a record query for one tree, limited to a single volume.
     *
     * @param string  $sql    A SELECT statement with a :tree_id placeholder and no LIMIT clause
     * @param Tree    $tree
     * @param integer $offset Number of records to skip
     *
     * @return array Rows with "xref" and "gedcom" properties
     */
    private function fetchRows($sql, $tree, $offset) {
        return Database::prepare(
            $sql . " LIMIT :limit OFFSET :offset"
        )->execute(array(
            'tree_id' => $tree->getTreeId(),
            'limit'   => self::RECORDS_PER_VOLUME,
            'offset'  => $offset,
        ))->fetchAll();
    }
}