<?php

/**
 * ProcessWire Markup Sitemap XML module
 *
 * Generates a sitemap.xml file for use with major search engines.
 *
 * Overview
 * --------
 * The module ignores any hidden pages and their children, assuming that since
 * you don't want these to be visible on the site then you don't want them to
 * be found via search engines either.
 *
 * Designed for use with ProcessWire (http://processwire.com) version 2.5.
 *
 * Installation
 * ------------
 * 1. Clone the module and place MarkupSitemapXML in your site/modules/ directory:
 *    git clone https://github.com/Notanotherdotcom/MarkupSitemapXML your/path/site/modules/MarkupSitemapXML
 * 2. Login to ProcessWire admin and click Modules.
 * 3. Click "Check for new modules".
 * 4. Click "install" next to the new MarkupSitemapXML module.
 * 5. That's all - no settings are required.
 *
 * Usage
 * -----
 * The module adds a "sitemap_ignore" checkbox to the page settings so that
 * specific pages can be excluded on a per-page basis. Excluding a page also
 * excludes that page's children.
 *
 * The sitemap is accessible at yoursite.com/sitemap.xml.
 *
 * What is left to do
 * ------------------
 * robots.txt: you can specify the location of the sitemap by adding a line
 * with the full URL to the sitemap, e.g.
 *    Sitemap: http://www.example.com/sitemap.xml
 *
 * Ping Google Bot - open the following URL in your browser:
 *    http://www.google.com/webmasters/sitemaps/ping?sitemap=http://www.example.com/sitemap-file.xml
 *
 * Ping Bing Bot - open the following URL in your browser:
 *    http://www.bing.com/webmaster/ping.aspx?siteMap=http://www.example.com/sitemap-file.xml
 *
 * @author Tabea David
 * @version 1.1.1
 * @copyright Copyright (c) 2014 Notanotherdotcom, https://www.notanotherdotcom.com/,
 * @copyright Copyright (c) 2014 KF Interactive, www.kf-interactive.com,
 * @see http://www.processwire.com
 *
 */

/**
 * Class MarkupSitemapXML
 */
class MarkupSitemapXML extends WireData implements Module {

	/**
	 * Provide information about this module to ProcessWire
	 *
	 * @return array
	 */
	public static function getModuleInfo() {
		return array(
			'title' => 'Markup Sitemap XML',
			'summary' => 'Generates an XML sitemap at yoursite.com/sitemap.xml for use with Google Webmaster Tools etc.',
			'href' => 'http://processwire.com/talk/index.php/topic,867.0.html',
			'version' => 111,
			'permanent' => false,
			'autoload' => true,
			'singular' => true,
		);
	}

	/**
	 * Add the hooks
	 *
	 * Always registers the page-editor hooks; the sitemap renderer is only
	 * hooked when the current request URI ends in "/sitemap.xml".
	 *
	 */
	public function init() {
		$this->addHookAfter('ProcessPageEdit::buildFormSettings', $this, 'addIgnoreXML');
		$this->addHookAfter('Pages::save', $this, 'saveIgnoreXML');

		// Intercept a request for an URL ending in sitemap.xml and output
		$suffix = '/sitemap.xml';
		if (isset($_SERVER['REQUEST_URI']) && substr($_SERVER['REQUEST_URI'], -strlen($suffix)) === $suffix) {
			$this->addHookBefore('ProcessPageView::pageNotFound', $this, 'renderSitemap');
		}
	}

	/**
	 * Hook adding the "sitemap_ignore" checkbox to the page edit settings form
	 *
	 * @param HookEvent $event
	 */
	public function addIgnoreXML(HookEvent $event) {
		$page = $event->object->getPage();

		// add checkbox only to user-defined templates, not system templates
		if ($page->template->flags & Template::flagSystem) return;

		$fieldset = $event->return; // the fieldwrapper returned by the hooked method
		$checked = (int) $page->sitemap_ignore === 1 ? 'checked' : '';

		$field = $this->modules->get('InputfieldCheckbox');
		$field->attr('id+name', 'sitemap_ignore');
		$field->attr('value', 1);
		$field->label = $this->_('Markup Sitemap XML');
		$field->description = $this->_('Check the box to confirm that you want to hide this page and its children from the XML sitemap');
		$field->label2 = $this->_('Hide from XML sitemap');
		$field->collapsed = Inputfield::collapsedYes;
		$field->attr('checked', $checked);
		$fieldset->add($field);
	}

	/**
	 * Hook writing the page's sitemap_ignore value to the pages table after a save
	 *
	 * @param HookEvent $event First hook argument is the saved page
	 */
	public function saveIgnoreXML(HookEvent $event) {
		$page = $event->arguments(0);
		$database = $this->wire('database');

		try {
			// prepare() is inside the try so a missing column is reported, not fatal
			$query = $database->prepare("UPDATE pages SET sitemap_ignore=:page_sitemap_ignore WHERE id=:page_id");
			$query->bindValue(":page_sitemap_ignore", (int) $page->sitemap_ignore, PDO::PARAM_INT);
			$query->bindValue(":page_id", (int) $page->id, PDO::PARAM_INT);
			$query->execute();
		} catch(Exception $e) {
			$errorCode = $e->getCode();
			$this->error("Could not update column 'sitemap_ignore'. Errorcode: $errorCode");
		}
	}

	/**
	 * Hook rendering the sitemap
	 *
	 * Sends the (cached or freshly generated) sitemap as text/xml and ends the
	 * request. Returns without output when the requested root page does not exist.
	 *
	 * @param HookEvent $event
	 */
	public function renderSitemap(HookEvent $event) {
		$this->pageselector = '';
		$langname = ''; // for LanguageLocalizedUrl

		// set startpage according to request (sitemap.xml spec says that sitemap
		// should only contain pages below its root page in page tree)
		// Fix #11 by FlipZoomMedia | David Karich
		$startpage = str_ireplace(trim(wire('config')->urls->root, '/'), '', $this->sanitizer->path(dirname($_SERVER['REQUEST_URI'])));

		// Multisite requires minor URL-related tweak
		if (wire("modules")->isInstalled("Multisite")) {
			$multisite = wire("modules")->get("Multisite");
			if ($multisite->subdomain) {
				$startpage = "/".$multisite->subdomain.$startpage;
			}
		}

		// make sure that page used as root for sitemap actually exists
		$rootPage = $this->pages->get($startpage);
		if ($rootPage instanceof NullPage) return;

		// support for LanguageLocalizedURL language module
		if (wire("modules")->isInstalled("LanguageLocalizedURL")) {
			$llu = wire("modules")->get("LanguageLocalizedURL");
			$langname = $this->page->name;
			$lang = ($llu->defaultLang == $langname) ? 'default' : $langname;
			$langpage = $this->page;
			// get the field name for published language from the modules settings
			$publishedfield_name = $llu->publishedPageField ? $llu->publishedPageField : 'language_published';
			// add a selector to find children pages when generating sitemap list
			$this->pageselector = "$publishedfield_name=$langpage->id";
			// set user language so the module will spit out language localized urls
			$this->user->language = $this->languages->get($lang);
		}

		// Check for the cached sitemap, else generate and cache a fresh sitemap
		$startpagestr = $this->sanitizer->pageName($startpage);
		$cache = wire('modules')->get("MarkupCache");
		$output = $cache->get("MarkupSitemapXML$startpagestr$langname", 3600);
		if (!$output) {
			$output = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
			$output .= '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">';
			$output .= $this->sitemapListPage($rootPage);
			$output .= "\n</urlset>";
			$cache->save($output);
		}

		header("Content-Type: text/xml", true, 200);
		echo $output;
		exit();
	}

	/**
	 * Build the <url> entries for a page and, recursively, its children
	 *
	 * A page flagged with sitemap_ignore is skipped together with its whole
	 * subtree. The homepage is exempt, else no pages would be returned at all.
	 *
	 * @param Page $page
	 * @return string Concatenated <url> elements (empty string when nothing is listed)
	 */
	public function sitemapListPage($page) {
		// "hide this page and its children": an ignored page ends the recursion
		if ($page->sitemap_ignore && $page->path != '/') return "";

		$entry = "";
		if ($page->viewable()) {
			$modified = date('Y-m-d', $page->modified);
			// the sitemap protocol requires entity-escaped URLs
			$loc = htmlspecialchars($page->httpUrl, ENT_QUOTES, 'UTF-8');
			$entry = "\n  <url>\n";
			$entry .= "    <loc>{$loc}</loc>\n";
			$entry .= "    <lastmod>{$modified}</lastmod>\n";
			$entry .= "  </url>";
		}

		// Fix #12 by FlipZoomMedia | David Karich
		foreach ($page->children($this->pageselector) as $child) {
			$entry .= $this->sitemapListPage($child);
		}

		return $entry;
	}

	/**
	 * Install the module
	 *
	 * Adds the sitemap_ignore column to the pages table.
	 *
	 */
	public function ___install() {
		$database = $this->wire('database');
		$newField = "sitemap_ignore int(11) unsigned NOT NULL DEFAULT '0'";

		try {
			$query = $database->prepare("ALTER TABLE pages ADD $newField");
			$query->execute();
			$this->message("Added column 'sitemap_ignore'. Check this field in page settings where you want to prevent inclusion in the XML sitemap");
		} catch(Exception $e) {
			$errorCode = $e->getCode();
			$this->error("Could not add column 'sitemap_ignore'. Errorcode: $errorCode");
		}
	}

	/**
	 * Uninstall the module
	 *
	 * Drops the sitemap_ignore column from the pages table.
	 *
	 */
	public function ___uninstall() {
		$database = $this->wire('database');

		try {
			$query = $database->prepare("ALTER TABLE pages DROP COLUMN sitemap_ignore");
			$query->execute();
			$this->message("Removed successfully column 'sitemap_ignore'.");
		} catch(Exception $e) {
			$errorCode = $e->getCode();
			$this->error("Could not remove column 'sitemap_ignore'. Errorcode: $errorCode");
		}
	}
}