-
Notifications
You must be signed in to change notification settings - Fork 10
Sitemap.xml is generated dynamically #81
base: master
Are you sure you want to change the base?
Changes from all commits
3e65513
48e5d3c
556150e
19679e1
50b6176
5d66bf1
119460b
764ca38
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,2 @@ | ||
|
|
||
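| maxCacheDays - Only takes effect when {path} is provided: if a sitemap.xml already exists at {path} in your filesystem and was modified fewer than maxCacheDays days ago, that file is returned instead of generating the sitemap again. |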
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| import lib from './lib' | ||
|
|
||
| module.exports = lib |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,203 @@ | ||
| import fs from 'fs'; | ||
| import util from 'util'; | ||
| import moment from 'moment'; | ||
|
|
||
| const SitemapCrawler = require('sitemap-generator'); | ||
| const Generator = require('sitemap') | ||
|
|
||
// Values a rule's `changeFreq` (or the caller's default) is expected to be one of.
const CHANGE_FREQ_ALWAYS = 'always';
const CHANGE_FREQ_HOURLY = 'hourly';
const CHANGE_FREQ_DAILY = 'daily';
const CHANGE_FREQ_WEEKLY = 'weekly';
const CHANGE_FREQ_MONTHLY = 'monthly';
const CHANGE_FREQ_YEARLY = 'yearly';
const CHANGE_FREQ_NEVER = 'never';

// Fallbacks used when neither a matching rule nor the caller's options supply a value.
const DEFAULT_CHANGE_FREQ = CHANGE_FREQ_MONTHLY;
const DEFAULT_PRIORITY = 0.5;

// Cache purge period handed to the sitemap generator: 600 sec, in milliseconds.
const CACHE_TIME = 10 * 60 * 1000;
|
|
||
/**
 * Gets all the URLs that a website has accessible by users.
 *
 * @param {Object} options
 * @param {string} options.hostname - Passed to the crawler as the site to crawl.
 * @param {boolean} [options.stripQuerystring] - Forwarded to the crawler.
 *   Defaults to `true` only when omitted (null/undefined); an explicit
 *   `false` is respected.
 * @returns {Promise<Array>} Resolves with `[urls, errors]` once the crawler
 *   emits 'done'. Crawl errors are collected in `errors`, not turned into a
 *   rejection.
 */
const runCrawler = (options) => {
  const {hostname} = options;

  // `|| true` would always evaluate to true, making the option impossible to
  // turn off, so only fall back to the default when no value was provided.
  const stripQuerystring = options.stripQuerystring == null
    ? true
    : options.stripQuerystring;

  // NB: We use SitemapCrawler generator to create all the sitemap tree.
  // filepath should be null, so it doesn't generate an xml file.
  const crawler = SitemapCrawler(hostname, {
    stripQuerystring,
    filepath: null,
  });

  return new Promise((resolve) => {
    const urls = [];
    const errors = [];

    crawler.on('add', (url) => {
      urls.push(url);
    });

    crawler.on('error', (error) => {
      errors.push(error);
    });

    crawler.on('done', () => {
      resolve([urls, errors]);
    });

    // Start only after every listener is attached so no event can be missed.
    crawler.start();
  });
};
|
|
||
/**
 * Builds the sitemap entries for the crawled URLs.
 *
 * Each URL takes its `changefreq` / `priority` from the first rule whose
 * `path` (used as a regular expression) matches it, then from the caller's
 * defaults, then from DEFAULT_CHANGE_FREQ / DEFAULT_PRIORITY.
 *
 * @param {Object} options
 * @param {Array<{path: string, changeFreq: string, priority: number}>} [options.rules=[]]
 * @param {number} [options.defaultPriority]
 * @param {string} [options.defaultChangeFreq]
 * @param {Array<string>} [generatedUrls=[]] - URLs to describe.
 * @returns {Array<{url: string, changefreq: string, priority: number}>}
 */
const getXmlUrls = (options, generatedUrls = []) => {
  const {rules = [], defaultPriority, defaultChangeFreq} = options;

  // Compile every rule once instead of once per (url, rule) pair. The flags
  // are left empty on purpose: `test` on a global regex keeps state between
  // calls, which would make matches depend on the previous URL.
  const compiledRules = rules.map((rule) => ({
    rule,
    pattern: new RegExp(rule.path, ''),
  }));

  return generatedUrls.map((url) => {
    const match = compiledRules.find(({pattern}) => pattern.test(url));
    const foundRule = match ? match.rule : {};

    // A priority of 0 is a legitimate value, so pick the first candidate that
    // is actually provided rather than the first truthy one.
    const priority = [foundRule.priority, defaultPriority, DEFAULT_PRIORITY]
      .find((candidate) => candidate != null);

    // TODO: Once we check the validity of the values and set the correct ones
    // we can remove the last fallback.
    return {
      url,
      changefreq: foundRule.changeFreq || defaultChangeFreq || DEFAULT_CHANGE_FREQ,
      priority,
    };
  });
};
|
|
||
/**
 * Turns the crawled URLs into a sitemap XML document.
 *
 * @param {Object} options - Forwarded to getXmlUrls (rules and defaults).
 * @param {Array<string>} [generatedUrls=[]] - URLs found by the crawler.
 * @returns {Promise} Resolves with the XML handed to the `toXML` callback;
 *   rejects with the error that callback reports.
 */
const generateXmlSitemap = (options, generatedUrls = []) => {
  const sitemap = Generator.createSitemap({
    cacheTime: CACHE_TIME, // 600 sec - cache purge period
    urls: getXmlUrls(options, generatedUrls),
  });

  return new Promise((resolve, reject) => {
    sitemap.toXML((failure, xml) => {
      if (failure) {
        reject(failure);
        return;
      }

      resolve(xml);
    });
  });
};
|
|
||
/**
 * Writes the sitemap content to disk.
 *
 * @param {string} path - Destination file.
 * @param {string|Buffer} fileContent - Data passed to `fs.writeFile`.
 * @returns {Promise<void>} Resolves with no value once the write completes;
 *   rejects with the error reported by `fs.writeFile`.
 */
const createSitemapFile = (path, fileContent) => new Promise((resolve, reject) => {
  fs.writeFile(path, fileContent, (writeError) => {
    if (writeError) {
      reject(writeError);
      return;
    }

    resolve();
  });
});
|
|
||
/**
 * Tells whether the file at `path` is recent enough to be served as the
 * cached sitemap.
 *
 * @param {string} path - File to inspect.
 * @param {number} maxCacheDays - The file is fresh while its age, in whole
 *   days since its last modification, is below this number.
 * @returns {Promise<boolean>} `true` when the file exists and is fresh;
 *   `false` when it is too old or `fs.stat` reports an error (e.g. the file
 *   does not exist).
 */
const shouldReturnCachedFile = (path, maxCacheDays) => (
  new Promise((resolve) => {
    const MS_PER_DAY = 24 * 60 * 60 * 1000;

    fs.stat(path, (err, stats) => {
      if (err) {
        resolve(false);
        return;
      }

      const ageInDays = (Date.now() - stats.mtime.getTime()) / MS_PER_DAY;

      // Math.trunc instead of parseInt: parseInt stringifies its argument, so
      // a tiny age such as 5e-7 days would be read as "5e-7" -> 5 days and a
      // file written a few milliseconds ago would be treated as stale.
      resolve(Math.trunc(ageInDays) < maxCacheDays);
    });
  })
);
|
|
||
/**
 * Reads a previously written sitemap file.
 *
 * The file is decoded as UTF-8 so callers receive the XML as text rather than
 * a raw Buffer.
 *
 * @param {string} path - File to read.
 * @returns {Promise<string>} Resolves with the file content; rejects with the
 *   error reported by `fs.readFile`.
 */
const getFileXmlSitemap = (path) => (
  new Promise((resolve, reject) => {
    fs.readFile(path, 'utf8', (error, fileContent) => {
      if (error) {
        reject(error);
        return;
      }

      resolve(fileContent);
    });
  })
);
|
|
||
/**
 * Produces the sitemap XML for a site.
 *
 * When both `path` and a positive `maxCacheDays` are given and the file at
 * `path` is fresh enough, that file's content is returned. Otherwise the site
 * is crawled, the XML is generated and, when `path` is given, written to
 * `path` for later reuse.
 *
 * @param {Object} options - Also forwarded to the crawler and the XML builder.
 * @param {string} options.hostname - Site to crawl.
 * @param {string} [options.path] - Where the generated sitemap is cached.
 * @param {number} [options.maxCacheDays] - Maximum age, in days, of a cached
 *   file that may be returned instead of regenerating.
 * @returns {Promise} Resolves with the sitemap XML; rejects when `options` is
 *   missing, or when crawling or XML generation fails.
 */
const start = async (options) => {
  // TODO: Check validity of the options object.
  // Example: hostname is provided and required!
  // TODO: Use Joi or other JSON schema validation to check it.

  // TODO: Also check if user default values are valid:
  // priority should be [0-1]
  // changeFreq should be one of the constants above
  // otherwise, sets the default values: DEFAULT_CHANGE_FREQ and DEFAULT_PRIORITY

  const {maxCacheDays, path} = options;

  // TODO: Clean this logic... Probably put it inside a method?
  // The cache IS the file at `path`, so without a path there is nothing to look up.
  if (path && maxCacheDays && maxCacheDays > 0) {
    try {
      const hasCachedFile = await shouldReturnCachedFile(path, maxCacheDays);

      if (hasCachedFile) {
        return await getFileXmlSitemap(path);
      }
    } catch (error) {
      console.log('Trying to get file from cached failed...');
      console.log('error', error);
    }
    // SKIP Cached version: Generate the file again
  }

  // Generate file...
  // TODO: Do something with the errors??? (second element of the crawler result)
  const [urls] = await runCrawler(options);

  let generatedXmlSitemap;

  try {
    generatedXmlSitemap = await generateXmlSitemap(options, urls);
  } catch (error) {
    console.log(error);
    throw error;
  }

  if (path) {
    try {
      // TODO: createCacheFile() without waiting for it
      await createSitemapFile(path, generatedXmlSitemap);
    } catch (error) {
      // NB: If it returns an error, we don't care: the sitemap was generated,
      // only caching it failed, so it is still returned to the caller.
      console.log('Writing the sitemap cache file failed...');
      console.log('error', error);
    }
  }

  return generatedXmlSitemap;
};
|
|
||
| const SitemapGenerator = { | ||
| start, | ||
| } | ||
|
|
||
| module.exports = SitemapGenerator | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where are the tests at? 😂 |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,23 @@ | ||
| { | ||
| "name": "fancy-sitemap", | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should include a main, that way it doesn't have to guess where the entry file is.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. "main": "./lib" |
||
| "description": "PUT A FANCY DESCRIPTION", | ||
| "homepage": "http://www.github.com/ferreiro/fancy-sitemap", | ||
| "version": "0.0.9", | ||
| "private": false, | ||
| "license": "MIT", | ||
| "keywords": [], | ||
| "author": { | ||
| "name": "Jorge Ferreiro", | ||
| "email": "jorge@ferreiro.me", | ||
| "url": "https://www.ferreiro.me/" | ||
| }, | ||
| "scripts": { | ||
| "start": "" | ||
| }, | ||
| "dependencies": { | ||
| "lodash": "4.17.11", | ||
| "sitemap": "2.1.0", | ||
| "sitemap-generator": "8.3.3" | ||
| }, | ||
| "devDependencies": {} | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,6 +7,15 @@ const helmet = require('helmet') | |
| const session = require('express-session') | ||
| const express = require('express') | ||
| const compression = require('compression') | ||
| const rateLimit = require('express-rate-limit'); | ||
|
|
||
| const crawlersRateLimit = rateLimit({ | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this a real concern? |
||
| windowMs: 15 * 60 * 1000, // 15 minutes | ||
| max: 15 // limit each IP to 15 requests per windowMs | ||
| }); | ||
|
|
||
| // TODO: Move to its own repository and deploy... | ||
| const sitemapGenerator = require('../../fancy-sitemap/index') | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are in a workspace and it is defined as a package. Let the workspaces handle resolution. |
||
|
|
||
| const env = require('../env') | ||
|
|
||
|
|
@@ -18,11 +27,56 @@ module.exports = (app) => { | |
| app.use(compression()) | ||
|
|
||
| // Search engines | ||
| app.get('/robots.txt', (req, res) => { | ||
| app.get('/robots.txt', crawlersRateLimit, (req, res) => { | ||
| res.sendFile(path.join(__dirname + '/../robots.txt')) | ||
| }) | ||
| app.get('/sitemap.xml', (req, res) => { | ||
| res.sendFile(path.join(__dirname + '/../sitemap.xml')) | ||
|
|
||
| app.get('/sitemap.xml', crawlersRateLimit, (req, res) => { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it possible instead to generate this at build time and deploy it as part of the bundle? Then you don't need to continuously regenerate it. The reason to not do this if there is a way to dynamically change/add urls. If that is the case another option could be to create a webhook that generates the file as a result of an action(create post, delete post)
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think what you have is great and will work, but may be more performant to run it less. |
||
| // TODO: Move the sitemap configuration to its own file... | ||
| sitemapGenerator | ||
| .start({ | ||
| hostname: process.env.NODE_ENV === 'DEV' ? 'localhost:3000' : 'https://www.ferreiro.me', | ||
| path: 'sitemap.xml', | ||
| maxCacheDays: env.MAX_CACHE_DAYS_SITEMAP || 0, | ||
| stripQuerystring: true, | ||
| filepath: 'sitemap.xml', | ||
| defaultPriority: 0.8, | ||
| defaultChangeFreq: 'yearly', | ||
| rules: [ | ||
| { | ||
| path: '/$', | ||
| changeFreq: 'monthly', | ||
| priority: 1, | ||
| }, | ||
| { | ||
| path: '/about$', | ||
| changeFreq: 'monthly', | ||
| priority: 1, | ||
| }, | ||
| { | ||
| path: '/blog$', | ||
| changeFreq: 'weekly', | ||
| priority: 1, | ||
| }, | ||
| { | ||
| path: '/blog/*', | ||
| changeFreq: 'monthly', | ||
| priority: 0.9, | ||
| }, | ||
| { | ||
| path: '/talks$', | ||
| changeFreq: 'monthly', | ||
| priority: 1, | ||
| }, | ||
| ], | ||
| }) | ||
| .then((sitemap) => { | ||
| res.header('Content-Type', 'application/xml'); | ||
| return res.send(sitemap) | ||
| }) | ||
| .catch((error) => ( | ||
| res.status(500).send(error).end() | ||
| )) | ||
| }) | ||
|
|
||
| // Serve static bower: http://goo.gl/e2nTBf | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
TODO: Use Joi or other JSON schema validation to check it. https://github.com/hapijs/joi