From cee187f3feaadc3789a08a77b1a8b6c9b67b3f8c Mon Sep 17 00:00:00 2001 From: Max Pfeiffer Date: Thu, 20 Mar 2025 22:38:56 +0100 Subject: [PATCH 1/4] Fixed sitemap --- pelican/plugins/seo/seo.py | 2 ++ pelican/plugins/seo/seo_enhancer/__init__.py | 4 +++- pelican/plugins/seo/settings.py | 1 + .../plugins/seo/tests/test_run_robots_file.py | 24 +++++++++++++++++++ .../plugins/seo/tests/test_seo_enhancer.py | 19 +++++++++++++++ pelican/plugins/seo/tests/test_settings.py | 4 ++-- 6 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 pelican/plugins/seo/tests/test_run_robots_file.py diff --git a/pelican/plugins/seo/seo.py b/pelican/plugins/seo/seo.py index 42b3f6d..42d401e 100644 --- a/pelican/plugins/seo/seo.py +++ b/pelican/plugins/seo/seo.py @@ -52,6 +52,7 @@ def get_plugin_settings(context): - SEO_ENHANCER - SEO_ENHANCER_OPEN_GRAPH - SEO_ENHANCER_TWITTER_CARDS + - SEO_ENHANCER_SITEMAP_URL :return: Dictionary of settings for the plugin :rtype: @@ -130,6 +131,7 @@ def run_robots_file(generators): seo_enhancer.generate_robots( rules=robots_rules, output_path=output_path, + sitemap_url=plugin_settings["SEO_ENHANCER_SITEMAP_URL"], ) diff --git a/pelican/plugins/seo/seo_enhancer/__init__.py b/pelican/plugins/seo/seo_enhancer/__init__.py index 98b547c..87e3c28 100644 --- a/pelican/plugins/seo/seo_enhancer/__init__.py +++ b/pelican/plugins/seo/seo_enhancer/__init__.py @@ -64,7 +64,7 @@ def populate_robots(self, document): "disallow": robots_file.get_disallow, } - def generate_robots(self, rules, output_path): + def generate_robots(self, rules, output_path, sitemap_url=None): """Create robots.txt, with noindex and disallow rules for each document URL.""" if not os.path.isdir(output_path): os.mkdir(output_path) @@ -78,6 +78,8 @@ def generate_robots(self, rules, output_path): robots_file.write("\n" + "Noindex: " + rule.get("document_url")) if rule.get("disallow"): robots_file.write("\n" + "Disallow: " + rule.get("document_url")) + if sitemap_url: + 
robots_file.write("\n" + "Sitemap: " + sitemap_url) logger.info("SEO plugin - SEO Enhancement: robots.txt file created") diff --git a/pelican/plugins/seo/settings.py b/pelican/plugins/seo/settings.py index d1d7c0b..ede8d12 100644 --- a/pelican/plugins/seo/settings.py +++ b/pelican/plugins/seo/settings.py @@ -4,6 +4,7 @@ SEO_ENHANCER = False SEO_ENHANCER_OPEN_GRAPH = False SEO_ENHANCER_TWITTER_CARDS = False +SEO_ENHANCER_SITEMAP_URL = None SEO_ARTICLES_LIMIT = 10 SEO_PAGES_LIMIT = 10 diff --git a/pelican/plugins/seo/tests/test_run_robots_file.py b/pelican/plugins/seo/tests/test_run_robots_file.py new file mode 100644 index 0000000..b908424 --- /dev/null +++ b/pelican/plugins/seo/tests/test_run_robots_file.py @@ -0,0 +1,24 @@ +from seo.seo import run_robots_file +import pytest +from unittest.mock import patch, call, MagicMock + + +def test_run_robots_file(): + class FakeGenerator: + context = { + "SEO_ENHANCER": True, + "SEO_ENHANCER_SITEMAP_URL": "https://www.example.com/sitemap.xml", + } + output_path = "foo" + + with patch("seo.seo.SEOEnhancer") as patched_seo_enhancer: + run_robots_file([FakeGenerator()]) + + assert ( + call().generate_robots( + rules=[], + output_path="foo", + sitemap_url="https://www.example.com/sitemap.xml", + ) + in patched_seo_enhancer.mock_calls + ) diff --git a/pelican/plugins/seo/tests/test_seo_enhancer.py b/pelican/plugins/seo/tests/test_seo_enhancer.py index 316e5bd..21f8dda 100644 --- a/pelican/plugins/seo/tests/test_seo_enhancer.py +++ b/pelican/plugins/seo/tests/test_seo_enhancer.py @@ -1,5 +1,7 @@ """Units tests for SEO Enhancer.""" +from pathlib import Path +from tempfile import TemporaryDirectory from unittest.mock import mock_open, patch import pytest @@ -42,6 +44,23 @@ def test_generate_robots_file(self, fake_seo_enhancer, fake_robots_rules): fake_rule = args[0] assert "Noindex: fake-title.html" in fake_rule + def test_generate_robots_file_with_sitemap_url( + self, fake_seo_enhancer, fake_robots_rules + ): + """Test if 
generate_robots writes the sitemap URL to robots.txt.""" + sitemap_url = "https://www.example.com/sitemap.xml" + + with TemporaryDirectory() as tmp_dir_name: + fake_seo_enhancer.generate_robots( + rules=fake_robots_rules, + output_path=tmp_dir_name, + sitemap_url=sitemap_url, + ) + robots_txt_path = Path(tmp_dir_name) / "robots.txt" + contents = robots_txt_path.read_text() + + assert sitemap_url in contents + @pytest.mark.parametrize("open_graph", (True, False)) def test_launch_html_enhancer_returns_dict( self, fake_article, fake_seo_enhancer, open_graph diff --git a/pelican/plugins/seo/tests/test_settings.py b/pelican/plugins/seo/tests/test_settings.py index 107730c..7fb746a 100644 --- a/pelican/plugins/seo/tests/test_settings.py +++ b/pelican/plugins/seo/tests/test_settings.py @@ -12,7 +12,7 @@ def test_get_settings(): settings = get_plugin_settings(context=pelican_context) assert ( - len(settings) == 8 + len(settings) == 9 ) # 6 in the plugin settings file + 2 from the Pelican context # Let's define a setting in Pelican context that @@ -21,5 +21,5 @@ def test_get_settings(): settings = get_plugin_settings(context={"SEO_REPORT": False}) - assert len(settings) == 6 + assert len(settings) == 7 assert settings["SEO_REPORT"] is False From 63a2397ee88367ab935c6d76dc94016ece048681 Mon Sep 17 00:00:00 2001 From: Max Pfeiffer Date: Fri, 21 Mar 2025 09:43:29 +0100 Subject: [PATCH 2/4] Updated docs explaining the new feature --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 35a1d15..8b87be0 100644 --- a/README.md +++ b/README.md @@ -73,6 +73,13 @@ $ pelican content --verbose Done: Processed 1 articles, 0 drafts, 0 pages, 0 hidden pages and 0 draft pages in 0.17 seconds. 
``` +If you want to include a sitemap into your `robots.txt` file which +[Google recommends](https://developers.google.com/search/docs/crawling-indexing/robots/create-robots-txt), you can do +this with this setting: +```python +SEO_ENHANCER_SITEMAP_URL = "https://www.example.com/sitemap.xml" +``` + ### SEO Report The SEO plugin analyzes all your articles and pages and generate an SEO HTML report in your Pelican project root: `seo-report.html` From d07b62854326f46b9095588fdf779b1ef8913afd Mon Sep 17 00:00:00 2001 From: Justin Mayer Date: Fri, 25 Apr 2025 13:11:02 +0200 Subject: [PATCH 3/4] Tweak README & CHANGELOG --- CHANGELOG.md | 2 +- README.md | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79661b1..92fd998 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ CHANGELOG 1.3.0 - 2025-01-15 ------------------ -- Add ability to configure the plugin via the Pelican settings file +Add ability to configure the plugin via the Pelican settings file 1.2.2 - 2021-06-21 ------------------ diff --git a/README.md b/README.md index 8b87be0..bb01d71 100644 --- a/README.md +++ b/README.md @@ -73,9 +73,9 @@ $ pelican content --verbose Done: Processed 1 articles, 0 drafts, 0 pages, 0 hidden pages and 0 draft pages in 0.17 seconds. 
``` -If you want to include a sitemap into your `robots.txt` file which -[Google recommends](https://developers.google.com/search/docs/crawling-indexing/robots/create-robots-txt), you can do -this with this setting: +If you want to include a site map in your `robots.txt` file, which +[Google recommends](https://developers.google.com/search/docs/crawling-indexing/robots/create-robots-txt), +you can specify the appropriate URL for your site map XML file via the following setting: ```python SEO_ENHANCER_SITEMAP_URL = "https://www.example.com/sitemap.xml" ``` From 8b08ca1074191365b7cd73c1601a88e3b15a4230 Mon Sep 17 00:00:00 2001 From: Justin Mayer Date: Fri, 25 Apr 2025 13:14:32 +0200 Subject: [PATCH 4/4] Prepare release --- RELEASE.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 RELEASE.md diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 0000000..2ecf95f --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,3 @@ +Release type: minor + +Add setting to add site map to `robots.txt`