Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 17 additions & 12 deletions pelican/plugins/seo/seo_enhancer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging
import os

from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, NavigableString

from .html_enhancer import HTMLEnhancer
from .robots_file_creator import RobotsFileCreator
Expand Down Expand Up @@ -83,17 +83,29 @@ def generate_robots(self, rules, output_path, sitemap_url=None):

logger.info("SEO plugin - SEO Enhancement: robots.txt file created")

@staticmethod
def _add_meta_tag(soup, attr_name, prefix, name, content):
    """Append a namespaced ``<meta>`` tag, then a newline node, to ``soup.head``.

    The new tag carries two attributes: ``attr_name`` holding ``prefix:name``
    and ``content`` holding the given ``content`` value.
    """
    head = soup.head
    qualified_name = ":".join((prefix, name))
    meta_attributes = {attr_name: qualified_name, "content": content}
    head.append(soup.new_tag(name="meta", attrs=meta_attributes))
    # Follow the tag with a newline text node so the serialized output has a
    # line break after it.
    head.append(NavigableString("\n"))

def add_html_to_file(self, enhancements, path):
"""Open HTML file, add enhancements with bs4 and create the new HTML files."""

with open(path, encoding="utf8") as html_file:
html_content = html_file.read()
soup = BeautifulSoup(html_content, features="html.parser")
soup = BeautifulSoup(
html_content, features="html.parser", preserve_whitespace_tags={"html"}
)

canonical_tag = soup.new_tag(
"link", rel="canonical", href=enhancements.get("canonical_tag")
)
soup.head.append(canonical_tag)
soup.head.append(NavigableString("\n"))

schemas = [e for e in enhancements if e.endswith("_schema")]
for schema in schemas:
Expand All @@ -102,23 +114,16 @@ def add_html_to_file(self, enhancements, path):
# Google validates schemas only with double quotes
schema_script.append(json.dumps(enhancements[schema], ensure_ascii=False))
soup.head.append(schema_script)
soup.head.append(NavigableString("\n"))

# Let's add first Twitter Cards tags in the HTML if feature is enabled
if "twitter_cards" in enhancements:
for tw_property, tw_content in enhancements["twitter_cards"].items():
twitter_cards_tag = soup.new_tag(
name="meta",
attrs={"name": "twitter:" + tw_property, "content": tw_content},
)
soup.head.append(twitter_cards_tag)
self._add_meta_tag(soup, "name", "twitter", tw_property, tw_content)

if "open_graph" in enhancements:
for og_property, og_content in enhancements["open_graph"].items():
open_graph_tag = soup.new_tag(
name="meta",
attrs={"property": "og:" + og_property, "content": og_content},
)
soup.head.append(open_graph_tag)
self._add_meta_tag(soup, "property", "og", og_property, og_content)

with open(path, "w", encoding="utf8") as html_file:
html_file.write(str(soup))
Expand Down
124 changes: 56 additions & 68 deletions pelican/plugins/seo/tests/test_seo_enhancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,25 +117,21 @@ def test_add_html_enhancements_to_file(self, fake_article, fake_seo_enhancer):
assert (
fake_html_content
== """<html>
<head>
<title>Fake Title</title>
<meta content="Fake description" name="description"/>
<link href="https://www.fakesite.com/fake-title.html" rel="canonical"/>\
<script type="application/ld+json">\
{"@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [{"@type": "ListItem", "position": 1, "name": "Fake Site Name", "item": "https://www.fakesite.com"}, {"@type": "ListItem", "position": 2, "name": "Fake_file", "item": "https://www.fakesite.com/fake_file.html"}]}\
</script>\
<script type="application/ld+json">\
{"@context": "https://schema.org", "@type": "Article", "author": {"@type": "Person", "name": "Fake author"}, "publisher": {"@type": "Organization", "name": "Fake Site Name", "logo": {"@type": "ImageObject", "url": "https://www.fakesite.com/fake-logo.jpg"}}, "headline": "Fake Title", "about": "Fake category", "datePublished": "2019-04-03 23:49"}\
</script>\
<head>
<title>Fake Title</title>
<meta content="Fake description" name="description"/>
<link href="https://www.fakesite.com/fake-title.html" rel="canonical"/>
<script type="application/ld+json">{"@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [{"@type": "ListItem", "position": 1, "name": "Fake Site Name", "item": "https://www.fakesite.com"}, {"@type": "ListItem", "position": 2, "name": "Fake_file", "item": "https://www.fakesite.com/fake_file.html"}]}</script>
<script type="application/ld+json">{"@context": "https://schema.org", "@type": "Article", "author": {"@type": "Person", "name": "Fake author"}, "publisher": {"@type": "Organization", "name": "Fake Site Name", "logo": {"@type": "ImageObject", "url": "https://www.fakesite.com/fake-logo.jpg"}}, "headline": "Fake Title", "about": "Fake category", "datePublished": "2019-04-03 23:49"}</script>
</head>
<body>
<h1>Fake content title</h1>
<p>Fake content 🙃</p>
<a href="https://www.fakesite.com">Fake internal link</a>
<p>Fake content with <code>inline code</code></p>
<p>Fake content with "<a href="https://www.fakesite.com">Fake inline internal link</a>"</p>
</body>
</html>"""
<body>
<h1>Fake content title</h1>
<p>Fake content 🙃</p>
<a href="https://www.fakesite.com">Fake internal link</a>
<p>Fake content with <code>inline code</code></p>
<p>Fake content with "<a href="https://www.fakesite.com">Fake inline internal link</a>"</p>
</body>
</html>"""
)

@pytest.mark.parametrize(
Expand Down Expand Up @@ -231,31 +227,27 @@ def test_add_html_enhancements_to_file_with_open_graph(
assert (
fake_html_content
== """<html>
<head>
<title>Fake Title</title>
<meta content="Fake description" name="description"/>
<link href="https://www.fakesite.com/fake-title.html" rel="canonical"/>\
<script type="application/ld+json">\
{"@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [{"@type": "ListItem", "position": 1, "name": "Fake Site Name", "item": "https://www.fakesite.com"}, {"@type": "ListItem", "position": 2, "name": "Fake_file", "item": "https://www.fakesite.com/fake_file.html"}]}\
</script>\
<script type="application/ld+json">\
{"@context": "https://schema.org", "@type": "Article", "author": {"@type": "Person", "name": "Fake author"}, "publisher": {"@type": "Organization", "name": "Fake Site Name", "logo": {"@type": "ImageObject", "url": "https://www.fakesite.com/fake-logo.jpg"}}, "headline": "Fake Title", "about": "Fake category", "datePublished": "2019-04-03 23:49"}\
</script>\
<meta content="https://www.fakesite.com/fake-title.html" property="og:url"/>\
<meta content="website" property="og:type"/>\
<meta content="OG Title" property="og:title"/>\
<meta content="OG Description" property="og:description"/>\
<meta content="https://www.fakesite.com/og-image.jpg" property="og:image"/>\
<meta content="fr_FR" property="og:locale"/>\
<head>
<title>Fake Title</title>
<meta content="Fake description" name="description"/>
<link href="https://www.fakesite.com/fake-title.html" rel="canonical"/>
<script type="application/ld+json">{"@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [{"@type": "ListItem", "position": 1, "name": "Fake Site Name", "item": "https://www.fakesite.com"}, {"@type": "ListItem", "position": 2, "name": "Fake_file", "item": "https://www.fakesite.com/fake_file.html"}]}</script>
<script type="application/ld+json">{"@context": "https://schema.org", "@type": "Article", "author": {"@type": "Person", "name": "Fake author"}, "publisher": {"@type": "Organization", "name": "Fake Site Name", "logo": {"@type": "ImageObject", "url": "https://www.fakesite.com/fake-logo.jpg"}}, "headline": "Fake Title", "about": "Fake category", "datePublished": "2019-04-03 23:49"}</script>
<meta content="https://www.fakesite.com/fake-title.html" property="og:url"/>
<meta content="website" property="og:type"/>
<meta content="OG Title" property="og:title"/>
<meta content="OG Description" property="og:description"/>
<meta content="https://www.fakesite.com/og-image.jpg" property="og:image"/>
<meta content="fr_FR" property="og:locale"/>
</head>
<body>
<h1>Fake content title</h1>
<p>Fake content 🙃</p>
<a href="https://www.fakesite.com">Fake internal link</a>
<p>Fake content with <code>inline code</code></p>
<p>Fake content with "<a href="https://www.fakesite.com">Fake inline internal link</a>"</p>
</body>
</html>"""
<body>
<h1>Fake content title</h1>
<p>Fake content 🙃</p>
<a href="https://www.fakesite.com">Fake internal link</a>
<p>Fake content with <code>inline code</code></p>
<p>Fake content with "<a href="https://www.fakesite.com">Fake inline internal link</a>"</p>
</body>
</html>"""
)

def test_add_html_enhancements_to_file_with_twitter_cards(
Expand Down Expand Up @@ -295,31 +287,27 @@ def test_add_html_enhancements_to_file_with_twitter_cards(
assert (
fake_html_content
== """<html>
<head>
<title>Fake Title</title>
<meta content="Fake description" name="description"/>
<link href="https://www.fakesite.com/fake-title.html" rel="canonical"/>\
<script type="application/ld+json">\
{"@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [{"@type": "ListItem", "position": 1, "name": "Fake Site Name", "item": "https://www.fakesite.com"}, {"@type": "ListItem", "position": 2, "name": "Fake_file", "item": "https://www.fakesite.com/fake_file.html"}]}\
</script>\
<script type="application/ld+json">\
{"@context": "https://schema.org", "@type": "Article", "author": {"@type": "Person", "name": "Fake author"}, "publisher": {"@type": "Organization", "name": "Fake Site Name", "logo": {"@type": "ImageObject", "url": "https://www.fakesite.com/fake-logo.jpg"}}, "headline": "Fake Title", "about": "Fake category", "datePublished": "2019-04-03 23:49"}\
</script>\
<meta content="summary" name="twitter:card"/>\
<meta content="@TestTWCards" name="twitter:site"/>\
<meta content="https://www.fakesite.com/fake-title.html" property="og:url"/>\
<meta content="website" property="og:type"/>\
<meta content="OG Title" property="og:title"/>\
<meta content="OG Description" property="og:description"/>\
<meta content="https://www.fakesite.com/og-image.jpg" property="og:image"/>\
<meta content="fr_FR" property="og:locale"/>\
<head>
<title>Fake Title</title>
<meta content="Fake description" name="description"/>
<link href="https://www.fakesite.com/fake-title.html" rel="canonical"/>
<script type="application/ld+json">{"@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [{"@type": "ListItem", "position": 1, "name": "Fake Site Name", "item": "https://www.fakesite.com"}, {"@type": "ListItem", "position": 2, "name": "Fake_file", "item": "https://www.fakesite.com/fake_file.html"}]}</script>
<script type="application/ld+json">{"@context": "https://schema.org", "@type": "Article", "author": {"@type": "Person", "name": "Fake author"}, "publisher": {"@type": "Organization", "name": "Fake Site Name", "logo": {"@type": "ImageObject", "url": "https://www.fakesite.com/fake-logo.jpg"}}, "headline": "Fake Title", "about": "Fake category", "datePublished": "2019-04-03 23:49"}</script>
<meta content="summary" name="twitter:card"/>
<meta content="@TestTWCards" name="twitter:site"/>
<meta content="https://www.fakesite.com/fake-title.html" property="og:url"/>
<meta content="website" property="og:type"/>
<meta content="OG Title" property="og:title"/>
<meta content="OG Description" property="og:description"/>
<meta content="https://www.fakesite.com/og-image.jpg" property="og:image"/>
<meta content="fr_FR" property="og:locale"/>
</head>
<body>
<h1>Fake content title</h1>
<p>Fake content 🙃</p>
<a href="https://www.fakesite.com">Fake internal link</a>
<p>Fake content with <code>inline code</code></p>
<p>Fake content with "<a href="https://www.fakesite.com">Fake inline internal link</a>"</p>
</body>
</html>"""
<body>
<h1>Fake content title</h1>
<p>Fake content 🙃</p>
<a href="https://www.fakesite.com">Fake internal link</a>
<p>Fake content with <code>inline code</code></p>
<p>Fake content with "<a href="https://www.fakesite.com">Fake inline internal link</a>"</p>
</body>
</html>"""
)