Skip to content

Commit c67beee

Browse files
committed
Resolve redirects when testing URLs for equality
The URLs that DOIs resolve to can change over time, with redirects pointing from the old location to the new one. To make the tests more robust, only fail if the URLs still differ after following redirects. See also https://www.crossref.org/blog/urls-and-dois-a-complicated-relationship/
1 parent 0518560 commit c67beee

File tree

1 file changed

+30
-2
lines changed

1 file changed

+30
-2
lines changed

tests/test_doi.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
import os
44
from pkg_resources import parse_version
55

6+
from urllib.request import Request, urlopen
7+
from urllib.parse import urlparse, urlunparse
8+
from warnings import warn
9+
610
import pytest
711

812
from doi import (
@@ -11,6 +15,30 @@
1115
)
1216

1317

18+
def simplify_url(u):
    """Parse ``u`` and return the parsed result without query or fragment.

    The return value is the parsed URL object produced by ``urlparse`` (not
    a string), with its ``query`` and ``fragment`` fields replaced by empty
    strings so that two URLs differing only in those parts compare equal.
    """
    parsed = urlparse(u)
    stripped = parsed._replace(query='', fragment='')
    return stripped
20+
21+
22+
def resolve_redirects(u, timeout=30):
    """Open ``u`` and return the simplified URL of the response.

    The scheme of ``u`` is forced to https, the request is sent with a
    browser-like ``User-Agent`` header, and the URL reported by the response
    object is passed through ``simplify_url``.

    :param u: URL to open.
    :param timeout: seconds passed to ``urlopen`` as its blocking-operation
        timeout, so an unresponsive server cannot hang the test run forever.
    :returns: ``simplify_url`` applied to the response's URL.
    :raises: any exception raised by ``urlopen`` (e.g. on network failure or
        timeout) propagates to the caller.
    """
    # Unconditionally upgrade to https, since some resolvers seem to require it
    # If removed, it'd make sense to canonicalize in simplify_url instead to
    # prevent spurious test failures
    u = urlunparse(urlparse(u)._replace(scheme='https'))
    req = Request(u, headers={'User-Agent': 'Mozilla/5.0'})
    # Without an explicit timeout urlopen falls back to the global socket
    # default, which normally means waiting indefinitely.
    with urlopen(req, timeout=timeout) as r:
        return simplify_url(r.url)
30+
31+
32+
def normalize_eq(u, v):
    """Return whether ``u`` and ``v`` denote the same URL.

    Textually equal inputs are accepted immediately. Otherwise a warning is
    emitted asking for the test case to be updated, and the inputs are
    compared again: first after ``simplify_url``, and only if that still
    differs, after ``resolve_redirects`` (which performs network requests).
    """
    if u == v:
        return True

    message = (
        f"{u} textually differs from {v}, please update the relevant case.\n"
        "Attempting to recover by resolving redirects"
    )
    warn(message)

    # Cheap, offline comparison first; fall back to the network only if needed.
    if simplify_url(u) == simplify_url(v):
        return True
    return resolve_redirects(u) == resolve_redirects(v)
40+
41+
1442
def test_valid_version() -> None:
1543
"""Check that the package defines a valid __version__"""
1644
assert parse_version(__version__) >= parse_version("0.1.0")
@@ -32,7 +60,7 @@ def test_validate_doi() -> None:
3260
'https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141'),
3361
]
3462
for doi, url in data:
35-
assert url == validate_doi(doi)
63+
assert normalize_eq(url, validate_doi(doi))
3664

3765
for doi in ['', 'asdf']:
3866
try:
@@ -49,7 +77,7 @@ def test_get_real_url_from_doi() -> None:
4977
'article/abs/pii/S0009261497040141'),
5078
]
5179
for doi, url in data:
52-
assert url == get_real_url_from_doi(doi)
80+
assert normalize_eq(url, get_real_url_from_doi(doi))
5381

5482

5583
def test_find_doi_in_line() -> None:

0 commit comments

Comments
 (0)