From 24d0d29fe494ea998c1473c54d55a02f8c5fd597 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sun, 9 Jun 2019 22:57:12 +0200 Subject: [PATCH 01/28] Change urls to point papis --- CONTRIBUTING.rst | 8 ++++---- README.rst | 12 ++++++------ docs/conf.py | 4 ++-- setup.py | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 1c1de3d..4886204 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -13,7 +13,7 @@ Types of Contributions Report Bugs ~~~~~~~~~~~ -Report bugs at https://github.com/alejandrogallo/doi/issues. +Report bugs at https://github.com/papis/python-doi/issues. If you are reporting a bug, please include: @@ -37,7 +37,7 @@ articles, and such. Submit Feedback ~~~~~~~~~~~~~~~ -The best way to send feedback is to file an issue at https://github.com/alejandrogallo/doi/issues. +The best way to send feedback is to file an issue at https://github.com/papis/python-doi/issues. If you are proposing a feature: @@ -51,7 +51,7 @@ Get Started! In short, -1. Clone the repository from ``git@github.com:alejandrogallo/doi.git`` +1. Clone the repository from ``git@github.com:papis/python-doi.git`` 2. Fork the repo on GitHub to your personal account. 3. Add your fork as a remote. 4. Pull in the latest changes from the master branch. @@ -77,6 +77,6 @@ Before you submit a pull request, check that it meets these guidelines: 2. If the pull request adds functionality, the docs should be updated. Put your new functionality into a function with a docstring, and add the feature to the list in README.rst. -3. Check https://travis-ci.org/alejandrogallo/doi/pull_requests +3. Check https://travis-ci.org/papis/python-doi/pull_requests and make sure that the tests pass for all supported Python versions. diff --git a/README.rst b/README.rst index ed5ede0..29d5e27 100644 --- a/README.rst +++ b/README.rst @@ -5,12 +5,12 @@ python-doi :alt: python-doi on the Python Package Index :target: https://pypi.python.org/pypi/python-doi -.. image:: https://img.shields.io/travis/alejandrogallo/python-doi.svg +.. image:: https://img.shields.io/travis/papis/python-doi.svg :alt: Travis Continuous Integration - :target: https://travis-ci.org/alejandrogallo/python-doi -.. image:: https://coveralls.io/repos/github/alejandrogallo/python-doi/badge.svg?branch=master + :target: https://travis-ci.org/papis/python-doi +.. image:: https://coveralls.io/repos/github/papis/python-doi/badge.svg?branch=master :alt: Coveralls - :target: https://coveralls.io/github/alejandrogallo/python-doi?branch=master + :target: https://coveralls.io/github/papis/python-doi?branch=master .. image:: https://readthedocs.org/projects/python-doi/badge/?version=latest :alt: Documentation Status :target: https://python-doi.readthedocs.io/en/latest/?badge=latest @@ -44,9 +44,9 @@ To install the latest development version of python-doi from `Github`_. .. code-block:: console - $ pip install git+https://github.com/alejandrogallo/python-doi.git@master#egg=doi + $ pip install git+https://github.com/papis/python-doi.git@master#egg=doi -.. _Github: https://github.com/alejandrogallo/python-doi +.. _Github: https://github.com/papis/python-doi Usage ----- diff --git a/docs/conf.py b/docs/conf.py index a8f8866..c11f786 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -66,8 +66,8 @@ pygments_style = 'sphinx' extlinks = { - 'issue': ('https://github.com/alejandrogallo/doi/issues/%s', '#'), - 'pr': ('https://github.com/alejandrogallo/doi/pull/%s', 'PR #'), + 'issue': ('https://github.com/papis/python-doi/issues/%s', '#'), + 'pr': ('https://github.com/papis/python-doi/pull/%s', 'PR #'), } # autodoc settings diff --git a/setup.py b/setup.py index 06915f0..ff79b45 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ def get_version(filename): name='python-doi', packages=find_packages(where="src"), package_dir={"": "src"}, - url='https://github.com/alejandrogallo/doi', + url='https://github.com/papis/python-doi', version=version, zip_safe=False, ) From a261ad880b5e8a4bf216e8e62e79a5d3783cc6d8 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sun, 9 Jun 2019 22:57:48 +0200 Subject: [PATCH 02/28] Version 0.1.1 --- src/doi/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doi/__init__.py b/src/doi/__init__.py index 24b69b8..84d076d 100644 --- a/src/doi/__init__.py +++ b/src/doi/__init__.py @@ -2,7 +2,7 @@ import logging -__version__ = '0.1.0' +__version__ = '0.1.1' logger = logging.getLogger("doi") From e61c84fa8da8304ab4c67d0affc37e1fd62d32e8 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sun, 9 Jun 2019 23:07:08 +0200 Subject: [PATCH 03/28] Fix elsevier test --- tests/test_doi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_doi.py b/tests/test_doi.py index be2acce..bea393a 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -26,7 +26,7 @@ def test_validate_doi(): 'https://www.tandfonline.com/doi/full/10.1080/14786442408634457'), ('10.1021/jp003647e', 'https://pubs.acs.org/doi/10.1021/jp003647e'), ('10.1016/S0009-2614(97)04014-1', - 'http://linkinghub.elsevier.com/retrieve/pii/S0009261497040141'), + 'https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141'), ] for doi, url in data: assert(url == validate_doi(doi)) From 32f0e5ee8d2b8d04e6f8004ca0c94eadd5e34a5d Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Tue, 3 Dec 2019 23:32:21 -0600 Subject: [PATCH 04/28] clean up and update docs. * make DOI uppercase always. * add docs to `get_real_url_from_doi`. * remove types (will add back with typing) * added API to generated docs. --- docs/api.rst | 5 +++ docs/conf.py | 1 + docs/index.rst | 3 +- setup.py | 6 ++-- src/doi/__init__.py | 75 ++++++++++++++++++++++----------------------- 5 files changed, 48 insertions(+), 42 deletions(-) create mode 100644 docs/api.rst diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 0000000..3266019 --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,5 @@ +API +=== + +.. automodule:: doi + :members: diff --git a/docs/conf.py b/docs/conf.py index c11f786..e9c303f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -40,6 +40,7 @@ 'sphinx.ext.todo', 'dollarmath', 'sphinx.ext.inheritance_diagram', + 'sphinx_autodoc_typehints', ] if os.getenv('SPELLCHECK'): extensions += 'sphinxcontrib.spelling', diff --git a/docs/index.rst b/docs/index.rst index 813567c..d5b346c 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,8 +8,7 @@ Welcome to python-doi's documentation! readme contributing authors - API -=== + api .. toctree:: :maxdepth: 1 diff --git a/setup.py b/setup.py index ff79b45..ea038a2 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,8 @@ def get_version(filename): dev_requirements = [ 'coverage', 'pytest', 'pytest-cov==2.5.0', 'twine', 'pep8', - 'flake8', 'wheel', 'sphinx', 'sphinx-autobuild', 'sphinx_rtd_theme'] + 'flake8', 'wheel', + 'sphinx', 'sphinx-autobuild', 'sphinx-autodoc-typehints', 'sphinx_rtd_theme'] version = get_version('./src/doi/__init__.py') @@ -34,9 +35,10 @@ def get_version(filename): 'Intended Audience :: Developers', 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 'Natural Language :: English', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', ], description="Python package to work with Document Object Identifier (doi)", install_requires=requirements, diff --git a/src/doi/__init__.py b/src/doi/__init__.py index 84d076d..62772d4 100644 --- a/src/doi/__init__.py +++ b/src/doi/__init__.py @@ -1,28 +1,26 @@ import re +import sys import logging - __version__ = '0.1.1' - - logger = logging.getLogger("doi") -def pdf_to_doi(filepath, maxlines=float('inf')): - """Try to get doi from a filepath, it looks for a regex in the binary - data and returns the first doi found, in the hopes that this doi +def pdf_to_doi(filepath, maxlines=None): + """Try to get DOI from a filepath. It looks for a regex in the binary + data and returns the first DOI found, in the hopes that this DOI is the correct one. - :param filepath: Path to the pdf file - :type filepath: str + :param filepath: Path to the pdf file. :param maxlines: Maximum number of lines that should be checked - For some documnets, it would spend a long time trying to look for - a doi, and dois in the middle of documents don't tend to be the correct - doi of the document. - :type maxlines: int - :returns: DOI or None - :rtype: str or None + For some documents, it could spend a long time trying to look for + a DOI, and DOIs in the middle of documents don't tend to be the correct + DOI of the document. + :returns: DOI or ``None``. """ + if maxlines is None: + maxlines = sys.maxsize + with open(filepath, 'rb') as fd: for j, line in enumerate(fd): doi = find_doi_in_text(line.decode('ascii', errors='ignore')) @@ -30,38 +28,32 @@ def pdf_to_doi(filepath, maxlines=float('inf')): return doi if j > maxlines: return None - else: - return None + return None def validate_doi(doi): - """We check that the DOI can be resolved by official means. If so, we - return the resolved URL, otherwise, we return None (which means the DOI is - invalid). - - http://www.doi.org/factsheets/DOIProxy.html + """We check that the DOI can be resolved by + `official means `_. If so, we + return the resolved URL, otherwise, we return ``None`` (which means the + DOI is invalid). - :param doi: Doi identificator - :type doi: str - :returns: It returns the url assigned to the doi if everything went right - :rtype: str - - :raises ValueError: Whenever the doi is not valid + :param doi: Identifier. + :returns: The URL assigned to the DOI or ``None``. """ from urllib.error import HTTPError, URLError import urllib.request import urllib.parse import json url = "https://doi.org/api/handles/{doi}".format(doi=doi) - logger.debug('handle url %s' % url) + logger.debug('handle url %s', url) request = urllib.request.Request(url) try: result = json.loads(urllib.request.urlopen(request).read().decode()) if 'values' in result: - url = [v['data']['value'] + urls = [v['data']['value'] for v in result['values'] if v.get('type') == 'URL'] - return url[0] if url else None + return urls[0] if urls else None except HTTPError: raise ValueError('HTTP 404: DOI not found') except URLError as e: @@ -80,11 +72,11 @@ def validate_doi(doi): def get_clean_doi(doi): - """Check if doi is actually a url and in that case just get - the exact doi. + """Check if the DOI is actually a URL and in that case just get + the exact DOI. - :doi: String containing a doi - :returns: The pure doi + :param doi: String containing a DOI. + :returns: The extracted DOI. """ doi = re.sub(r'%2F', '/', doi) # For pdfs @@ -96,8 +88,10 @@ def get_clean_doi(doi): def find_doi_in_text(text): - """ - Try to find a doi in a text + """Try to find a DOI in a text. + + :param text: Text in which to look for DOI. + :returns: A DOI, if found, otherwise ``None``. """ text = get_clean_doi(text) forbidden_doi_characters = r'"\s%$^\'<>@,;:#?&' @@ -126,11 +120,16 @@ def find_doi_in_text(text): def get_real_url_from_doi(doi): + """Get a URL corresponding to a DOI. + + :param doi: Identifier. + :returns: A URL for the DOI. If the DOI is invalid, return ``None``. + """ url = validate_doi(doi) - if not url: + if url is None: return url - m = re.match('.*linkinghub\.elsevier.*/pii/([A-Z0-9]+).*', url, re.I) + m = re.match(r'.*linkinghub\.elsevier.*/pii/([A-Z0-9]+).*', url, re.I) if m: return ('https://www.sciencedirect.com/science/article/abs/pii/{pii}' .format(pii=m.group(1))) From f4aa0511bebdca7ab6a4b2320a2484eec61e980e Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Tue, 3 Dec 2019 23:43:09 -0600 Subject: [PATCH 05/28] add typing information --- setup.py | 1 + src/doi/__init__.py | 15 +++++++++------ src/doi/py.typed | 0 tests/test_doi.py | 28 +++++++++++++++------------- 4 files changed, 25 insertions(+), 19 deletions(-) create mode 100644 src/doi/py.typed diff --git a/setup.py b/setup.py index ea038a2..0f69304 100644 --- a/setup.py +++ b/setup.py @@ -50,6 +50,7 @@ def get_version(filename): include_package_data=True, keywords='doi', name='python-doi', + package_data={"doi": ["py.typed"]}, packages=find_packages(where="src"), package_dir={"": "src"}, url='https://github.com/papis/python-doi', diff --git a/src/doi/__init__.py b/src/doi/__init__.py index 62772d4..376b63a 100644 --- a/src/doi/__init__.py +++ b/src/doi/__init__.py @@ -2,11 +2,14 @@ import sys import logging +from typing import Optional + + __version__ = '0.1.1' -logger = logging.getLogger("doi") +logger = logging.getLogger("doi") # type: logging.Logger -def pdf_to_doi(filepath, maxlines=None): +def pdf_to_doi(filepath: str, maxlines: Optional[int] = None) -> Optional[str]: """Try to get DOI from a filepath. It looks for a regex in the binary data and returns the first DOI found, in the hopes that this DOI is the correct one. @@ -31,7 +34,7 @@ def pdf_to_doi(filepath, maxlines=None): return None -def validate_doi(doi): +def validate_doi(doi: str) -> Optional[str]: """We check that the DOI can be resolved by `official means `_. If so, we return the resolved URL, otherwise, we return ``None`` (which means the @@ -71,7 +74,7 @@ def validate_doi(doi): raise ValueError('Something unexpected happened') -def get_clean_doi(doi): +def get_clean_doi(doi: str) -> str: """Check if the DOI is actually a URL and in that case just get the exact DOI. @@ -87,7 +90,7 @@ def get_clean_doi(doi): return doi -def find_doi_in_text(text): +def find_doi_in_text(text: str) -> Optional[str]: """Try to find a DOI in a text. :param text: Text in which to look for DOI. @@ -119,7 +122,7 @@ def find_doi_in_text(text): return None -def get_real_url_from_doi(doi): +def get_real_url_from_doi(doi: str) -> Optional[str]: """Get a URL corresponding to a DOI. :param doi: Identifier. diff --git a/src/doi/py.typed b/src/doi/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_doi.py b/tests/test_doi.py index bea393a..f158e7c 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -9,12 +9,12 @@ ) -def test_valid_version(): +def test_valid_version() -> None: """Check that the package defines a valid __version__""" assert parse_version(__version__) >= parse_version("0.1.0") -def test_validate_doi(): +def test_validate_doi() -> None: data = [ ('10.1063/1.5081715', 'http://aip.scitation.org/doi/10.1063/1.5081715'), @@ -29,25 +29,26 @@ def test_validate_doi(): 'https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141'), ] for doi, url in data: - assert(url == validate_doi(doi)) + assert url == validate_doi(doi) for doi in ['', 'asdf']: try: validate_doi(doi) except ValueError as e: - assert(str(e) == 'HTTP 404: DOI not found') + assert str(e) == 'HTTP 404: DOI not found' -def test_get_real_url_from_doi(): + +def test_get_real_url_from_doi() -> None: data = [ ('10.1016/S0009-2614(97)04014-1', 'https://www.sciencedirect.com/science/' 'article/abs/pii/S0009261497040141'), ] for doi, url in data: - assert(url == get_real_url_from_doi(doi)) + assert url == get_real_url_from_doi(doi) -def test_find_doi_in_line(): +def test_find_doi_in_line() -> None: test_data = [ ('http://dx.doi.org/10.1063/1.881498', '10.1063/1.881498'), ('http://dx.doi.org/10.1063%2F1.881498', '10.1063/1.881498'), @@ -61,8 +62,8 @@ def test_find_doi_in_line(): ('/scitation.org/doi/10.1063/1.88149 8?234saf=34', '10.1063/1.88149'), ('/scitation.org/doi/10.1063/1.uniau12?as=234', '10.1063/1.uniau12'), - ('https://doi.org/10.1093/analys/anw053' , '10.1093/analys/anw053'), - ('http://.scitation.org/doi/10.1063/1.mart(88)1498?asdfwer' , + ('https://doi.org/10.1093/analys/anw053', '10.1093/analys/anw053'), + ('http://.scitation.org/doi/10.1063/1.mart(88)1498?asdfwer', '10.1063/1.mart(88)1498'), ('@ibook{doi:10.1002/9780470125915.ch2,', '10.1002/9780470125915.ch2'), (' str: From 0577dff3ec6ef56ebe42aea4b0dbdbcecf9aae89 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sun, 29 Dec 2019 01:52:58 +0100 Subject: [PATCH 09/28] Add CHANGELOG --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..dd8885f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,7 @@ +v0.2 +==== + +- Remove support for python 3.4 and lower. +- Add type annotations. +- Simplify `validate_doi` to just raise a `404` error in case + something went wrong. From 23981ae0f43e2c4ba7bf56ad8cb1455c4d170c85 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sun, 29 Dec 2019 02:00:04 +0100 Subject: [PATCH 10/28] Add flake8 to travis and fix its errors --- .travis.yml | 1 + src/doi/__init__.py | 2 +- tests/test_doi.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 865ffda..70589f5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,5 +14,6 @@ install: script: - py.test --doctest-modules --cov=doi src tests - mypy src/ tests + - flake8 src/ tests after_success: - coveralls diff --git a/src/doi/__init__.py b/src/doi/__init__.py index 5e7f3b4..1dc1d62 100644 --- a/src/doi/__init__.py +++ b/src/doi/__init__.py @@ -43,7 +43,7 @@ def validate_doi(doi: str) -> Optional[str]: :param doi: Identifier. :returns: The URL assigned to the DOI or ``None``. """ - from urllib.error import HTTPError, URLError + from urllib.error import HTTPError import urllib.request import urllib.parse import json diff --git a/tests/test_doi.py b/tests/test_doi.py index f158e7c..0d87c0c 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -66,7 +66,7 @@ def test_find_doi_in_line() -> None: ('http://.scitation.org/doi/10.1063/1.mart(88)1498?asdfwer', '10.1063/1.mart(88)1498'), ('@ibook{doi:10.1002/9780470125915.ch2,', '10.1002/9780470125915.ch2'), - ('application/pdf' 'doi:10.1063/1.5079474', '10.1063/1.5079474'), From 2c5d55dd837bde23adc5a6a0624b752b18ce7b1c Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sun, 29 Dec 2019 02:09:15 +0100 Subject: [PATCH 11/28] Update readthedocs to use pip --- readthedocs.yml => .readthedocs.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename readthedocs.yml => .readthedocs.yml (74%) diff --git a/readthedocs.yml b/.readthedocs.yml similarity index 74% rename from readthedocs.yml rename to .readthedocs.yml index 0c49bb5..140fc04 100644 --- a/readthedocs.yml +++ b/.readthedocs.yml @@ -1,5 +1,3 @@ -formats: - - none build: image: latest python: @@ -7,5 +5,7 @@ python: # Note that pip_install is buggy, but setup_py_install will not take into # account any dependencies from setup.py. *All* dependencies must be # declared in docs/rtd_environment.yml - setup_py_install: true - pip_install: false + #setup_py_install: true + pip_install: true + extra_requirements: + - dev From a6648848b069d98d70b1affd175ba319b5749573 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sun, 29 Dec 2019 02:16:09 +0100 Subject: [PATCH 12/28] Bump version --- src/doi/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/doi/__init__.py b/src/doi/__init__.py index 1dc1d62..e19359f 100644 --- a/src/doi/__init__.py +++ b/src/doi/__init__.py @@ -5,7 +5,7 @@ from typing import Optional -__version__ = '0.1.1' +__version__ = '0.2.0' logger = logging.getLogger("doi") # type: logging.Logger From 5730a066a561ee10e397c4955a2dba8a92ff97f9 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sat, 19 Nov 2022 01:37:33 +0100 Subject: [PATCH 13/28] Create codeql.yml --- .github/workflows/codeql.yml | 73 ++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..fa89976 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,73 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ "master" ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ "master" ] + + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # â„šī¸ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. + + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 + with: + category: "/language:${{matrix.language}}" From 633bcc7ba47f7b393f5b6469a8af39268303c1ac Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sat, 19 Nov 2022 12:15:35 +0100 Subject: [PATCH 14/28] Create pyre.yml --- .github/workflows/pyre.yml | 46 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 .github/workflows/pyre.yml diff --git a/.github/workflows/pyre.yml b/.github/workflows/pyre.yml new file mode 100644 index 0000000..5ff8885 --- /dev/null +++ b/.github/workflows/pyre.yml @@ -0,0 +1,46 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# This workflow integrates Pyre with GitHub's +# Code Scanning feature. +# +# Pyre is a performant type checker for Python compliant with +# PEP 484. Pyre can analyze codebases with millions of lines +# of code incrementally – providing instantaneous feedback +# to developers as they write code. +# +# See https://pyre-check.org + +name: Pyre + +on: + workflow_dispatch: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + +permissions: + contents: read + +jobs: + pyre: + permissions: + actions: read + contents: read + security-events: write + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + submodules: true + + - name: Run Pyre + uses: facebook/pyre-action@60697a7858f7cc8470d8cc494a3cf2ad6b06560d + with: + # To customize these inputs: + # See https://github.com/facebook/pyre-action#inputs + repo-directory: './' + requirements-path: 'requirements.txt' From 9e620b2f149902d7daaca830af9ce150d526f7d4 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sat, 19 Nov 2022 12:45:02 +0100 Subject: [PATCH 15/28] Update pyre.yml --- .github/workflows/pyre.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pyre.yml b/.github/workflows/pyre.yml index 5ff8885..c761f69 100644 --- a/.github/workflows/pyre.yml +++ b/.github/workflows/pyre.yml @@ -43,4 +43,4 @@ jobs: # To customize these inputs: # See https://github.com/facebook/pyre-action#inputs repo-directory: './' - requirements-path: 'requirements.txt' + From 11f7e750867effe66c5861d30f3d34304e305534 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Sat, 19 Nov 2022 13:04:39 +0100 Subject: [PATCH 16/28] Update pyre.yml --- .github/workflows/pyre.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pyre.yml b/.github/workflows/pyre.yml index c761f69..21fd46f 100644 --- a/.github/workflows/pyre.yml +++ b/.github/workflows/pyre.yml @@ -42,5 +42,5 @@ jobs: with: # To customize these inputs: # See https://github.com/facebook/pyre-action#inputs - repo-directory: './' + repo-directory: './src/' From 051856058f3917b33f039e803902188f16e511df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mat=C4=9Bj=20Cepl?= Date: Sun, 5 Feb 2023 22:24:37 +0100 Subject: [PATCH 17/28] Add net markers in tests to run pytest -k "not net" --- setup.cfg | 4 +++- tests/test_doi.py | 4 ++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 894f9dc..f587ed0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -5,7 +5,9 @@ universal = 1 exclude = docs [tool:pytest] -collect_ignore = ['setup.py'] +markers = + net: marks tests that call use the net (using the URL endpoint, deselect with '-k "not net"') + [mypy] disallow_redefinition = True diff --git a/tests/test_doi.py b/tests/test_doi.py index 0d87c0c..4d3d478 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -3,6 +3,8 @@ import os from pkg_resources import parse_version +import pytest + from doi import ( validate_doi, find_doi_in_text, __version__, pdf_to_doi, get_real_url_from_doi @@ -14,6 +16,7 @@ def test_valid_version() -> None: assert parse_version(__version__) >= parse_version("0.1.0") +@pytest.mark.net def test_validate_doi() -> None: data = [ ('10.1063/1.5081715', @@ -38,6 +41,7 @@ def test_validate_doi() -> None: assert str(e) == 'HTTP 404: DOI not found' +@pytest.mark.net def test_get_real_url_from_doi() -> None: data = [ ('10.1016/S0009-2614(97)04014-1', From f27f38d1a9a616d66e19b55c97521a996ce31e53 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 16 Mar 2024 10:16:09 +0200 Subject: [PATCH 18/28] makefile: simplify targets --- Makefile | 484 ++++--------------------------------------------------- 1 file changed, 27 insertions(+), 457 deletions(-) diff --git a/Makefile b/Makefile index c300533..7e98982 100644 --- a/Makefile +++ b/Makefile @@ -1,460 +1,30 @@ +PYTHON?=python -X dev -# File: common-makefile/src/version.m4 -MAKEFILE_VERSION = v0.0.1-21-g392d792 -MAKEFILE_DATE = 31-07-2017 15:47 -MAKEFILE_AUTHOR = Alejandro Gallo -MAKEFILE_URL = https://github.com/alejandrogallo/python-makefile -MAKEFILE_LICENSE = GPLv3 +all: help - - - -## < /dev/null) -# If messages should have color -WITH_COLOR ?= 1 - -ifneq ($(strip $(QUIET)),0) -FD_OUTPUT = 2>&1 > /dev/null -else -FD_OUTPUT = -endif - -ifdef DEBUG -DBG_FLAG = -DBG_FILE ?= .makefile-dbg -$(shell date | $(SED) "p; s/./=/g" > $(DBG_FILE)) -else -DBG_FLAG = @ -DBG_FILE = -endif - -define log-debug ->> $(or $(DBG_FILE),/dev/null) echo -endef - -# Print commands like [CMD] -define print-cmd-name -"[$(COLOR_LB) \ -$(shell \ - if test "$(1)" = g++; then \ - echo -n GXX; \ - elif test "$(1)" = gcc; then \ - echo -n GCC; \ - elif test "$(1)" = icc; then \ - echo -n ICC; \ - elif test "$(1)" = cc; then \ - echo -n CC; \ - elif test "$(1)" = povray; then \ - echo -n POV; \ - elif test "$(1)" = perl; then \ - echo -n PL; \ - elif test "$(1)" = perl5; then \ - echo -n PL5; \ - elif test "$(1)" = ruby; then \ - echo -n RB; \ - elif test "$(1)" = ruby2; then \ - echo -n RB2; \ - elif test "$(1)" = python; then \ - echo -n PY; \ - elif test "$(1)" = python2; then \ - echo -n PY2; \ - elif test "$(1)" = python3; then \ - echo -n PY3; \ - elif test "$(1)" = pdflatex; then \ - echo -n pdfTeX; \ - elif test "$(1)" = bash; then \ - echo -n BASH; \ - elif test "$(1)" = gnuplot; then \ - echo -n GPT; \ - elif test "$(1)" = mupdf; then \ - echo -n muPDF; \ - else \ - echo -n "$(1)" | tr a-z A-Z ; \ - fi -)\ -$(COLOR_E)]" -endef - -ifndef QQUIET - -ifeq ($(strip $(WITH_COLOR)),1) -# Red -COLOR_R ?= $(if $(TPUT),$(shell $(TPUT) setaf 1),"\033[0;31m") -# Green -COLOR_G ?= $(if $(TPUT),$(shell $(TPUT) setaf 2),"\033[0;32m") -# Yellow -COLOR_Y ?= $(if $(TPUT),$(shell $(TPUT) setaf 3),"\033[0;33m") -# Dark blue -COLOR_DB ?= $(if $(TPUT),$(shell $(TPUT) setaf 4),"\033[0;34m") -# Lila -COLOR_L ?= $(if $(TPUT),$(shell $(TPUT) setaf 5),"\033[0;35m") -# Light blue -COLOR_LB ?= $(if $(TPUT),$(shell $(TPUT) setaf 6),"\033[0;36m") -# Empty color -COLOR_E ?= $(if $(TPUT),$(shell $(TPUT) sgr0),"\033[0m") -ARROW ?= @echo "$(COLOR_L)===>$(COLOR_E)" -else -ARROW ?= @echo "===>" -endif #WITH_COLOR - -ECHO ?= @echo - -else -ARROW := @ > /dev/null echo -ECHO := @ > /dev/null echo -endif #QQUIET - - - - - - -# File: ctags.m4 - - -# ==================================== -# Ctags generation for latex documents -# ==================================== -# -# Generate a tags file so that you can navigate through the tags using -# compatible editors such as emacs or (n)vi(m). -# -tags: ## Create python exhuberant ctags - $(CTAGS) --language-force=python -R * - - - -# File: install.m4 - - -# Old-style requirements file -REQUIREMENTS ?= requirements.txt -# Command to be run when make `install` is run -INSTALL_COMMAND ?= $(PYTHON) setup.py install -# Command to be run when make `install-local` is run -INSTALL_LOCAL_COMMAND ?= $(PYTHON) setup.py install --user -# Command to be run when make `install-dev` is run -INSTALL_DEV_COMMAND ?= $(PYTHON) setup.py develop -# Command to be run when make `install-dev-local` is run -INSTALL_DEV_LOCAL_COMMAND ?= $(PYTHON) setup.py develop --user -# Command to be run when make `uninstall` is run -UNINSTALL_COMMAND ?= $(PIP) uninstall $(shell $(PYTHON) setup.py --name) -# Command to be run when make `install-deps` is run -INSTALL_DEPS_COMMAND ?= $(PIP) install -r requirements.txt -# Command to be run when make `install-deps-local` is run -INSTALL_DEPS_LOCAL_COMMAND ?= $(PIP) install --user -r requirements.txt -install-dev-local: ## Install developement version locally - $(ARROW) Installing development version locally - $(DBG_FLAG)$(INSTALL_DEV_LOCAL_COMMAND) - -install-dev: ## Install developement version - $(ARROW) Installing development version - $(DBG_FLAG)$(INSTALL_DEV_COMMAND) - -install-local: ## Install the package locally - $(ARROW) Installing locally - $(DBG_FLAG)$(INSTALL_LOCAL_COMMAND) - -install: ## Install the package - $(ARROW) Installing... - $(DBG_FLAG)$(INSTALL_COMMAND) - -uninstall: ## Uninstall the package - $(ARROW) Uninstalling... - $(DBG_FLAG)$(UNINSTALL_COMMAND) - -install-deps-local: ## Install python requirements locally - $(ARROW) Installing dependencies... - $(DBG_FLAG)$(INSTALL_DEPS_LOCAL_COMMAND) - -install-deps: ## Install python requirements - $(ARROW) Installing dependencies... - $(DBG_FLAG)$(INSTALL_DEPS_COMMAND) - - - -# File: lint.m4 - - -# Linter program -PY_LINTER ?= flake8 -# ============ -# Check syntax -# ============ -# -# It checks the syntax (lints) of all the tex sources using the program in the -# TEX_LINTER variable. -# -lint: ## Check syntax of sources - $(PY_LINTER) - - - -# File: doc.m4 - - -doc: ## Create documentation - make -C doc/ html - -doc-%: - make -C doc/ $* - -update-gh-pages: ## Update github pages - @echo "Warning: Black magic in action" - git push origin $$(git subtree split --prefix doc/build/html/ master):gh-pages --force - - - - -# File: test.m4 - - -# Command to run for `make test` -TEST_COMMAND ?= $(PYTHON) setup.py test -test: ## Run the tests - $(DBG_FLAG)$(TEST_COMMAND) - - - -# File: virtualenv.m4 - - -ENV ?= -ENV_FOLDER ?= env -ENV_PIP ?= $(ENV_FOLDER)/bin/pip -ENV_PYTHON ?= $(ENV_FOLDER)/bin/python -VIRTUALENV ?= virtualenv - -ifdef ENV -PYTHON = $(ENV_PYTHON) -PIP = $(ENV_PIP) -DEPENDENCIES += virtualenv -DIST_DEPENDENCIES += virtualenv -endif - -virtualenv: $(ENV_FOLDER) ## Create the python virtual environment -$(ENV_FOLDER): - $(ARROW) "Creating virtual environment in '$(ENV_FOLDER)' \ - with python executable '$(PYTHON)'" - $(DBG_FLAG)$(VIRTUALENV) -p $(PYTHON) $(ENV_FOLDER) - - - - -# File: common-makefile/src/update.m4 - - -MAKEFILE_UPDATE_URL ?= https://raw.githubusercontent.com/alejandrogallo/python-makefile/master/dist/Makefile - - -# =============================== -# Update the makefile from source -# =============================== -# -# You can always get the latest `Makefile` version using this target. You may -# override the `MAKEFILE_UPDATE_URL` to any path where you save your own -# personal makefile -# -update: ## Update the makefile from the repository - $(ARROW) "Getting makefile from $(MAKEFILE_UPDATE_URL)" - $(DBG_FLAG)wget $(MAKEFILE_UPDATE_URL) -O Makefile - - - - -# File: common-makefile/src/clean.m4 - - -# Remove command flags -RM_FLAGS ?= -rf - -# Default clean file to be cleaned -DEFAULT_CLEAN_FILES ?= - -# Files to be cleaned -CLEAN_FILES ?= $(DEFAULT_CLEAN_FILES) - -# ============= -# Main cleaning -# ============= -# -# This does a main cleaning of the produced auxiliary files. Before using it -# check which files are going to be cleaned up. -# -clean: ## Remove build and temporary files - $(ARROW) Cleaning up... - $(DBG_FLAG) {\ - for file in $(CLEAN_FILES); do \ - test -e $$file && { \ - $(RM) $(RM_FLAGS) $$file && \ - echo $(call print-cmd-name,RM) "$$file";\ - } || : ; \ - done \ - } - - - - -# File: common-makefile/src/print-variable.m4 - - -# This is used for printing defined variables from Some other scripts. For -# instance if you want to know the value of the `PDF_VIEWER` defined in the -# Makefile, then you would do -# ``` -# make print-PDF_VIEWER -# ``` -# and this would output `PDF_VIEWER=mupdf` for instance. -FORCE: -print-%: - $(DBG_FLAG)echo '$*=$($*)' - -# ===================================== -# Print a variable used by the Makefile -# ===================================== -# -# For debugging purposes it is useful to print out some variables that the -# makefile is using, for that just type `make print` and you will be prompted -# to insert the name of the variable that you want to know. -# -FORCE: -print: ## Print a variable - $(DBG_FLAG)read -p "Variable to print: " variable && \ - $(MAKE) --no-print-directory print-$$variable - - - - -# File: common-makefile/src/help.m4 - - - -# ================ -# Print quick help -# ================ -# -# It prints a quick help in the terminal -help: ## Prints help for targets with comments - $(DBG_FLAG)$(or $(AWK),awk) ' \ - BEGIN {FS = ":.*?## "}; \ - /^## *< Date: Sat, 16 Mar 2024 10:28:40 +0200 Subject: [PATCH 19/28] setup: switch to pyproject and hatchling --- MANIFEST.in | 11 ------- pyproject.toml | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++ setup.cfg | 26 ---------------- setup.py | 60 ------------------------------------ 4 files changed, 82 insertions(+), 97 deletions(-) delete mode 100644 MANIFEST.in create mode 100644 pyproject.toml delete mode 100644 setup.cfg delete mode 100644 setup.py diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 2d4499e..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,11 +0,0 @@ -graft src -include AUTHORS.rst -include CONTRIBUTING.rst -include LICENSE -include README.rst - -recursive-include tests * -recursive-exclude * __pycache__ -recursive-exclude * *.py[co] - -recursive-include docs *.rst conf.py Makefile make.bat *.jpg *.png *.gif diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1e3bc83 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,82 @@ +[build-system] +build-backend = "hatchling.build" +requires = [ + "hatchling>=1.10", +] + +[project] +name = "python-doi" +version = "0.2.0" +description = "Python package to work with Document Object Identifiers (DOIs)" +readme = "README.rst" +keywords = [ + "doi", +] +license = { text = "GPL-3.0-or-later" } +maintainers = [{ name = "Alejandro Gallo", email = "aamsgallo@gmail.com" }] +authors = [{ name = "Alejandro Gallo", email = "aamsgallo@gmail.com" }] +requires-python = ">=3.8" +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Utilities", +] + +[project.optional-dependencies] +develop = [ + "flake8", + "flake8-bugbear", + "Flake8-pyproject", + "flake8-quotes", + "mypy>=0.7", + "pep8-naming", + "pytest", + "pytest-cov", + "python-coveralls", +] +docs = [ + "sphinx>=4", + "sphinx_rtd_theme>=1", +] + +[project.urls] +Repository = "https://github.com/papis/python-doi" + +[tool.hatch.build.targets.sdist] +exclude = [".github", "docs/build"] + +[tool.hatch.build.targets.wheel] +packages = ["src/doi"] + +[tool.flake8] +select = ["B", "D", "E", "F", "N", "Q", "W"] +extend-ignore = ["B019", "E123", "N818", "W503"] +max-line-length = 88 +inline-quotes = "double" +multiline-quotes = "double" + +[tool.pytest.ini_options] +addopts = [ + "--doctest-modules", + "--cov=src/doi", +] +markers = [ + "net: marks tests that call use the net" +] + +[tool.mypy] +strict = true +show_column_numbers = true +hide_error_codes = false +pretty = true +warn_unused_ignores = false diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index f587ed0..0000000 --- a/setup.cfg +++ /dev/null @@ -1,26 +0,0 @@ -[bdist_wheel] -universal = 1 - -[flake8] -exclude = docs - -[tool:pytest] -markers = - net: marks tests that call use the net (using the URL endpoint, deselect with '-k "not net"') - - -[mypy] -disallow_redefinition = True -warn_unused_configs = True -disallow_any_generics = True -disallow_subclassing_any = True -disallow_untyped_calls = True -disallow_untyped_defs = True -disallow_incomplete_defs = True -check_untyped_defs = True -disallow_untyped_decorators = True -no_implicit_optional = True -warn_redundant_casts = True -warn_unused_ignores = True -warn_return_any = True -no_implicit_reexport = True diff --git a/setup.py b/setup.py deleted file mode 100644 index 25324a7..0000000 --- a/setup.py +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -"""The setup script.""" - -from setuptools import setup, find_packages - - -def get_version(filename): - """Extract the package version""" - with open(filename) as in_fh: - for line in in_fh: - if line.startswith('__version__'): - return line.split('=')[1].strip()[1:-1] - raise ValueError("Cannot extract version from %s" % filename) - - -with open('README.rst') as readme_file: - readme = readme_file.read() - -requirements = [] - -dev_requirements = [ - 'coverage', 'pytest', 'pytest-cov==2.5.0', 'twine', 'pep8', - 'flake8', 'wheel', 'mypy', - 'sphinx', 'sphinx-autobuild', 'sphinx-autodoc-typehints', - 'sphinx_rtd_theme'] - -version = get_version('./src/doi/__init__.py') - -setup( - author="Alejandro Gallo", - author_email='aamsgallo@gmail.com', - classifiers=[ - 'Development Status :: 2 - Pre-Alpha', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Natural Language :: English', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - ], - description="Python package to work with Document Object Identifier (doi)", - install_requires=requirements, - extras_require={ - 'dev': dev_requirements, - }, - license="GNU General Public License v3", - long_description=readme, - include_package_data=True, - keywords='doi', - name='python-doi', - package_data={"doi": ["py.typed"]}, - packages=find_packages(where="src"), - package_dir={"": "src"}, - url='https://github.com/papis/python-doi', - version=version, - zip_safe=False, -) From 758318b79cd58dd7c00f0fe1f46672ab8113ab06 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 16 Mar 2024 10:28:59 +0200 Subject: [PATCH 20/28] ci: remove travis.yaml --- .travis.yml | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 70589f5..0000000 --- a/.travis.yml +++ /dev/null @@ -1,19 +0,0 @@ -# Config file for automatic testing at travis-ci.org - -language: python -python: - - 3.8 - - 3.7 - - 3.6 - - 3.5 - -install: - - pip install -e .[dev] - - pip install coveralls - -script: - - py.test --doctest-modules --cov=doi src tests - - mypy src/ tests - - flake8 src/ tests -after_success: - - coveralls From 507acb0677467c3de5c705a821e32b09e8489d8d Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 16 Mar 2024 10:33:17 +0200 Subject: [PATCH 21/28] ci: add tests to ci --- .github/workflows/codeql.yml | 69 ++++++++++-------------------------- .github/workflows/main.yml | 43 ++++++++++++++++++++++ .github/workflows/pyre.yml | 46 ------------------------ 3 files changed, 62 insertions(+), 96 deletions(-) create mode 100644 .github/workflows/main.yml delete mode 100644 .github/workflows/pyre.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index fa89976..52e191d 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -1,23 +1,13 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# name: "CodeQL" on: push: branches: [ "master" ] pull_request: - # The branches below must be a subset of the branches above branches: [ "master" ] - + schedule: + # 17:00 on Friday (UTC) + - cron: "00 17 * * 5" jobs: analyze: @@ -32,42 +22,21 @@ jobs: fail-fast: false matrix: language: [ 'python' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support steps: - - name: Checkout repository - uses: actions/checkout@v3 - - # Initializes the CodeQL tools for scanning. - - name: Initialize CodeQL - uses: github/codeql-action/init@v2 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - - # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs - # queries: security-extended,security-and-quality - - - # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v2 - - # â„šī¸ Command-line programs to run using the OS shell. - # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - # If the Autobuild fails above, remove it and uncomment the following three lines. - # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. - - # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 - with: - category: "/language:${{matrix.language}}" + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + queries: +security-and-quality + + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..b049b7c --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,43 @@ +name: CI + +on: + push: + branches: [ "master", "ci-*" ] + pull_request: + branches: [ "master" ] + schedule: + # 17:00 on Friday (UTC) + - cron: "00 17 * * 5" + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] + fail-fast: false + + steps: + - uses: actions/checkout@v4 + - name: Set up python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --editable '.[develop,docs]' + shell: bash + + - name: Check linting and type annotations + run: | + python -m flake8 src tests + python -m mypy src tests + shell: bash + + - name: Run tests + if: success() || failure() + run: | + python -m pytest -v -s src tests + shell: bash diff --git a/.github/workflows/pyre.yml b/.github/workflows/pyre.yml deleted file mode 100644 index 21fd46f..0000000 --- a/.github/workflows/pyre.yml +++ /dev/null @@ -1,46 +0,0 @@ -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. - -# This workflow integrates Pyre with GitHub's -# Code Scanning feature. -# -# Pyre is a performant type checker for Python compliant with -# PEP 484. Pyre can analyze codebases with millions of lines -# of code incrementally – providing instantaneous feedback -# to developers as they write code. -# -# See https://pyre-check.org - -name: Pyre - -on: - workflow_dispatch: - push: - branches: [ "master" ] - pull_request: - branches: [ "master" ] - -permissions: - contents: read - -jobs: - pyre: - permissions: - actions: read - contents: read - security-events: write - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - submodules: true - - - name: Run Pyre - uses: facebook/pyre-action@60697a7858f7cc8470d8cc494a3cf2ad6b06560d - with: - # To customize these inputs: - # See https://github.com/facebook/pyre-action#inputs - repo-directory: './src/' - From a64941e750783d7ced841b432daf9775e3bd33c9 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 16 Mar 2024 10:39:36 +0200 Subject: [PATCH 22/28] style: fix flake8 issues --- src/doi/__init__.py | 40 +++++++++---------- tests/test_doi.py | 96 +++++++++++++++++++++------------------------ 2 files changed, 64 insertions(+), 72 deletions(-) diff --git a/src/doi/__init__.py b/src/doi/__init__.py index e19359f..cd19507 100644 --- a/src/doi/__init__.py +++ b/src/doi/__init__.py @@ -5,7 +5,7 @@ from typing import Optional -__version__ = '0.2.0' +__version__ = "0.2.0" logger = logging.getLogger("doi") # type: logging.Logger @@ -24,9 +24,9 @@ def pdf_to_doi(filepath: str, maxlines: Optional[int] = None) -> Optional[str]: if maxlines is None: maxlines = sys.maxsize - with open(filepath, 'rb') as fd: + with open(filepath, "rb") as fd: for j, line in enumerate(fd): - doi = find_doi_in_text(line.decode('ascii', errors='ignore')) + doi = find_doi_in_text(line.decode("ascii", errors="ignore")) if doi: return doi if j > maxlines: @@ -48,16 +48,16 @@ def validate_doi(doi: str) -> Optional[str]: import urllib.parse import json url = "https://doi.org/api/handles/{doi}".format(doi=doi) - logger.debug('handle url %s', url) + logger.debug("handle url %s", url) request = urllib.request.Request(url) try: result = json.loads(urllib.request.urlopen(request).read().decode()) except HTTPError: - raise ValueError('HTTP 404: DOI not found') + raise ValueError("HTTP 404: DOI not found") else: - urls = [v['data']['value'] - for v in result['values'] if v.get('type') == 'URL'] + urls = [v["data"]["value"] + for v in result["values"] if v.get("type") == "URL"] return urls[0] if urls else None @@ -68,12 +68,12 @@ def get_clean_doi(doi: str) -> str: :param doi: String containing a DOI. :returns: The extracted DOI. """ - doi = re.sub(r'%2F', '/', doi) + doi = re.sub(r"%2F", "/", doi) # For pdfs - doi = re.sub(r'\)>', ' ', doi) - doi = re.sub(r'\)/S/URI', ' ', doi) - doi = re.sub(r'(/abstract)', '', doi) - doi = re.sub(r'\)$', '', doi) + doi = re.sub(r"\)>", " ", doi) + doi = re.sub(r"\)/S/URI", " ", doi) + doi = re.sub(r"(/abstract)", "", doi) + doi = re.sub(r"\)$", "", doi) return doi @@ -87,11 +87,11 @@ def find_doi_in_text(text: str) -> Optional[str]: forbidden_doi_characters = r'"\s%$^\'<>@,;:#?&' # Sometimes it is in the javascript defined var_doi = re.compile( - r'doi(.org)?' - r'\s*(=|:|/|\()\s*' - r'("|\')?' - r'(?P[^{fc}]+)' - r'("|\'|\))?' + r"doi(.org)?" + r"\s*(=|:|/|\()\s*" + r"(\"|')?" + r"(?P[^{fc}]+)" + r"(\"|'|\))?" .format( fc=forbidden_doi_characters ), re.I @@ -102,7 +102,7 @@ def find_doi_in_text(text: str) -> Optional[str]: try: m = next(miter) if m: - doi = m.group('doi') + doi = m.group("doi") return get_clean_doi(doi) except StopIteration: pass @@ -119,8 +119,8 @@ def get_real_url_from_doi(doi: str) -> Optional[str]: if url is None: return url - m = re.match(r'.*linkinghub\.elsevier.*/pii/([A-Z0-9]+).*', url, re.I) + m = re.match(r".*linkinghub\.elsevier.*/pii/([A-Z0-9]+).*", url, re.I) if m: - return ('https://www.sciencedirect.com/science/article/abs/pii/{pii}' + return ("https://www.sciencedirect.com/science/article/abs/pii/{pii}" .format(pii=m.group(1))) return url diff --git a/tests/test_doi.py b/tests/test_doi.py index 4d3d478..fa51492 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -1,52 +1,44 @@ -"""Tests for `doi` package.""" - import os -from pkg_resources import parse_version import pytest from doi import ( - validate_doi, find_doi_in_text, __version__, pdf_to_doi, + validate_doi, find_doi_in_text, pdf_to_doi, get_real_url_from_doi ) -def test_valid_version() -> None: - """Check that the package defines a valid __version__""" - assert parse_version(__version__) >= parse_version("0.1.0") - - @pytest.mark.net def test_validate_doi() -> None: data = [ - ('10.1063/1.5081715', - 'http://aip.scitation.org/doi/10.1063/1.5081715'), - ('10.1007%2FBF01451751', - 'http://link.springer.com/10.1007/BF01451751'), - ('10.1103/PhysRevLett.49.57', - 'https://link.aps.org/doi/10.1103/PhysRevLett.49.57'), - ('10.1080/14786442408634457', - 'https://www.tandfonline.com/doi/full/10.1080/14786442408634457'), - ('10.1021/jp003647e', 'https://pubs.acs.org/doi/10.1021/jp003647e'), - ('10.1016/S0009-2614(97)04014-1', - 'https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141'), + ("10.1063/1.5081715", + "http://aip.scitation.org/doi/10.1063/1.5081715"), + ("10.1007%2FBF01451751", + "http://link.springer.com/10.1007/BF01451751"), + ("10.1103/PhysRevLett.49.57", + "https://link.aps.org/doi/10.1103/PhysRevLett.49.57"), + ("10.1080/14786442408634457", + "https://www.tandfonline.com/doi/full/10.1080/14786442408634457"), + ("10.1021/jp003647e", "https://pubs.acs.org/doi/10.1021/jp003647e"), + ("10.1016/S0009-2614(97)04014-1", + "https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141"), ] for doi, url in data: assert url == validate_doi(doi) - for doi in ['', 'asdf']: + for doi in ["", "asdf"]: try: validate_doi(doi) except ValueError as e: - assert str(e) == 'HTTP 404: DOI not found' + assert str(e) == "HTTP 404: DOI not found" @pytest.mark.net def test_get_real_url_from_doi() -> None: data = [ - ('10.1016/S0009-2614(97)04014-1', - 'https://www.sciencedirect.com/science/' - 'article/abs/pii/S0009261497040141'), + ("10.1016/S0009-2614(97)04014-1", + "https://www.sciencedirect.com/science/" + "article/abs/pii/S0009261497040141"), ] for doi, url in data: assert url == get_real_url_from_doi(doi) @@ -54,40 +46,40 @@ def test_get_real_url_from_doi() -> None: def test_find_doi_in_line() -> None: test_data = [ - ('http://dx.doi.org/10.1063/1.881498', '10.1063/1.881498'), - ('http://dx.doi.org/10.1063%2F1.881498', '10.1063/1.881498'), - (2*'qer '+'var doi = "12345/12345.3"', '12345/12345.3'), - (2*'qer '+"var doi = '12345/12345.3';fas", '12345/12345.3'), - (2*'qer '+"var DoI = 12345%2F12345.3", '12345/12345.3'), - (2*'qer '+"var DoI : 12345%2F12345.3", '12345/12345.3'), - ('http://scitation.org/doi/10.1063/1.881498', '10.1063/1.881498'), - ('org/doi(10.1063/1.881498)', '10.1063/1.881498'), - ('/scitation.org/doi/10.1063/1.881498?234saf=34', '10.1063/1.881498'), - ('/scitation.org/doi/10.1063/1.88149 8?234saf=34', '10.1063/1.88149'), - ('/scitation.org/doi/10.1063/1.uniau12?as=234', - '10.1063/1.uniau12'), - ('https://doi.org/10.1093/analys/anw053', '10.1093/analys/anw053'), - ('http://.scitation.org/doi/10.1063/1.mart(88)1498?asdfwer', - '10.1063/1.mart(88)1498'), - ('@ibook{doi:10.1002/9780470125915.ch2,', '10.1002/9780470125915.ch2'), + ("http://dx.doi.org/10.1063/1.881498", "10.1063/1.881498"), + ("http://dx.doi.org/10.1063%2F1.881498", "10.1063/1.881498"), + (2 * "qer " + "var doi = '12345/12345.3'", "12345/12345.3"), + (2 * "qer " + "var doi = '12345/12345.3';fas", "12345/12345.3"), + (2 * "qer " + "var DoI = 12345%2F12345.3", "12345/12345.3"), + (2 * "qer " + "var DoI : 12345%2F12345.3", "12345/12345.3"), + ("http://scitation.org/doi/10.1063/1.881498", "10.1063/1.881498"), + ("org/doi(10.1063/1.881498)", "10.1063/1.881498"), + ("/scitation.org/doi/10.1063/1.881498?234saf=34", "10.1063/1.881498"), + ("/scitation.org/doi/10.1063/1.88149 8?234saf=34", "10.1063/1.88149"), + ("/scitation.org/doi/10.1063/1.uniau12?as=234", + "10.1063/1.uniau12"), + ("https://doi.org/10.1093/analys/anw053", "10.1093/analys/anw053"), + ("http://.scitation.org/doi/10.1063/1.mart(88)1498?asdfwer", + "10.1063/1.mart(88)1498"), + ("@ibook{doi:10.1002/9780470125915.ch2,", "10.1002/9780470125915.ch2"), ('application/pdf' - 'doi:10.1063/1.5079474', - '10.1063/1.5079474'), - ('<(DOI:10.1002/9780470915.CH2)/S/URI,', '10.1002/9780470915.CH2'), - ('URL<(DOI:10.1002/9780470125915.CH2,', '10.1002/9780470125915.CH2'), - (r'A<>/' - r'Border[0 0 0]/M(D:20181022082356+0530)/Rect[147.40158 594.36926' - r'347.24957 605.36926]/Subtype/Link/Type/A', - '10.1016/j.comptc.2018.10.004'), - ('doi(10.1038/s41535-018-0103-6;)', '10.1038/s41535-018-0103-6'), + "doi:10.1063/1.5079474", + "10.1063/1.5079474"), + ("<(DOI:10.1002/9780470915.CH2)/S/URI,", "10.1002/9780470915.CH2"), + ("URL<(DOI:10.1002/9780470125915.CH2,", "10.1002/9780470125915.CH2"), + (r"A<>/" + r"Border[0 0 0]/M(D:20181022082356+0530)/Rect[147.40158 594.36926" + r"347.24957 605.36926]/Subtype/Link/Type/A", + "10.1016/j.comptc.2018.10.004"), + ("doi(10.1038/s41535-018-0103-6;)", "10.1038/s41535-018-0103-6"), ] for url, doi in test_data: assert find_doi_in_text(url) == doi def test_doi_from_pdf() -> None: - f = os.path.join(os.path.dirname(__file__), 'resources', 'doc.pdf') + f = os.path.join(os.path.dirname(__file__), "resources", "doc.pdf") assert os.path.exists(f) - assert pdf_to_doi(f) == '10.1103/PhysRevLett.50.1998' + assert pdf_to_doi(f) == "10.1103/PhysRevLett.50.1998" From 844fdfd3ea28d973379513b853f960a5fa85dde2 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Sat, 16 Mar 2024 15:32:15 +0200 Subject: [PATCH 23/28] tests: fix tests --- tests/test_doi.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/test_doi.py b/tests/test_doi.py index fa51492..bb3a763 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -12,16 +12,17 @@ def test_validate_doi() -> None: data = [ ("10.1063/1.5081715", - "http://aip.scitation.org/doi/10.1063/1.5081715"), + "https://pubs.aip.org/jcp/article/150/7/074102/197572/Exact-two-component-equation-of-motion-coupled"), # noqa: E501 ("10.1007%2FBF01451751", - "http://link.springer.com/10.1007/BF01451751"), + "http://link.springer.com/10.1007/BF01451751"), ("10.1103/PhysRevLett.49.57", - "https://link.aps.org/doi/10.1103/PhysRevLett.49.57"), + "https://link.aps.org/doi/10.1103/PhysRevLett.49.57"), ("10.1080/14786442408634457", - "https://www.tandfonline.com/doi/full/10.1080/14786442408634457"), - ("10.1021/jp003647e", "https://pubs.acs.org/doi/10.1021/jp003647e"), + "https://www.tandfonline.com/doi/full/10.1080/14786442408634457"), + ("10.1021/jp003647e", + "https://pubs.acs.org/doi/10.1021/jp003647e"), ("10.1016/S0009-2614(97)04014-1", - "https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141"), + "https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141"), ] for doi, url in data: assert url == validate_doi(doi) @@ -57,21 +58,21 @@ def test_find_doi_in_line() -> None: ("/scitation.org/doi/10.1063/1.881498?234saf=34", "10.1063/1.881498"), ("/scitation.org/doi/10.1063/1.88149 8?234saf=34", "10.1063/1.88149"), ("/scitation.org/doi/10.1063/1.uniau12?as=234", - "10.1063/1.uniau12"), + "10.1063/1.uniau12"), ("https://doi.org/10.1093/analys/anw053", "10.1093/analys/anw053"), ("http://.scitation.org/doi/10.1063/1.mart(88)1498?asdfwer", - "10.1063/1.mart(88)1498"), + "10.1063/1.mart(88)1498"), ("@ibook{doi:10.1002/9780470125915.ch2,", "10.1002/9780470125915.ch2"), ('application/pdf' "doi:10.1063/1.5079474", - "10.1063/1.5079474"), + "10.1063/1.5079474"), ("<(DOI:10.1002/9780470915.CH2)/S/URI,", "10.1002/9780470915.CH2"), ("URL<(DOI:10.1002/9780470125915.CH2,", "10.1002/9780470125915.CH2"), (r"A<>/" r"Border[0 0 0]/M(D:20181022082356+0530)/Rect[147.40158 594.36926" r"347.24957 605.36926]/Subtype/Link/Type/A", - "10.1016/j.comptc.2018.10.004"), + "10.1016/j.comptc.2018.10.004"), ("doi(10.1038/s41535-018-0103-6;)", "10.1038/s41535-018-0103-6"), ] for url, doi in test_data: From 34ea67ce89b16ceac5bb47df5ab7a098d6c06bc6 Mon Sep 17 00:00:00 2001 From: gesh Date: Mon, 23 Jun 2025 21:59:08 +0300 Subject: [PATCH 24/28] tests: Configure pytest to ignore docs --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1e3bc83..aa6195c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,6 +69,7 @@ multiline-quotes = "double" addopts = [ "--doctest-modules", "--cov=src/doi", + "--ignore=docs", ] markers = [ "net: marks tests that call use the net" From 2e4b622e00d72097e369b7be5ad61e107ebf3f8d Mon Sep 17 00:00:00 2001 From: gesh Date: Tue, 15 Apr 2025 21:07:43 +0300 Subject: [PATCH 25/28] Resolve redirects when testing URLs for equality The URL DOIs resolve to can move around, with redirects pointing to the new location. To make the tests more robust, only fail if the URLs differ after redirections. See also https://www.crossref.org/blog/urls-and-dois-a-complicated-relationship/ --- tests/test_doi.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/tests/test_doi.py b/tests/test_doi.py index bb3a763..d062379 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -1,5 +1,9 @@ import os +from urllib.request import Request, urlopen +from urllib.parse import urlparse, urlunparse +from warnings import warn + import pytest from doi import ( @@ -8,6 +12,30 @@ ) +def simplify_url(u): + return urlparse(u)._replace(query='', fragment='') + + +def resolve_redirects(u): + # Unconditionally upgrade to https, since some resolvers seem to require it + # If removed, it'd make sense to canonicalize in simplify_url instead to + # prevent spurious test failures + u = urlunparse(urlparse(u)._replace(scheme='https')) + req = Request(u, headers={'User-Agent': 'Mozilla/5.0'}) + with urlopen(req) as r: + return simplify_url(r.url) + + +def normalize_eq(u, v): + if u == v: + return True + warn(f"{u} textually differs from {v}, please update the relevant case.\n" + "Attempting to recover by resolving redirects") + return (simplify_url(u) == simplify_url(v) + or resolve_redirects(u) == resolve_redirects(v) + ) + + @pytest.mark.net def test_validate_doi() -> None: data = [ @@ -25,7 +53,7 @@ def test_validate_doi() -> None: "https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141"), ] for doi, url in data: - assert url == validate_doi(doi) + assert normalize_eq(url, validate_doi(doi)) for doi in ["", "asdf"]: try: @@ -42,7 +70,7 @@ def test_get_real_url_from_doi() -> None: "article/abs/pii/S0009261497040141"), ] for doi, url in data: - assert url == get_real_url_from_doi(doi) + assert normalize_eq(url, get_real_url_from_doi(doi)) def test_find_doi_in_line() -> None: From ca6dcf0f03e8402ef13ea3375eb96734af77dcfe Mon Sep 17 00:00:00 2001 From: gesh Date: Sun, 29 Jun 2025 17:15:02 +0300 Subject: [PATCH 26/28] Use cloudscraper to solve cloudflare challenges Also put in a fallback using requests, but it is hacky and only works sometimes. cloudscraper stands a better chance of consistently being able to get to the final URL --- pyproject.toml | 5 +++++ tests/test_doi.py | 44 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index aa6195c..faba3d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,11 @@ docs = [ "sphinx>=4", "sphinx_rtd_theme>=1", ] +# For solving client-side challenges on DDoS-protected sites +# (eg those using CloudFlare) +challenges = [ + "cloudscraper", +] [project.urls] Repository = "https://github.com/papis/python-doi" diff --git a/tests/test_doi.py b/tests/test_doi.py index d062379..1668af0 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -1,6 +1,10 @@ import os -from urllib.request import Request, urlopen +import requests +try: + import cloudscraper +except ImportError: + cloudscraper = None from urllib.parse import urlparse, urlunparse from warnings import warn @@ -21,21 +25,47 @@ def resolve_redirects(u): # If removed, it'd make sense to canonicalize in simplify_url instead to # prevent spurious test failures u = urlunparse(urlparse(u)._replace(scheme='https')) - req = Request(u, headers={'User-Agent': 'Mozilla/5.0'}) - with urlopen(req) as r: - return simplify_url(r.url) + if cloudscraper: + scraper = cloudscraper.create_scraper() + return simplify_url(scraper.get(u).url) -def normalize_eq(u, v): + # Try emulating a browser to not get blocked + h = {'User-Agent': 'Mozilla/5.0'} + resp = requests.get(u, headers=h) + return simplify_url(resp.url) + + +def normalize_eq(u, v, expect_diff=False): if u == v: return True - warn(f"{u} textually differs from {v}, please update the relevant case.\n" - "Attempting to recover by resolving redirects") + if not expect_diff: + warn(f"{u} textually differs from {v}, please update the relevant case.\n" + "Attempting to recover by resolving redirects") return (simplify_url(u) == simplify_url(v) or resolve_redirects(u) == resolve_redirects(v) ) +@pytest.mark.net +@pytest.mark.parametrize( + "needs_cloudscraper, urls", + [ + (True, + ["http://pubs.aip.org/aip/jcp/article/150/7/074102/197572/Exact-two-component-equation-of-motion-coupled", # noqa: E501 + "http://pubs.aip.org/jcp/article/150/7/074102/197572/Exact-two-component-equation-of-motion-coupled", # noqa: E501 + "http://aip.scitation.org/doi/10.1063/1.5081715" + ]), + ] +) +def test_redirect(needs_cloudscraper, urls) -> None: + base = urls[0] + if needs_cloudscraper and cloudscraper is None: + pytest.skip(f"cloudscraper needed to solve CloudFlare challenge on {base}") + for other in urls[1:]: + assert normalize_eq(base, other, expect_diff=True) + + @pytest.mark.net def test_validate_doi() -> None: data = [ From 8e5f3c9228c7ad4348a6af97e8192fac57c3865d Mon Sep 17 00:00:00 2001 From: gesh Date: Sun, 29 Jun 2025 17:13:54 +0300 Subject: [PATCH 27/28] Parametrize tests This eg makes it easier to spot which particular iteration breaks --- tests/test_doi.py | 48 ++++++++++++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/tests/test_doi.py b/tests/test_doi.py index 1668af0..91f9e51 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -67,8 +67,9 @@ def test_redirect(needs_cloudscraper, urls) -> None: @pytest.mark.net -def test_validate_doi() -> None: - data = [ +@pytest.mark.parametrize( + "doi,url", + [ ("10.1063/1.5081715", "https://pubs.aip.org/jcp/article/150/7/074102/197572/Exact-two-component-equation-of-motion-coupled"), # noqa: E501 ("10.1007%2FBF01451751", @@ -82,29 +83,41 @@ def test_validate_doi() -> None: ("10.1016/S0009-2614(97)04014-1", "https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141"), ] - for doi, url in data: - assert normalize_eq(url, validate_doi(doi)) +) +def test_validate_doi(doi, url) -> None: + assert normalize_eq(url, validate_doi(doi)) + - for doi in ["", "asdf"]: - try: - validate_doi(doi) - except ValueError as e: - assert str(e) == "HTTP 404: DOI not found" +@pytest.mark.parametrize( + "doi", + [ + "", + "asdf" + ] +) +def test_validate_invalid_doi(doi) -> None: + try: + validate_doi(doi) + except ValueError as e: + assert str(e) == "HTTP 404: DOI not found" @pytest.mark.net -def test_get_real_url_from_doi() -> None: - data = [ +@pytest.mark.parametrize( + "doi,url", + [ ("10.1016/S0009-2614(97)04014-1", "https://www.sciencedirect.com/science/" "article/abs/pii/S0009261497040141"), ] - for doi, url in data: - assert normalize_eq(url, get_real_url_from_doi(doi)) +) +def test_get_real_url_from_doi(doi, url) -> None: + assert normalize_eq(url, get_real_url_from_doi(doi)) -def test_find_doi_in_line() -> None: - test_data = [ +@pytest.mark.parametrize( + "url, doi", + [ ("http://dx.doi.org/10.1063/1.881498", "10.1063/1.881498"), ("http://dx.doi.org/10.1063%2F1.881498", "10.1063/1.881498"), (2 * "qer " + "var doi = '12345/12345.3'", "12345/12345.3"), @@ -133,8 +146,9 @@ def test_find_doi_in_line() -> None: "10.1016/j.comptc.2018.10.004"), ("doi(10.1038/s41535-018-0103-6;)", "10.1038/s41535-018-0103-6"), ] - for url, doi in test_data: - assert find_doi_in_text(url) == doi +) +def test_find_doi_in_line(url, doi) -> None: + assert find_doi_in_text(url) == doi def test_doi_from_pdf() -> None: From ab9d72ae86c3ef83cdf2e56793875057b0d47788 Mon Sep 17 00:00:00 2001 From: gesh Date: Sun, 29 Jun 2025 18:06:03 +0300 Subject: [PATCH 28/28] Make test_redirect cases prettier --- tests/test_doi.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/test_doi.py b/tests/test_doi.py index 91f9e51..1049b8b 100644 --- a/tests/test_doi.py +++ b/tests/test_doi.py @@ -47,9 +47,15 @@ def normalize_eq(u, v, expect_diff=False): ) +def listmin(param): + if isinstance(param, list): + return min(param) + return "" + + @pytest.mark.net @pytest.mark.parametrize( - "needs_cloudscraper, urls", + "needs_cloudscraper, urls", ids=listmin, argvalues= [ (True, ["http://pubs.aip.org/aip/jcp/article/150/7/074102/197572/Exact-two-component-equation-of-motion-coupled", # noqa: E501