diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..a7cba45 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,3 @@ +[run] +omit = + tests/* diff --git a/.github/workflows/codecov.yml b/.github/workflows/codecov.yml new file mode 100644 index 0000000..204f6df --- /dev/null +++ b/.github/workflows/codecov.yml @@ -0,0 +1,37 @@ +name: Run tests and upload coverage report to Codecov + +on: + push: + branches: [dev, main] + +jobs: + coverage: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install Python dependencies + run: | + pip install -r requirements.txt + pip install codecov + pip install -e . + + - name: Run tests with coverage + env: + SKIP_ONLINE_TESTS: "true" + run: | + coverage run -m pytest + coverage xml + + - name: Upload score to Codecov + uses: codecov/codecov-action@v4 + with: + files: coverage.xml + token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/mkdocs.yml b/.github/workflows/mkdocs.yml new file mode 100644 index 0000000..b40ad50 --- /dev/null +++ b/.github/workflows/mkdocs.yml @@ -0,0 +1,39 @@ +name: Build and deploy mkdocs site to GitHub Pages + +on: + push: + branches: [main] + +jobs: + mkdocs: + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Install docs dependencies + run: | + pip install mkdocs + + - name: Generate index.md from README.md + run: | + cp README.md docs/index.md + sed -i 's|docs/guide.md|guide/|g' docs/index.md + mkdir docs/media + cp media/logo.png docs/media/logo.png + + - name: Build mkdocs site + run: mkdocs build + + - name: Deploy site to gh-pages branch + uses: peaceiris/actions-gh-pages@v4 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./site diff 
--git a/.gitignore b/.gitignore new file mode 100644 index 0000000..31910b2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.idea/ +*.egg-info/ +.env +__pycache__/ +.coverage +dist/ +coverage.xml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c29ce2f --- /dev/null +++ b/LICENSE @@ -0,0 +1,287 @@ + EUROPEAN UNION PUBLIC LICENCE v. 1.2 + EUPL © the European Union 2007, 2016 + +This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined +below) which is provided under the terms of this Licence. Any use of the Work, +other than as authorised under this Licence is prohibited (to the extent such +use is covered by a right of the copyright holder of the Work). + +The Work is provided under the terms of this Licence when the Licensor (as +defined below) has placed the following notice immediately following the +copyright notice for the Work: + + Licensed under the EUPL + +or has expressed by any other means his willingness to license under the EUPL. + +1. Definitions + +In this Licence, the following terms have the following meaning: + +- ‘The Licence’: this Licence. + +- ‘The Original Work’: the work or software distributed or communicated by the + Licensor under this Licence, available as Source Code and also as Executable + Code as the case may be. + +- ‘Derivative Works’: the works or software that could be created by the + Licensee, based upon the Original Work or modifications thereof. This Licence + does not define the extent of modification or dependence on the Original Work + required in order to classify a work as a Derivative Work; this extent is + determined by copyright law applicable in the country mentioned in Article 15. + +- ‘The Work’: the Original Work or its Derivative Works. + +- ‘The Source Code’: the human-readable form of the Work which is the most + convenient for people to study and modify. 
+ +- ‘The Executable Code’: any code which has generally been compiled and which is + meant to be interpreted by a computer as a program. + +- ‘The Licensor’: the natural or legal person that distributes or communicates + the Work under the Licence. + +- ‘Contributor(s)’: any natural or legal person who modifies the Work under the + Licence, or otherwise contributes to the creation of a Derivative Work. + +- ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of + the Work under the terms of the Licence. + +- ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, + renting, distributing, communicating, transmitting, or otherwise making + available, online or offline, copies of the Work or providing access to its + essential functionalities at the disposal of any other natural or legal + person. + +2. Scope of the rights granted by the Licence + +The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, +sublicensable licence to do the following, for the duration of copyright vested +in the Original Work: + +- use the Work in any circumstance and for all usage, +- reproduce the Work, +- modify the Work, and make Derivative Works based upon the Work, +- communicate to the public, including the right to make available or display + the Work or copies thereof to the public and perform publicly, as the case may + be, the Work, +- distribute the Work or copies thereof, +- lend and rent the Work or copies thereof, +- sublicense rights in the Work or copies thereof. + +Those rights can be exercised on any media, supports and formats, whether now +known or later invented, as far as the applicable law permits so. + +In the countries where moral rights apply, the Licensor waives his right to +exercise his moral right to the extent allowed by law in order to make effective +the licence of the economic rights here above listed. 
+ +The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to +any patents held by the Licensor, to the extent necessary to make use of the +rights granted on the Work under this Licence. + +3. Communication of the Source Code + +The Licensor may provide the Work either in its Source Code form, or as +Executable Code. If the Work is provided as Executable Code, the Licensor +provides in addition a machine-readable copy of the Source Code of the Work +along with each copy of the Work that the Licensor distributes or indicates, in +a notice following the copyright notice attached to the Work, a repository where +the Source Code is easily and freely accessible for as long as the Licensor +continues to distribute or communicate the Work. + +4. Limitations on copyright + +Nothing in this Licence is intended to deprive the Licensee of the benefits from +any exception or limitation to the exclusive rights of the rights owners in the +Work, of the exhaustion of those rights or of other applicable limitations +thereto. + +5. Obligations of the Licensee + +The grant of the rights mentioned above is subject to some restrictions and +obligations imposed on the Licensee. Those obligations are the following: + +Attribution right: The Licensee shall keep intact all copyright, patent or +trademarks notices and all notices that refer to the Licence and to the +disclaimer of warranties. The Licensee must include a copy of such notices and a +copy of the Licence with every copy of the Work he/she distributes or +communicates. The Licensee must cause any Derivative Work to carry prominent +notices stating that the Work has been modified and the date of modification. 
+ +Copyleft clause: If the Licensee distributes or communicates copies of the +Original Works or Derivative Works, this Distribution or Communication will be +done under the terms of this Licence or of a later version of this Licence +unless the Original Work is expressly distributed only under this version of the +Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee +(becoming Licensor) cannot offer or impose any additional terms or conditions on +the Work or Derivative Work that alter or restrict the terms of the Licence. + +Compatibility clause: If the Licensee Distributes or Communicates Derivative +Works or copies thereof based upon both the Work and another work licensed under +a Compatible Licence, this Distribution or Communication can be done under the +terms of this Compatible Licence. For the sake of this clause, ‘Compatible +Licence’ refers to the licences listed in the appendix attached to this Licence. +Should the Licensee's obligations under the Compatible Licence conflict with +his/her obligations under this Licence, the obligations of the Compatible +Licence shall prevail. + +Provision of Source Code: When distributing or communicating copies of the Work, +the Licensee will provide a machine-readable copy of the Source Code or indicate +a repository where this Source will be easily and freely available for as long +as the Licensee continues to distribute or communicate the Work. + +Legal Protection: This Licence does not grant permission to use the trade names, +trademarks, service marks, or names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the copyright notice. + +6. Chain of Authorship + +The original Licensor warrants that the copyright in the Original Work granted +hereunder is owned by him/her or licensed to him/her and that he/she has the +power and authority to grant the Licence. 
+ +Each Contributor warrants that the copyright in the modifications he/she brings +to the Work are owned by him/her or licensed to him/her and that he/she has the +power and authority to grant the Licence. + +Each time You accept the Licence, the original Licensor and subsequent +Contributors grant You a licence to their contributions to the Work, under the +terms of this Licence. + +7. Disclaimer of Warranty + +The Work is a work in progress, which is continuously improved by numerous +Contributors. It is not a finished work and may therefore contain defects or +‘bugs’ inherent to this type of development. + +For the above reason, the Work is provided under the Licence on an ‘as is’ basis +and without warranties of any kind concerning the Work, including without +limitation merchantability, fitness for a particular purpose, absence of defects +or errors, accuracy, non-infringement of intellectual property rights other than +copyright as stated in Article 6 of this Licence. + +This disclaimer of warranty is an essential part of the Licence and a condition +for the grant of any rights to the Work. + +8. Disclaimer of Liability + +Except in the cases of wilful misconduct or damages directly caused to natural +persons, the Licensor will in no event be liable for any direct or indirect, +material or moral, damages of any kind, arising out of the Licence or of the use +of the Work, including without limitation, damages for loss of goodwill, work +stoppage, computer failure or malfunction, loss of data or any commercial +damage, even if the Licensor has been advised of the possibility of such damage. +However, the Licensor will be liable under statutory product liability laws as +far such laws apply to the Work. + +9. Additional agreements + +While distributing the Work, You may choose to conclude an additional agreement, +defining obligations or services consistent with this Licence. 
However, if +accepting obligations, You may act only on your own behalf and on your sole +responsibility, not on behalf of the original Licensor or any other Contributor, +and only if You agree to indemnify, defend, and hold each Contributor harmless +for any liability incurred by, or claims asserted against such Contributor by +the fact You have accepted any warranty or additional liability. + +10. Acceptance of the Licence + +The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ +placed under the bottom of a window displaying the text of this Licence or by +affirming consent in any other similar way, in accordance with the rules of +applicable law. Clicking on that icon indicates your clear and irrevocable +acceptance of this Licence and all of its terms and conditions. + +Similarly, you irrevocably accept this Licence and all of its terms and +conditions by exercising any rights granted to You by Article 2 of this Licence, +such as the use of the Work, the creation by You of a Derivative Work or the +Distribution or Communication by You of the Work or copies thereof. + +11. Information to the public + +In case of any Distribution or Communication of the Work by means of electronic +communication by You (for example, by offering to download the Work from a +remote location) the distribution channel or media (for example, a website) must +at least provide to the public the information requested by the applicable law +regarding the Licensor, the Licence and the way it may be accessible, concluded, +stored and reproduced by the Licensee. + +12. Termination of the Licence + +The Licence and the rights granted hereunder will terminate automatically upon +any breach by the Licensee of the terms of the Licence. + +Such a termination will not terminate the licences of any person who has +received the Work from the Licensee under the Licence, provided such persons +remain in full compliance with the Licence. + +13. 
Miscellaneous + +Without prejudice of Article 9 above, the Licence represents the complete +agreement between the Parties as to the Work. + +If any provision of the Licence is invalid or unenforceable under applicable +law, this will not affect the validity or enforceability of the Licence as a +whole. Such provision will be construed or reformed so as necessary to make it +valid and enforceable. + +The European Commission may publish other linguistic versions or new versions of +this Licence or updated versions of the Appendix, so far this is required and +reasonable, without reducing the scope of the rights granted by the Licence. New +versions of the Licence will be published with a unique version number. + +All linguistic versions of this Licence, approved by the European Commission, +have identical value. Parties can take advantage of the linguistic version of +their choice. + +14. Jurisdiction + +Without prejudice to specific agreement between parties, + +- any litigation resulting from the interpretation of this License, arising + between the European Union institutions, bodies, offices or agencies, as a + Licensor, and any Licensee, will be subject to the jurisdiction of the Court + of Justice of the European Union, as laid down in article 272 of the Treaty on + the Functioning of the European Union, + +- any litigation arising between other parties and resulting from the + interpretation of this License, will be subject to the exclusive jurisdiction + of the competent court where the Licensor resides or conducts its primary + business. + +15. Applicable Law + +Without prejudice to specific agreement between parties, + +- this Licence shall be governed by the law of the European Union Member State + where the Licensor has his seat, resides or has his registered office, + +- this licence shall be governed by Belgian law if the Licensor has no seat, + residence or registered office inside a European Union Member State. 
+ +Appendix + +‘Compatible Licences’ according to Article 5 EUPL are: + +- GNU General Public License (GPL) v. 2, v. 3 +- GNU Affero General Public License (AGPL) v. 3 +- Open Software License (OSL) v. 2.1, v. 3.0 +- Eclipse Public License (EPL) v. 1.0 +- CeCILL v. 2.0, v. 2.1 +- Mozilla Public Licence (MPL) v. 2 +- GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 +- Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for + works other than software +- European Union Public Licence (EUPL) v. 1.1, v. 1.2 +- Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong + Reciprocity (LiLiQ-R+). + +The European Commission may update this Appendix to later versions of the above +licences without producing a new version of the EUPL, as long as they provide +the rights granted in Article 2 of this Licence and protect the covered Source +Code from exclusive appropriation. + +All other changes or additions to this Appendix require the production of a new +EUPL version. \ No newline at end of file diff --git a/README.md b/README.md index 8334ec1..a7dff64 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,58 @@ -# pystiller -A wrap around the DistillerSR APIs. +# pystiller + +[![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![codecov](https://codecov.io/gh/openefsa/pystiller/branch/main/graph/badge.svg?token=VL7426RVCI)](https://codecov.io/gh/openefsa/pystiller) + +## Overview + +The **pystiller** package provides a pool of functions to query **DistillerSR** +through its APIs. It features authentication and utilities to retrieve data +from DistillerSR projects and reports. + +The package is intended for researchers, analysts, and practitioners who +require convenient programmatic access to DistillerSR data. 
+ +## Installation + +### From PyPi + +``` +pip install pystiller +``` + +### Development version + +To install the latest development version: + +``` +pip install git+https://github.com/openefsa/pystiller.git +``` + +## Requirements + +An active internet connection is required, as the package communicates with +DistillerSR online services to fetch and process data. + +## Usage + +Once installed, load the package as usual: + +```python +from pystiller import * +``` + +Basic usage examples and full documentation are available in the package +[guide](docs/guide.md). + +## Authors and maintainers + +- **Lorenzo Copelli** (author, [ORCID](https://orcid.org/0009-0002-4305-065X)). +- **Fulvio Barizzone** (author, [ORCID](https://orcid.org/0009-0006-3035-520X)). +- **Dayana Stephanie Buzle** (author, [ORCID](https://orcid.org/0009-0003-2990-7431)). +- **Rafael Vieira** (author, [ORCID](https://orcid.org/0009-0009-0289-5438)). +- **Luca Belmonte** (author, maintainer, [ORCID](https://orcid.org/0000-0002-7977-9170)). + +## Links + +- **Homepage**: [GitHub](https://github.com/openefsa/pystiller). +- **Bug Tracker**: [Issues on GitHub](https://github.com/openefsa/pystiller/issues). +- **DistillerSR API Documentation**: [https://apidocs.evidencepartners.com/](https://apidocs.evidencepartners.com/). diff --git a/docs/guide.md b/docs/guide.md new file mode 100644 index 0000000..e0523bb --- /dev/null +++ b/docs/guide.md @@ -0,0 +1,211 @@ +# Introduction to pystiller + +## Overview + +The **pystiller** package provides a pool of functions to query **DistillerSR** +through its APIs. It features authentication and utilities to retrieve data +from DistillerSR projects and reports. + +The package is intended for researchers, analysts, and practitioners who +require convenient programmatic access to DistillerSR data. 
+ +## Installation + +### From PyPi + +``` +pip install pystiller +``` + +### Development version + +To install the latest development version: + +``` +pip install git+https://github.com/openefsa/pystiller.git +``` + +## Requirements + +An active internet connection is required, as the package communicates with +DistillerSR online services to fetch and process data. + +## Working with API keys and environment variables + +The *pystiller* package requires your personal API key provided by DistillerSR. +You can provide your API key in one of two ways: + +1. By setting it in the `.env` file.\ +2. By including it manually in the authentication request. + +### Setting the API key via `.env` + +A `.env` file is used to define environment variables that Python can load at +runtime. This approach is particularly convenient for sensitive information +like API keys, as it allows you to use them in any Python script or function +without hardcoding them. + +Place the `.env` file in the root directory of your project (for example, +`C:/Users/username/Documents/myProject/.env` on Windows or +`~/Documents/myProject/.env` on Unix-like systems). You can create or edit this +file with any plain text editor. + +Add your DistillerSR API key in the following format: + +`DISTILLER_API_KEY=` + +Once the file is saved, the variable will be correctly set for the library to +use during execution. + +### Setting the API key manually for the authentication request + +Alternatively, you can provide the API key directly in the `distiller_key` +argument of the `Client()` constructor. This is useful if you prefer not to +store the API key globally. For example: + +```python +from pystiller import Client + +client = Client(distiller_key="") +``` + +Note that if an API key is explicitly provided, the API key set through the +`.env` file will be ignored, if any. 
+ +### Setting the DistillerSR instance URL + +The *pystiller* package needs to know the instance URL on which DistillerSR is +running to function properly. You can provide the instance URL in one of two +ways: + +1. By setting it in the `.env` file.\ +2. By providing it manually when initializing the client. + +If you prefer to store the URL in the `.env` file, add your DistillerSR +instance URL in the following format: + +`DISTILLER_INSTANCE_URL=` + +Once the file is saved, the instance URL will be correctly set for the library +to use during execution. + +Alternatively, you can provide the instance URL directly in the +`distiller_instance_url` argument of the `Client()` constructor. This is useful +if you prefer not to store the instance URL globally. For example: + +```python +from pystiller import Client + +client = Client(distiller_instance_url="") +``` + +## Basic usage + +The main purpose of *pystiller* is to query the DistillerSR APIs for specific +project or report codes and retrieve relevant information across various +endpoints. + +Below are examples demonstrating how to use the functions in this package. +First, load the *pystiller* package: + +```python +from pystiller import * +``` + +Then, initialize the client by specifying the API key and/or the instance URL +you want to use: + +```python +# Use the API key and the instance URL defined in .env file. +client = Client() +# Manually define the API key and the instance URL. +client = Client( + distiller_key="", + distiller_instance_url="" +) +``` + +To explore the arguments and usage of a specific function, you can run: + +```python +help(function_name) +``` + +This will show the full documentation for the function, including its +arguments, return values, and usage examples. 
+ +For example, if you are working with the `Client.get_report()` function, +you can check its documentation with: + +```python +help(Client.get_report) +``` + +## Getting an authentication token + +Before using functions of this package, you must obtain an authentication token +derived from the API key provided by DistillerSR. The client automatically +requests the token upon creation using the specified API key. + +By default, Distiller tokens expire after 60 minutes (1 hour). Automatic +refreshes of the token can be enabled by setting the `automatic_token_refresh` +parameter to `True` during client initialisation. For example: + +```python +client = Client(automatic_token_refresh=True) +``` + +The obtained token can be used to perform API calls using the +`Client.get_projects()`, `Client.get_reports()`, and `Client.get_report()` +functions. + +## Getting the list of projects associated with the user + +If you want to retrieve the list of all the available projects associated with +your DistillerSR account, you can browse them with the `Client.get_projects()` +function, as follows: + +```python +client = Client() + +projects = client.get_projects() + +print(projects) +``` + +## Getting the list of reports associated with a project + +Each individual project has its own associated set of reports. You can +retrieve the list of associated reports with the `Client.get_reports()` +function, as follows: + +```python +client = Client() + +reports = client.get_reports(project_id=123) + +print(reports) +``` + +## Getting a specific report + +You can retrieve a specific report with the `Client.get_report()` function by +specifying a project ID and a report ID, as follows: + +```python +client = Client() + +project_id_ = 123 +report_id_ = 456 + +report = client.get_report( + projectId=project_id_, + reportId=report_id_, + format=ReportFormat.CSV +) + +print(report.head()) +``` + +Note that for very large reports, CSV files are generally a better choice. 
+Exporting to Excel may cause issues when tables exceed one million rows, +whereas CSV handles large datasets more reliably. diff --git a/media/logo.png b/media/logo.png new file mode 100644 index 0000000..4f81af6 Binary files /dev/null and b/media/logo.png differ diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..2f13b11 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,13 @@ +site_name: pystiller +site_description: A wrap around the DistillerSR APIs +site_url: https://openefsa.github.io/pystiller +repo_url: https://github.com/openefsa/pystiller +docs_dir: docs +site_dir: site + +theme: + name: mkdocs + +nav: + - Home: 'index.md' + - 'Get started': 'guide.md' \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..e99f27a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,46 @@ +[build-system] +requires = ["setuptools>=61", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "pystiller" +version = "1.0.0" +description = "A wrap around the DistillerSR APIs" +readme = "README.md" +authors = [ + { name = "Lorenzo Copelli" }, + { name = "Fulvio Barizzone" }, + { name = "Dayana Stephanie Buzle" }, + { name = "Rafael Vieira" }, + { name = "Luca Belmonte" } +] +maintainers = [ + { name = "Luca Belmonte", email = "luca.belmonte@efsa.europa.eu" } +] +license = "EUPL-1.2" +license-files = ["LICENSE"] +requires-python = ">=3.11" +dependencies = [ + "numpy>=2.0", + "pandas>=2.2", + "requests>=2.32", + "python-dotenv>=1.0" +] + +[project.optional-dependencies] +dev = [ + "openpyxl>=3.1.5", + "coverage>=7.6", + "pytest>=8.0", +] + +[project.urls] +"Homepage" = "https://github.com/openefsa/pystiller" +"Repository" = "https://github.com/openefsa/pystiller" +"Bug Tracker" = "https://github.com/openefsa/pystiller/issues" + +[tool.setuptools] +package-dir = {"" = "src"} + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 
0000000..af97454 Binary files /dev/null and b/requirements.txt differ diff --git a/src/pystiller/__init__.py b/src/pystiller/__init__.py new file mode 100644 index 0000000..f920f1f --- /dev/null +++ b/src/pystiller/__init__.py @@ -0,0 +1,7 @@ +from .client import Client +from ._core._datarama import ReportFormat + +__all__ = [ + "Client", + "ReportFormat" +] diff --git a/src/pystiller/_core/__init__.py b/src/pystiller/_core/__init__.py new file mode 100644 index 0000000..a9a2c5b --- /dev/null +++ b/src/pystiller/_core/__init__.py @@ -0,0 +1 @@ +__all__ = [] diff --git a/src/pystiller/_core/_authentication.py b/src/pystiller/_core/_authentication.py new file mode 100644 index 0000000..c269ea8 --- /dev/null +++ b/src/pystiller/_core/_authentication.py @@ -0,0 +1,42 @@ +"""This module contains core functions for working with the Authentication +endpoint of the DistillerSR API. +""" + +from pystiller._utils import _checks, _requests + + +def _get_authentication_token(distiller_instance_url, distiller_key, + timeout=1800): + """Authenticate to a DistillerSR session. + + This helper function authenticates a user to a DistillerSR instance + using the personal access key. The function sets a valid + authentication token that can be used to access protected DistillerSR + API endpoints and records the exact date and time the token was issued, + which can be used to manage future refresh operations. + + Args: + distiller_instance_url (str): The URL of the DistillerSR instance. + distiller_key (str): The personal access key generated in DistillerSR. + timeout (int, optional): The maximum number of seconds to wait for the + authentication response. Defaults to 1800 seconds (30 minutes). + + Returns: + str: The obtained DistillerSR authentication token. 
+ """ + + _checks._require_type(value=distiller_instance_url, expected_type=str) + _checks._require_type(value=distiller_key, expected_type=str) + _checks._require_type(value=timeout, expected_type=int) + + authentication_response_ = _requests._perform_authentication_request( + distiller_instance_url=distiller_instance_url, + distiller_key=distiller_key, + timeout=timeout) + + _requests._handle_http_errors(authentication_response_) + + response_data_ = _requests._parse_json_response( + authentication_response_) + + return response_data_["token"] diff --git a/src/pystiller/_core/_datarama.py b/src/pystiller/_core/_datarama.py new file mode 100644 index 0000000..a33d20f --- /dev/null +++ b/src/pystiller/_core/_datarama.py @@ -0,0 +1,154 @@ +"""This module contains core functions for working with the Datarama endpoints +of the DistillerSR API. +""" +import time +import pandas as pd +from enum import StrEnum + +from pystiller._utils import _checks, _requests + + +class ReportFormat(StrEnum): + """The supported report formats.""" + CSV = "csv", + EXCEL = "excel" + + +def _get_reports(project_id, distiller_instance_url, distiller_token, + timeout=1800): + """Get the list of the Distiller reports associated to a project. + + This internal function queries the DistillerSR API to retrieve the list of + reports associated with a project. The result is a data frame listing + available reports. + + Args: + project_id (int): The ID of the project as provided by DistillerSR. + distiller_instance_url (str): The URL of the DistillerSR instance. + distiller_token (str): The Distiller authentication token. + timeout (int, optional): The maximum number of seconds to wait for the + service response. Defaults to 1800 seconds (30 minutes). + + Returns: + pd.DataFrame: A data frame with four columns: + - id: The project ID. + - name: The project name. + - date: The creation date of the report. + - view: The format of the report (e.g., html, csv, excel). 
def _get_report(project_id, report_id, distiller_instance_url, distiller_token,
                report_format=ReportFormat.CSV, timeout=1800, attempts=1,
                retry_each=600, verbose=True):
    """Get a Distiller report associated to a project.

    This internal function queries the DistillerSR API to retrieve a saved
    report associated with a project and parses the response into a data
    frame. A failed attempt is retried until ``attempts`` attempts have been
    made, waiting ``retry_each`` seconds between two consecutive attempts.

    Args:
        project_id (int): The ID of the project as provided by DistillerSR.
        report_id (int): The ID of the report as provided by DistillerSR.
        distiller_instance_url (str): The URL of the DistillerSR instance.
        distiller_token (str): The Distiller authentication token.
        report_format (ReportFormat, optional): The format used to parse the
            response. Defaults to CSV (Comma Separated Values).
        timeout (int, optional): The maximum number of seconds to wait for the
            service response. Defaults to 1800 seconds (30 minutes).
        attempts (int, optional): The maximum number of attempts. Must be at
            least 1. Defaults to 1 attempt.
        retry_each (int, optional): The delay, in seconds, between attempts.
            Must not be negative. Defaults to 600 seconds (10 minutes).
        verbose (bool, optional): A flag to specify whether to print progress
            and failure messages. Defaults to True.

    Raises:
        TypeError: If an argument does not have the expected type.
        ValueError: If ``attempts`` is below 1 or ``retry_each`` is negative.
        RuntimeError: If all the attempts failed. The error of the last
            attempt is attached as the cause.

    Returns:
        pd.DataFrame: A data frame containing the Distiller report as designed
            within DistillerSR.
    """

    _checks._require_type(value=project_id, expected_type=int)
    _checks._require_type(value=report_id, expected_type=int)
    _checks._require_type(value=distiller_instance_url, expected_type=str)
    _checks._require_type(value=distiller_token, expected_type=str)
    _checks._require_type(value=report_format, expected_type=ReportFormat)
    _checks._require_type(value=timeout, expected_type=int)
    _checks._require_type(value=attempts, expected_type=int)
    _checks._require_minimum(value=attempts, minimum=1)
    _checks._require_type(value=retry_each, expected_type=int)
    _checks._require_minimum(value=retry_each, minimum=0)

    report_url_ = f"{distiller_instance_url}/datarama/query"

    # The request always asks for the saved format of the report;
    # report_format only selects the parser applied to the response below.
    request_body_ = {
        "project_id": project_id,
        "saved_report_id": report_id,
        "use_saved_format": True
    }

    last_error_ = None

    for attempt_ in range(attempts):
        if verbose and attempts > 1:
            print(f"Starting attempt {attempt_ + 1}...")
        try:
            service_response_ = _requests._perform_service_request(
                service_url=report_url_,
                distiller_token=distiller_token,
                body=request_body_,
                timeout=timeout)

            _requests._handle_http_errors(
                response=service_response_,
                error_message=f"Unable to retrieve report {report_id}")

            if report_format == ReportFormat.CSV:
                response_data_ = _requests._parse_csv_response(
                    response=service_response_,
                    error_message="Failed to parse CSV for report "
                                  + f"{report_id}")
            else:
                response_data_ = _requests._parse_xlsx_response(
                    response=service_response_,
                    error_message="Failed to parse XLSX for report "
                                  + f"{report_id}")

            return pd.DataFrame(response_data_)

        except Exception as e_:
            # Deliberate best effort: any failure (network, HTTP status,
            # parsing) counts as a failed attempt and may be retried.
            last_error_ = e_
            if verbose:
                print(f"Attempt failed with reason:\n{e_}")

        # Wait only when another attempt follows. The previous condition
        # (attempt_ < attempts) was always true, so the function also slept
        # after the final failure before raising.
        if attempt_ < attempts - 1:
            if verbose:
                print(f"Sleeping for {retry_each} seconds...")
            time.sleep(retry_each)

    raise RuntimeError(f"Unable to retrieve report {report_id}\nAll "
                       + "attempts to retrieve the report failed"
                       ) from last_error_
+ """ + + _checks._require_type(value=distiller_instance_url, expected_type=str) + _checks._require_type(value=distiller_token, expected_type=str) + _checks._require_type(value=timeout, expected_type=int) + + projects_url_ = f"{distiller_instance_url}/projects" + + service_response_ = _requests._perform_service_request( + service_url=projects_url_, + distiller_token=distiller_token, + timeout=timeout) + + _requests._handle_http_errors( + response=service_response_, + error_message="Unable to retrieve projects") + + response_data_ = _requests._parse_json_response( + response=service_response_, + error_message="Failed to parse projects service response") + + response_data_ = pd.DataFrame(response_data_) + + return response_data_ diff --git a/src/pystiller/_utils/__init__.py b/src/pystiller/_utils/__init__.py new file mode 100644 index 0000000..a9a2c5b --- /dev/null +++ b/src/pystiller/_utils/__init__.py @@ -0,0 +1 @@ +__all__ = [] diff --git a/src/pystiller/_utils/_checks.py b/src/pystiller/_utils/_checks.py new file mode 100644 index 0000000..d630cc6 --- /dev/null +++ b/src/pystiller/_utils/_checks.py @@ -0,0 +1,60 @@ +"""This module contains internal functions for performing type and data checks. +""" + + +def _require_type(value, expected_type): + """Check that a value is of the expected type. + + Args: + value: The value to check. + expected_type: The expected type. + + Raises: + TypeError: If the value is not of the expected type. + + Returns: + None: The function returns nothing if the check passes. + """ + + if not isinstance(value, expected_type): + raise TypeError(f"Expected type {expected_type}, got {type(value)}") + + +def _require_string_not_empty(value): + """Check that a string is not empty. + + Args: + value: The string to check. + + Raises: + ValueError: If the string is empty. + + Returns: + None: The function returns nothing if the check passes. 
+ """ + + _require_type(value=value, expected_type=str) + + if not value.strip(): + raise ValueError("Expected non-empty string, got empty string") + + +def _require_minimum(value, minimum): + """Check that a value is not less than the given minimum. + + Args: + value: The value to check. + minimum: The minimum value. + + Raises: + ValueError: If the value is not less than the given minimum. + + Returns: + None: The function returns nothing if the check passes. + """ + + _require_type(value=value, expected_type=int) + _require_type(value=minimum, expected_type=int) + + if value < minimum: + raise ValueError(f"Expected value >= {minimum}, got {value}") diff --git a/src/pystiller/_utils/_data.py b/src/pystiller/_utils/_data.py new file mode 100644 index 0000000..dbd67b4 --- /dev/null +++ b/src/pystiller/_utils/_data.py @@ -0,0 +1,195 @@ +"""This module contains internal functions for processing API response data.""" + +import pandas as pd + + +def _flatten(nested_data, separator='_'): + """Flattens nested JSON structures into a pandas DataFrame. + + This helper function automatically expands lists into multiple rows and + flattens nested dictionaries. + + Args: + nested_data: The dictionary or list of dictionaries to flatten. + separator (str): The separator to use for nested columns names. + Defaults to '_'. + + Raises: + TypeError: If data is not a dictionary or a list of dictionaries. Data + can be empty. + + Returns: + pandas.DataFrame: A flattened DataFrame, where: + - Lists are expanded into multiple rows. + - Nested dictionaries become columns with separator-notation. + - Parent-level fields are repeated for each child row. + """ + + def _flatten_dictionary(dictionary, sep, parent_key=''): + """Flatten a nested dictionary without expanding lists. + + This helper function recursively traverses a dictionary and creates + flat key-value pairs for nested keys, using the specified separator. + Lists are kept as-is and not expanded by this function. 
+ + Args: + dictionary (dict): The dictionary to flatten. + sep (str): The separator to use between parent and child keys. + parent_key (str, optional): The parent key for the flattened + dictionary. It is the prefix to prepend to keys (used in + recursion). Defaults to ''. + + Returns: + dict: A flattened dictionary with sep-notated keys for nested + structures. + """ + + dictionary_items_ = [] + + for key_, value_ in dictionary.items(): + new_key_ = f"{parent_key}{sep}{key_}" if parent_key else key_ + if isinstance(value_, dict): + dictionary_items_.extend( + _flatten_dictionary(dictionary=value_, sep=sep, + parent_key=new_key_).items()) + else: + dictionary_items_.append((new_key_, value_)) + + return dict(dictionary_items_) + + + def _is_dictionary_of_lists(dictionary): + """Check if a dictionary contains only lists as values. + + This helper function identifies the special case where the top-level + dictionary keys represent categories amd all values are lists. + + Args: + dictionary (dict): The dictionary to check. + + Returns: + bool: True if all values in the dictionary are lists, False + otherwise. + """ + + return all(isinstance(value_, list) for value_ in dictionary.values()) + + + def _expand_dictionary_of_lists(dictionary, sep): + """Expand a dictionary where all values are lists. + + This helper function handles the special case where each top-level key + maps to a list of items. It creates a "key" column to preserve the + original dictionary key and expands each list item into a separate row. + + Args: + dictionary (dict): The dictionary where all values are lists. + sep (str): The separator for flattening nested dictionaries within + list items. + + Returns: + pandas.DataFrame: DataFrame with a "key" column containing the + original dictionary key and additional columns from the list + items. 
+ """ + + expanded_rows_ = [] + + for key_, value_ in dictionary.items(): + for item_ in value_: + if isinstance(item_, dict): + row_ = {"parent_key": key_} + flat_item_ = _flatten_dictionary(dictionary=item_, sep=sep) + row_.update(flat_item_) + expanded_rows_.append(row_) + else: + expanded_rows_.append({"parent_key": key_, + "atomic_value": item_}) + + return pd.DataFrame(expanded_rows_) + + + def _find_list_columns(dictionary): + """Finds dictionary keys that contain lists of dictionaries. + + This helper function identifies columns that need to be expanded into + multiple rows. It only considers lists that contain at least one + dictionary. + + Args: + dictionary (dict): The dictionary to check. + + Returns: + list: The list of keys whose values are lists of dictionaries. + """ + + list_keys_ = [] + + for key_, value_ in dictionary.items(): + if (isinstance(value_, list) and value_ + and any(isinstance(item_, dict) for item_ in value_)): + list_keys_.append(key_) + + return list_keys_ + + + def _expand_lists(dictionary, sep): + """Recursively expand lists in a dictionary into DataFrame rows. + + This helper is the main recursive function that handles the expansion + logic. It detects different types of structures and applies the + appropriate expansion strategy. It processes one list level at a time + and recurses for additional nested lists. + + Args: + dictionary (dict): The dictionary to process. + sep (str): The separator for nested column names. + + Returns: + pandas.DataFrame: A DataFrame with all lists expanded into rows and + nested dictionaries flattened. 
+ """ + + if _is_dictionary_of_lists(dictionary=dictionary): + return _expand_dictionary_of_lists(dictionary=dictionary, sep=sep) + + list_keys_ = _find_list_columns(dictionary=dictionary) + + if not list_keys_: + flat_ = _flatten_dictionary(dictionary=dictionary, sep=sep) + return pd.DataFrame([flat_]) + + list_key_ = list_keys_[0] + list_data_ = dictionary[list_key_] + + base_data_ = {key_: value_ for key_, value_ in dictionary.items() + if key_ != list_key_} + + expanded_rows_ = [] + + for item_ in list_data_: + row_data_ = base_data_.copy() + + if isinstance(item_, dict): + for key_, value_ in item_.items(): + row_data_[f"{list_key_}{sep}{key_}"] = value_ + else: + row_data_[list_key_] = item_ + + expanded_rows_.append(_expand_lists(dictionary=row_data_, sep=sep)) + + return pd.concat(expanded_rows_, ignore_index=True) + + if not nested_data: + return pd.DataFrame() + + elif (isinstance(nested_data, list) + and all(isinstance(item_, dict) for item_ in nested_data)): + dataframes_ = [_expand_lists(dictionary=item_, sep=separator) + for item_ in nested_data] + return pd.concat(dataframes_, ignore_index=True) + + elif isinstance(nested_data, dict): + return _expand_lists(dictionary=nested_data, sep=separator) + + raise TypeError("Data must empty, a dictionary or a list of dictionaries") diff --git a/src/pystiller/_utils/_env.py b/src/pystiller/_utils/_env.py new file mode 100644 index 0000000..0a25dc7 --- /dev/null +++ b/src/pystiller/_utils/_env.py @@ -0,0 +1,32 @@ +"""This module contains internal functions for working with environment +variables.""" + +import os +from dotenv import load_dotenv + +from pystiller._utils import _checks + + +def _read_environment_variable(name): + """Reads an environment variable. + + Args: + name: The name of the environment variable to read. + + Raises: + ValueError: If the value is not set. + + Returns: + str: The value of the environment variable. 
def _perform_authentication_request(distiller_instance_url, distiller_key,
                                    timeout=1800):
    """Build and execute an authentication request for the DistillerSR API.

    This helper function sends a POST request to the ``/auth`` endpoint of the
    DistillerSR instance, passing the personal access key in the
    ``Authorization`` header. The response is returned as-is: neither its
    status code nor its body are inspected here.

    Args:
        distiller_instance_url (str): The URL of the DistillerSR instance.
        distiller_key (str): The personal access key generated in DistillerSR.
        timeout (int, optional): The maximum number of seconds to wait for the
            authentication response. Defaults to 1800 seconds (30 minutes).

    Raises:
        TypeError: If an argument does not have the expected type.

    Returns:
        class (requests.Response): The HTTP response object returned by
            the request.
    """

    _checks._require_type(value=distiller_instance_url, expected_type=str)
    _checks._require_type(value=distiller_key, expected_type=str)
    _checks._require_type(value=timeout, expected_type=int)

    request_headers_ = {
        "Authorization": f"Key {distiller_key}",
        "Content-Type": "application/octet-stream"
    }

    # The authentication endpoint is called with POST (the previous docstring
    # described it as a GET request).
    response_ = requests.post(
        f"{distiller_instance_url}/auth",
        headers=request_headers_,
        timeout=timeout)

    return response_
def _parse_json_response(response,
                         error_message="Failed to parse JSON response",
                         flatten=False):
    """Parse a JSON API response.

    This helper function parses the JSON body of an API response. If the API
    response body cannot be parsed as valid JSON, the function raises an error.

    Args:
        response (requests.Response): The HTTP response object.
        error_message (str, optional): The message to display in case of
            errors.
        flatten (bool, optional): If True, flattens the response body into a
            data frame. Defaults to False.

    Raises:
        TypeError: If ``response`` or ``error_message`` does not have the
            expected type.
        JSONDecodeError: If the response body can not be parsed as valid JSON,
            or if a KeyError is raised while parsing or flattening it. The
            original error is attached as the cause.

    Returns:
        The parsed JSON body as returned by ``response.json()`` or, when
        ``flatten`` is True, the pd.DataFrame produced by flattening it.
    """

    _checks._require_type(value=response, expected_type=requests.Response)
    _checks._require_type(value=error_message, expected_type=str)

    try:
        response_data_ = response.json()
        if flatten:
            response_data_ = _data._flatten(response_data_)
    except (JSONDecodeError, KeyError) as e_:
        # A KeyError has none of the msg/doc/pos attributes of a
        # JSONDecodeError; reading them directly raised an AttributeError that
        # hid the real problem. Fall back to neutral values instead.
        reason_ = getattr(e_, "msg", str(e_))
        raise JSONDecodeError(f"{error_message}\n{reason_}",
                              getattr(e_, "doc", ""),
                              getattr(e_, "pos", 0)) from e_

    return response_data_
class Client:
    """Client for working with the DistillerSR API.

    A token is requested as soon as the client is created. When the automatic
    token refresh is enabled, each public method renews the token before
    calling the API if the token is about to expire.

    Attributes:
        _distiller_key (str): The API key used for authentication.
        _distiller_instance_url (str): The Distiller instance URL, without a
            trailing slash.
        _distiller_token (str): The Distiller authorization token.
        _automatic_token_refresh (bool): If True, automatic token refresh is
            performed when the token is going to expire.
        _distiller_token_last_update (datetime.datetime): The datetime of the
            last Distiller authentication token update (needed for refreshes).

    Methods:
        get_projects(): Gets the list of Distiller projects associated with the
            user.
        get_reports(project_id): Gets the list of Distiller reports associated
            to a project.
        get_report(project_id, report_id): Retrieves a specific Distiller
            report.
        _get_or_refresh_token(): Requests a token when none is set or when the
            current one is about to expire.
    """

    def __init__(self, distiller_key=None, distiller_instance_url=None,
                 automatic_token_refresh=False):
        """Initialize the client and request a first token.

        Args:
            distiller_key (str, optional): The API key used for authentication.
                When omitted, the DISTILLER_API_KEY environment variable is
                read.
            distiller_instance_url (str, optional): The Distiller instance URL.
                When omitted, the DISTILLER_INSTANCE_URL environment variable
                is read.
            automatic_token_refresh (bool, optional): If True, automatically
                refresh the Distiller token if it is going to expire. Defaults
                to False.

        Examples:
            >>> from pystiller import Client

            >>> # Create a client using the API key and the instance URL
            >>> # defined in the .env file.
            >>> client_with_default = Client()

            >>> # Create a client using manually specified API key and instance
            >>> # URL (both must be non-empty strings).
            >>> client_with_customs = Client(
            ...     distiller_key="your-api-key",
            ...     distiller_instance_url="https://your-instance-url"
            ... )
        """

        # Explicit arguments win over the environment variables.
        self._distiller_key = (
            distiller_key if distiller_key is not None
            else _env._read_environment_variable(name="DISTILLER_API_KEY"))

        _checks._require_type(value=self._distiller_key, expected_type=str)
        _checks._require_string_not_empty(value=self._distiller_key)

        self._distiller_instance_url = (
            distiller_instance_url if distiller_instance_url is not None
            else _env._read_environment_variable(
                name="DISTILLER_INSTANCE_URL"))

        _checks._require_type(value=self._distiller_instance_url,
                              expected_type=str)
        _checks._require_string_not_empty(value=self._distiller_instance_url)

        # Drop a single trailing slash: endpoint paths are appended to the
        # instance URL with a leading slash.
        self._distiller_instance_url = (
            self._distiller_instance_url.removesuffix('/'))

        self._automatic_token_refresh = automatic_token_refresh
        _checks._require_type(value=self._automatic_token_refresh,
                              expected_type=bool)

        self._distiller_token = None
        self._distiller_token_last_update = None

        self._get_or_refresh_token()

    def _get_or_refresh_token(self):
        """Get or refresh a Distiller token.

        A new token is requested when no token is set (e.g., at client
        initialisation) or when the current one was obtained 55 minutes ago
        or more. The age of the token is computed from the timestamp of its
        last update. The 55-minute threshold was chosen against a default
        Distiller token duration of 60 minutes (1 hour).

        Returns:
            None: The function returns nothing.
        """

        checked_at_ = datetime.now()
        issued_at_ = self._distiller_token_last_update

        is_stale_ = (bool(issued_at_)
                     and checked_at_ - issued_at_ >= timedelta(minutes=55))

        # Nothing to do while a token is available and still fresh.
        if self._distiller_token and not is_stale_:
            return

        self._distiller_token = _authentication._get_authentication_token(
            distiller_instance_url=self._distiller_instance_url,
            distiller_key=self._distiller_key)
        self._distiller_token_last_update = checked_at_

    def get_projects(self, timeout=1800):
        """Get the list of the Distiller projects associated to the user.

        This method queries the DistillerSR API to retrieve the list of
        projects accessible to the authenticated user. The result is a data
        frame listing available projects.

        Args:
            timeout (int, optional): The maximum number of seconds to wait for
                the response. Defaults to 1800 seconds (30 minutes).

        Returns:
            pd.DataFrame: A data frame with four columns:
                - id: The project ID.
                - name: The project name.
                - de_project_id.
                - is_hidden.

        Examples:
            >>> from pystiller import Client

            >>> client = Client()

            >>> # Get the list of available projects.
            >>> projects = client.get_projects()
        """

        if self._automatic_token_refresh:
            self._get_or_refresh_token()

        projects_ = _projects._get_projects(
            distiller_instance_url=self._distiller_instance_url,
            distiller_token=self._distiller_token,
            timeout=timeout)

        return projects_

    def get_reports(self, project_id, timeout=1800):
        """Get the list of the Distiller reports associated to a project.

        This method queries the DistillerSR API to retrieve the list of
        reports associated with a project. The result is a data frame listing
        available reports.

        Args:
            project_id (int): The ID of the project as provided by DistillerSR.
            timeout (int, optional): The maximum number of seconds to wait for
                the service response. Defaults to 1800 seconds (30 minutes).

        Returns:
            pd.DataFrame: A data frame with four columns:
                - id: The project ID.
                - name: The project name.
                - date: The creation date of the report.
                - view: The format of the report (e.g., html, csv, excel).

        Examples:
            >>> from pystiller import Client

            >>> client = Client()

            >>> # Get the list of available reports.
            >>> reports = client.get_reports(project_id=123)
        """

        if self._automatic_token_refresh:
            self._get_or_refresh_token()

        reports_ = _datarama._get_reports(
            project_id=project_id,
            distiller_instance_url=self._distiller_instance_url,
            distiller_token=self._distiller_token,
            timeout=timeout)

        return reports_

    def get_report(self, project_id, report_id,
                   report_format=ReportFormat.CSV, timeout=1800, attempts=1,
                   retry_each=600, verbose=True):
        """Get a Distiller report associated to a project.

        This method queries the DistillerSR API to retrieve a saved report
        associated with a project. The result is a data frame containing the
        saved report.

        Args:
            project_id (int): The ID of the project as provided by DistillerSR.
            report_id (int): The ID of the report as provided by DistillerSR.
            report_format (ReportFormat, optional): The desired format of the
                document. Defaults to CSV (Comma Separated Values).
            timeout (int, optional): The maximum number of seconds to wait for
                the service response. Defaults to 1800 seconds (30 minutes).
            attempts (int, optional): The maximum number of attempts. Defaults
                to 1 attempt.
            retry_each (int, optional): The delay between attempts. Defaults to
                600 seconds (10 minutes).
            verbose (bool, optional): A flag to specify whether to make the
                function verbose or not. Defaults to True.

        Returns:
            pd.DataFrame: A data frame containing the Distiller report as
                designed within DistillerSR.

        Examples:
            >>> from pystiller import Client, ReportFormat

            >>> client = Client()

            >>> # Get a specific report.
            >>> report = client.get_report(project_id=123, report_id=456)
        """

        if self._automatic_token_refresh:
            self._get_or_refresh_token()

        report_ = _datarama._get_report(
            project_id=project_id,
            report_id=report_id,
            distiller_instance_url=self._distiller_instance_url,
            distiller_token=self._distiller_token,
            report_format=report_format,
            timeout=timeout,
            attempts=attempts,
            retry_each=retry_each,
            verbose=verbose)

        return report_
+ + "_perform_authentication_request") + def test__get_authentication_token_bad_url(self, mock_auth_req): + """Test the behaviour for bad instance URLs.""" + mock_auth_req.side_effect = requests.exceptions.ConnectionError + self.assertRaises(Exception, _get_authentication_token, + distiller_instance_url="https://invalid-domain", + distiller_key="DISTILLER_API_KEY") + + # This test performs real requests. + @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__get_authentication_token_bad_url_online(self): + """Test the behaviour for bad instance URLs.""" + self.assertRaises(Exception, _get_authentication_token, + distiller_instance_url="https://invalid-domain", + distiller_key="DISTILLER_API_KEY") + + @patch("pystiller._core._authentication._requests." + + "_perform_authentication_request") + def test__get_authentication_token_output(self, mock_auth_req): + """Test the output type of the request.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "GET" + response_.headers["Content-Type"] = "application/json" + response_._content = (json.dumps({"token": "DISTILLER_TOKEN"}) + .encode("utf-8")) + mock_auth_req.return_value = response_ + token_ = _get_authentication_token( + distiller_instance_url="https://example.org", + distiller_key="DISTILLER_API_KEY" + ) + self.assertIsInstance(token_, str) + + # This test requires the DISTILLER_API_KEY and DISTILLER_INSTANCE_URL + # environment variables to be set. + # This test performs real requests to the DistillerSR API. 
+ @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__get_authentication_token_output_online(self): + """Test the output type of the request.""" + token_ = _get_authentication_token( + distiller_instance_url=os.getenv("DISTILLER_INSTANCE_URL"), + distiller_key=os.getenv("DISTILLER_API_KEY") + ) + self.assertIsInstance(token_, str) diff --git a/tests/test__checks.py b/tests/test__checks.py new file mode 100644 index 0000000..975dd28 --- /dev/null +++ b/tests/test__checks.py @@ -0,0 +1,53 @@ +import unittest + +from pystiller._utils._checks import (_require_type, _require_string_not_empty, + _require_minimum) + + +class TestChecks(unittest.TestCase): + + ################### + # _require_type() # + ################### + + def test__require_type_invalid(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, _require_type, value=123, + expected_type=str) + + def test__require_type_output(self): + """Test the behaviour for valid data.""" + self.assertIsNone(_require_type(value=123, expected_type=int)) + + ############################### + # _require_string_not_empty() # + ############################### + + def test__require_string_not_empty_invalid(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, _require_string_not_empty, value=123) + + def test__require_string_not_empty_empty(self): + """Test the behaviour for empty strings.""" + self.assertRaises(ValueError, _require_string_not_empty, value="") + + def test__require_string_not_empty_output(self): + """Test the behaviour for valid data.""" + self.assertIsNone(_require_string_not_empty(value="test")) + + ###################### + # _require_minimum() # + ###################### + + def test__require_minimum_invalid(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, _require_minimum, value='', minimum=0) + self.assertRaises(TypeError, _require_minimum, value=0, minimum='') + + def 
test__require_minimum_wrong(self): + """Test the behaviour for values below the minimum.""" + self.assertRaises(ValueError, _require_minimum, value=0, minimum=1) + + def test__require_minimum_output(self): + """Test the behaviour for valid data.""" + self.assertIsNone(_require_minimum(value=1, minimum=0)) diff --git a/tests/test__data.py b/tests/test__data.py new file mode 100644 index 0000000..4948c83 --- /dev/null +++ b/tests/test__data.py @@ -0,0 +1,130 @@ +import unittest + +from pystiller._utils._data import _flatten + + +class TestData(unittest.TestCase): + + ############## + # _flatten() # + ############## + + def test__flatten_invalid(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, _flatten, nested_data=123) + + def test__flatten_empty(self): + """Test the behaviour for empty data.""" + self.assertTrue(_flatten(nested_data=list()).empty) + self.assertTrue(_flatten(nested_data=dict()).empty) + + def test__flatten_sep(self): + flattened_ = _flatten(nested_data={'a': {'b': 1}}) + self.assertEqual(flattened_.keys()[0], "a_b") + flattened_ = _flatten(nested_data={'a': {'b': 1}}, separator='.') + self.assertEqual(flattened_.keys()[0], "a.b") + + def test__flatten_dict1(self): + """Test the behaviour for flattening a dictionary.""" + flattened_ = _flatten(nested_data={ + 'a': 1, + 'b': 2, + 'c': 3, + 'd': { + '1': 4, + '2': 5 + } + }) + self.assertEqual( + list(flattened_.keys()), + ['a', 'b', 'c', "d_1", "d_2"] + ) + + def test__flatten_dict2(self): + """Test the behaviour for flattening a dictionary.""" + flattened_ = _flatten(nested_data={ + 'a': 1, + 'b': 2, + 'c': 3, + 'd': { + '1': 4, + '2': { + '3': 5 + } + } + }) + self.assertEqual( + list(flattened_.keys()), + ['a', 'b', 'c', "d_1", "d_2_3"] + ) + + def test__flatten_dict3(self): + """Test the behaviour for flattening a dictionary.""" + flattened_ = _flatten(nested_data={ + 'a': 1, + 'b': [ + {'x': 1, 'y': 2, 'z': 3}, + {'x': 4, 'y': 5, 'z': 6} + ] + }) + self.assertEqual( +
list(flattened_.keys()), + ['a', "b_x", "b_y", "b_z"] + ) + + def test__flatten_dict4(self): + """Test the behaviour for flattening a dictionary.""" + flattened_ = _flatten(nested_data={ + 'a': 1, + 'b': [ + {'x': 1, 'y': 2, 'z': 3}, + {'x': 4, 'y': 5, 'z': 6}, + "some text" + ] + }) + self.assertEqual( + list(flattened_.keys()), + ['a', "b_x", "b_y", "b_z", 'b'] + ) + + def test__flatten_list_of_dicts(self): + """Test the behaviour for flattening a list of dictionaries.""" + flattened_ = _flatten(nested_data=[ + {'a': 1, 'b': 2, 'c': 3}, + {'a': 4, 'b': 5, 'c': 6} + ]) + self.assertEqual(list(flattened_.keys()), ['a', 'b', 'c']) + + def test__flatten_dict_of_lists1(self): + """Test the behaviour for flattening a dictionary of lists.""" + flattened_ = _flatten(nested_data={ + 'a': [ + {'x': 1, 'y': 2, 'z': 3}, + {'x': 4, 'y': 5, 'z': 6} + ], + 'b': [ + {'x': 7, 'y': 8, 'z': 9}, + {'x': 10, 'y': 11, 'z': 12} + ] + }) + self.assertEqual( + list(flattened_.keys()), + ["parent_key", 'x', 'y', 'z'] + ) + + def test__flatten_dict_of_lists2(self): + """Test the behaviour for flattening a dictionary of lists.""" + flattened_ = _flatten(nested_data={ + 'a': [ + {'x': 1, 'y': 2, 'z': 3}, + {'x': 4, 'y': 5, 'z': 6} + ], + 'b': [ + {'x': 7, 'y': 8, 'z': 9}, + "some text" + ] + }) + self.assertEqual( + list(flattened_.keys()), + ["parent_key", 'x', 'y', 'z', "atomic_value"] + ) diff --git a/tests/test__datarama.py b/tests/test__datarama.py new file mode 100644 index 0000000..07d7746 --- /dev/null +++ b/tests/test__datarama.py @@ -0,0 +1,240 @@ +import io +import json +import os +import unittest +from unittest.mock import patch +from requests.exceptions import HTTPError +import pandas as pd +import requests +from dotenv import load_dotenv +from pystiller._core import _authentication +from requests import Response + +from pystiller._core._datarama import _get_reports, _get_report, ReportFormat + +load_dotenv() + + +class TestDatarama(unittest.TestCase): + + ################## + # 
_get_reports() # + ################## + + def test__get_reports_types(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, _get_reports, project_id='', + distiller_instance_url=123, distiller_token="") + self.assertRaises(TypeError, _get_reports, project_id=123, + distiller_instance_url=123, distiller_token="") + self.assertRaises(TypeError, _get_reports, project_id=123, + distiller_instance_url="", distiller_token=123) + self.assertRaises(TypeError, _get_reports, project_id=123, + distiller_instance_url="", distiller_token="", + timeout="") + + @patch("pystiller._core._datarama._requests._perform_service_request") + def test__get_reports_bad_url(self, mock_serv_req): + """Test the behaviour for bad instance URLs.""" + mock_serv_req.side_effect = requests.exceptions.ConnectionError + self.assertRaises(Exception, _get_reports, project_id=123, + distiller_instance_url="https://invalid-domain", + distiller_token="DISTILLER_TOKEN") + + # This test performs real requests. 
+ @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__get_reports_bad_url_online(self): + """Test the behaviour for bad instance URLs.""" + self.assertRaises(Exception, _get_reports, project_id=123, + distiller_instance_url="https://invalid-domain", + distiller_token="DISTILLER_TOKEN") + + @patch("pystiller._core._datarama._requests._perform_service_request") + def test__get_reports_output(self, mock_serv_req): + """Test the output type of the request.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "GET" + response_.headers["Content-Type"] = "application/json" + response_._content = json.dumps({ + "a": [1], "b": [2], "c": [3], "d": [4] + }).encode("utf-8") + mock_serv_req.return_value = response_ + response_ = _get_reports( + project_id=123, + distiller_instance_url="https://example.org", + distiller_token="DISTILLER_TOKEN" + ) + self.assertIsInstance(response_, pd.DataFrame) + self.assertEqual(len(response_.columns), 4) + + # This test requires the DISTILLER_API_KEY, DISTILLER_INSTANCE_URL, and + # DISTILLER_PROJECT_ID_TEST environment variables to be set. + # This test performs real requests to the DistillerSR API. 
+ @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__get_reports_output_online(self): + """Test the output type of the request.""" + token_ = _authentication._get_authentication_token( + distiller_instance_url=os.getenv("DISTILLER_INSTANCE_URL"), + distiller_key=os.getenv("DISTILLER_API_KEY") + ) + response_ = _get_reports( + project_id=int(os.getenv("DISTILLER_PROJECT_ID_TEST")), + distiller_instance_url=os.getenv("DISTILLER_INSTANCE_URL"), + distiller_token=token_ + ) + self.assertIsInstance(response_, pd.DataFrame) + self.assertEqual(len(response_.columns), 4) + + ################# + # _get_report() # + ################# + + def test__get_report_types(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, _get_report, project_id="", report_id=456, + distiller_instance_url="", distiller_token="") + self.assertRaises(TypeError, _get_report, project_id=123, report_id="", + distiller_instance_url="", distiller_token="") + self.assertRaises(TypeError, _get_report, project_id=123, + report_id=456, distiller_instance_url=123, + distiller_token="") + self.assertRaises(TypeError, _get_report, project_id=123, + report_id=456, distiller_instance_url="", + distiller_token=123) + self.assertRaises(TypeError, _get_report, project_id=123, + report_id=456, distiller_instance_url="", + distiller_token="", report_format=123) + self.assertRaises(TypeError, _get_report, project_id=123, + report_id=456, distiller_instance_url="", + distiller_token="", report_format=ReportFormat.CSV, + timeout="") + self.assertRaises(TypeError, _get_report, project_id=123, + report_id=456, distiller_instance_url="", + distiller_token="", report_format=ReportFormat.CSV, + timeout=1, attempts="") + self.assertRaises(TypeError, _get_report, project_id=123, + report_id=456, distiller_instance_url="", + distiller_token="", report_format=ReportFormat.CSV, + timeout=1, attempts=1, retry_each="") + self.assertRaises(ValueError, 
_get_report, project_id=123, + report_id=456, distiller_instance_url="", + distiller_token="", report_format=ReportFormat.CSV, + timeout=1, attempts=0, retry_each=1) + self.assertRaises(ValueError, _get_report, project_id=123, + report_id=456, distiller_instance_url="", + distiller_token="", report_format=ReportFormat.CSV, + timeout=1, attempts=1, retry_each=-1) + + @patch("pystiller._core._datarama._requests._perform_service_request") + def test__get_report_bad_url(self, mock_serv_req): + """Test the behaviour for bad instance URLs.""" + mock_serv_req.side_effect = requests.exceptions.ConnectionError + self.assertRaises(Exception, _get_report, project_id=123, + report_id=456, attempts=1, retry_each=1, + distiller_instance_url="https://invalid-domain", + distiller_token="DISTILLER_TOKEN") + + # This test performs real requests. + @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__get_report_bad_url_online(self): + """Test the behaviour for bad instance URLs.""" + self.assertRaises(Exception, _get_report, project_id=123, + report_id=456, attempts=1, retry_each=1, + distiller_instance_url="https://invalid-domain", + distiller_token="DISTILLER_TOKEN") + + @patch("pystiller._core._datarama._requests._perform_service_request") + def test__get_report_output_xlsx(self, mock_serv_req): + """Test the output type of the request.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "POST" + response_.headers["Content-Type"] = ( + "application/vnd.openxmlformats-officedocuments." 
+ + "spreadsheetml.sheet" + ) + dataframe_ = pd.DataFrame({ + 'a': [1], 'b': [2], 'c': [3], 'd': [4] + }) + buffer_ = io.BytesIO() + dataframe_.to_excel(buffer_, index=True) # type: ignore[arg-type] + buffer_.seek(0) + response_._content = buffer_.getvalue() + mock_serv_req.return_value = response_ + response_ = _get_report( + project_id=123, report_id=456, report_format=ReportFormat.EXCEL, + distiller_instance_url="https://example.org", attempts=1, + distiller_token="DISTILLER_TOKEN" + ) + self.assertIsInstance(response_, pd.DataFrame) + + # This test requires the DISTILLER_API_KEY, DISTILLER_INSTANCE_URL, + # DISTILLER_PROJECT_ID_TEST, and DISTILLER_REPORT_ID_TEST environment + # variables to be set. This test performs real requests to the DistillerSR + # API. + @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__get_report_output_xlsx_online(self): + """Test the output type of the request.""" + token_ = _authentication._get_authentication_token( + distiller_instance_url=os.getenv("DISTILLER_INSTANCE_URL"), + distiller_key=os.getenv("DISTILLER_API_KEY") + ) + response_ = _get_report( + project_id=int(os.getenv("DISTILLER_PROJECT_ID_TEST")), + report_id=int(os.getenv("DISTILLER_REPORT_ID_TEST")), + report_format=ReportFormat.EXCEL, + distiller_instance_url=os.getenv("DISTILLER_INSTANCE_URL"), + distiller_token=token_, attempts=1 + ) + self.assertIsInstance(response_, pd.DataFrame) + + @patch("pystiller._core._datarama._requests._perform_service_request") + def test__get_report_output_csv(self, mock_serv_req): + """Test the output type of the request.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "POST" + response_.headers["Content-Type"] = "text/csv" + response_._content = "a,b\n1,2".encode("utf-8") + mock_serv_req.return_value = response_ + response_ = _get_report( + project_id=123, report_id=456, report_format=ReportFormat.CSV, + 
distiller_instance_url="https://example.org", + distiller_token="DISTILLER_TOKEN", attempts=1 + ) + self.assertIsInstance(response_, pd.DataFrame) + + @patch("pystiller._core._datarama._requests._perform_service_request") + @patch("pystiller._core._datarama.time.sleep") + def test__get_report_delay(self, mock_sleep, mock_serv_req): + """Test that a failed attempt is followed by a retry delay.""" + sleep_called_ = False + def _mark_sleep(*args, **kwargs): + nonlocal sleep_called_ + sleep_called_ = True + mock_sleep.side_effect = _mark_sleep + mock_serv_req.side_effect = HTTPError + self.assertRaises(RuntimeError, _get_report, project_id=123, + report_id=456, report_format=ReportFormat.CSV, + distiller_instance_url="https://example.org", + distiller_token="DISTILLER_TOKEN", attempts=2, + retry_each=1, verbose=False) + self.assertTrue(sleep_called_) + + @patch("pystiller._core._datarama._requests._perform_service_request") + def test__get_report_verbose(self, mock_serv_req): + """Test the behaviour for failing requests with default verbosity.""" + mock_serv_req.side_effect = HTTPError + self.assertRaises(RuntimeError, _get_report, project_id=123, + report_id=456, report_format=ReportFormat.CSV, + distiller_instance_url="https://example.org", + distiller_token="DISTILLER_TOKEN", attempts=2, + retry_each=1) diff --git a/tests/test__env.py b/tests/test__env.py new file mode 100644 index 0000000..476644a --- /dev/null +++ b/tests/test__env.py @@ -0,0 +1,28 @@ +import os +import unittest + +from pystiller._utils._env import _read_environment_variable + + +class TestEnv(unittest.TestCase): + + #################################### + # _read_environment_variable() # + #################################### + + def test__read_environment_variable_invalid(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, _read_environment_variable, name=123) + + def test__read_environment_variable_not_set(self): + """Test the behaviour for unset environment variables.""" + if "TEST_VAR" in os.environ: + del
os.environ["TEST_VAR"] + self.assertRaises(ValueError, _read_environment_variable, + name="TEST_VAR") + + def test__read_environment_variable_output(self): + """Test the behaviour for valid data.""" + os.environ["TEST_VAR"] = "some value" + self.assertIsInstance(_read_environment_variable(name="TEST_VAR"), str) + del os.environ["TEST_VAR"] diff --git a/tests/test__projects.py b/tests/test__projects.py new file mode 100644 index 0000000..37dce0b --- /dev/null +++ b/tests/test__projects.py @@ -0,0 +1,84 @@ +import json +import os +import unittest +from unittest.mock import patch +import pandas as pd +import requests +from dotenv import load_dotenv +from pystiller._core import _authentication +from requests import Response + +from pystiller._core._projects import _get_projects + +load_dotenv() + + +class TestProjects(unittest.TestCase): + + ################### + # _get_projects() # + ################### + + def test__get_projects_types(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, _get_projects, + distiller_instance_url=123, distiller_token="") + self.assertRaises(TypeError, _get_projects, + distiller_instance_url="", distiller_token=123) + self.assertRaises(TypeError, _get_projects, + distiller_instance_url="", distiller_token="", + timeout="") + + @patch("pystiller._core._projects._requests._perform_service_request") + def test__get_projects_bad_url(self, mock_serv_req): + """Test the behaviour for bad instance URLs.""" + mock_serv_req.side_effect = requests.exceptions.ConnectionError + self.assertRaises(Exception, _get_projects, + distiller_instance_url="https://invalid-domain", + distiller_token="DISTILLER_TOKEN") + + # This test performs real requests. 
+ @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__get_projects_bad_url_online(self): + """Test the behaviour for bad instance URLs.""" + self.assertRaises(Exception, _get_projects, + distiller_instance_url="https://invalid-domain", + distiller_token="DISTILLER_TOKEN") + + @patch("pystiller._core._projects._requests._perform_service_request") + def test__get_projects_output(self, mock_serv_req): + """Test the output type of the request.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "GET" + response_.headers["Content-Type"] = "application/json" + response_._content = json.dumps({ + "a": [1], "b": [2], "c": [3], "d": [4] + }).encode("utf-8") + mock_serv_req.return_value = response_ + response_ = _get_projects( + distiller_instance_url="https://example.org", + distiller_token="DISTILLER_TOKEN" + ) + self.assertIsInstance(response_, pd.DataFrame) + self.assertEqual(len(response_.columns), 4) + + # This test requires the DISTILLER_API_KEY and DISTILLER_INSTANCE_URL + # environment variables to be set. + # This test performs real requests to the DistillerSR API. 
+ @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__get_projects_output_online(self): + """Test the output type of the request.""" + token_ = _authentication._get_authentication_token( + distiller_instance_url=os.getenv("DISTILLER_INSTANCE_URL"), + distiller_key=os.getenv("DISTILLER_API_KEY") + ) + response_ = _get_projects( + distiller_instance_url=os.getenv("DISTILLER_INSTANCE_URL"), + distiller_token=token_ + ) + self.assertIsInstance(response_, pd.DataFrame) + self.assertEqual(len(response_.columns), 4) diff --git a/tests/test__requests.py b/tests/test__requests.py new file mode 100644 index 0000000..29288eb --- /dev/null +++ b/tests/test__requests.py @@ -0,0 +1,356 @@ +import io +import json +import os +import unittest +from json import JSONDecodeError +from unittest.mock import patch +import pandas as pd +import requests +from dotenv import load_dotenv +from requests import Response, HTTPError + +from pystiller._utils._requests import (_perform_authentication_request, + _perform_service_request, + _handle_http_errors, + _parse_json_response, + _parse_csv_response, + _parse_xlsx_response) + +load_dotenv() + + +class TestRequests(unittest.TestCase): + + ##################################### + # _perform_authentication_request() # + ##################################### + + def test__perform_authentication_request_types(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, _perform_authentication_request, + distiller_instance_url=123, distiller_key="") + self.assertRaises(TypeError, _perform_authentication_request, + distiller_instance_url="", distiller_key=123) + self.assertRaises(TypeError, _perform_authentication_request, + distiller_instance_url="", distiller_key="", + timeout="") + + @patch("pystiller._utils._requests.requests.post") + def test__perform_authentication_request_bad_url(self, mock_post): + """Test the behaviour for bad instance URLs.""" + mock_post.side_effect = 
requests.exceptions.ConnectionError + self.assertRaises(Exception, _perform_authentication_request, + distiller_instance_url="https://invalid-domain", + distiller_key="DISTILLER_API_KEY") + + # This test performs real requests. + @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__perform_authentication_request_bad_url_online(self): + """Test the behaviour for bad instance URLs.""" + self.assertRaises(Exception, _perform_authentication_request, + distiller_instance_url="https://invalid-domain", + distiller_key="DISTILLER_API_KEY") + + @patch("pystiller._utils._requests.requests.post") + def test__perform_authentication_request_output(self, mock_post): + """Test the output type of the request.""" + mock_post.return_value = Response() + response_ = _perform_authentication_request( + distiller_instance_url="https://example.org", + distiller_key="DISTILLER_API_KEY" + ) + self.assertIsInstance(response_, Response) + + # This test requires the DISTILLER_API_KEY and DISTILLER_INSTANCE_URL + # environment variables to be set. + # This test performs real requests to the DistillerSR API. 
+ @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__perform_authentication_request_output_online(self): + """Test the output type of the request.""" + response_ = _perform_authentication_request( + distiller_instance_url=os.getenv("DISTILLER_INSTANCE_URL"), + distiller_key=os.getenv("DISTILLER_API_KEY") + ) + self.assertIsInstance(response_, Response) + + ############################## + # _perform_service_request() # + ############################## + + def test__perform_service_request_types(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, _perform_service_request, service_url=123, + distiller_token="") + self.assertRaises(TypeError, _perform_service_request, service_url="", + distiller_token=123) + self.assertRaises(TypeError, _perform_service_request, service_url="", + distiller_token="", body=123) + self.assertRaises(TypeError, _perform_service_request, service_url="", + distiller_token="", body={}, timeout="") + + @patch("pystiller._utils._requests.requests.get") + def test__perform_service_request_get_bad_url(self, mock_get): + """Test the behaviour for bad service URLs in GET requests.""" + mock_get.side_effect = requests.exceptions.ConnectionError + self.assertRaises(Exception, _perform_service_request, + service_url="https://invalid-domain", + distiller_token="DISTILLER_API_TOKEN") + + # This test performs real requests.
+ @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__perform_service_request_get_bad_url_online(self): + """Test the behaviour for bad service URLs in GET requests.""" + self.assertRaises(Exception, _perform_service_request, + service_url="https://invalid-domain", + distiller_token="DISTILLER_API_TOKEN") + + @patch("pystiller._utils._requests.requests.get") + def test__perform_service_request_get_output(self, mock_get): + """Test the output type of GET requests.""" + mock_get.return_value = Response() + response_ = _perform_service_request( + service_url="https://example.org/projects", + distiller_token="DISTILLER_API_TOKEN" + ) + self.assertIsInstance(response_, Response) + + # This test requires the DISTILLER_API_KEY and DISTILLER_INSTANCE_URL + # environment variables to be set. + # This test performs real requests to the DistillerSR API. + @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__perform_service_request_get_output_online(self): + """Test the output type of GET requests.""" + auth_response_ = _perform_authentication_request( + distiller_instance_url=os.getenv("DISTILLER_INSTANCE_URL"), + distiller_key=os.getenv("DISTILLER_API_KEY") + ) + token_ = auth_response_.json()["token"] + + response_ = _perform_service_request( + service_url=f"{os.getenv('DISTILLER_INSTANCE_URL')}/projects", + distiller_token=token_ + ) + self.assertIsInstance(response_, Response) + + @patch("pystiller._utils._requests.requests.post") + def test__perform_service_request_post_bad_url(self, mock_post): + """Test the behaviour for bad service URLs in POST requests.""" + mock_post.side_effect = requests.exceptions.ConnectionError + self.assertRaises(Exception, _perform_service_request, + service_url="https://invalid-domain", + distiller_token="DISTILLER_TOKEN", body={'a': 1}) + + # This test performs real requests.
+ @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__perform_service_request_post_bad_url_online(self): + """Test the behaviour for bad service URLs in POST requests.""" + self.assertRaises(Exception, _perform_service_request, + service_url="https://invalid-domain", + distiller_token="DISTILLER_TOKEN", body={'a': 1}) + + @patch("pystiller._utils._requests.requests.post") + def test__perform_service_request_post_output(self, mock_post): + """Test the output type of the POST request.""" + mock_post.return_value = Response() + response_ = _perform_service_request( + service_url="https://example.org/datarama/query", + distiller_token="DISTILLER_TOKEN", body={'a': 1} + ) + self.assertIsInstance(response_, Response) + + # This test requires the DISTILLER_API_KEY and DISTILLER_INSTANCE_URL + # environment variables to be set. + # This test performs real requests to the DistillerSR API. + @unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true", + "Skip online tests") + def test__perform_service_request_post_output_online(self): + """Test the output type of the POST request.""" + auth_response_ = _perform_authentication_request( + distiller_instance_url=os.getenv("DISTILLER_INSTANCE_URL"), + distiller_key=os.getenv("DISTILLER_API_KEY") + ) + token_ = auth_response_.json()["token"] + + response_ = _perform_service_request( + service_url=f"{os.getenv('DISTILLER_INSTANCE_URL')}/datarama/query", + distiller_token=token_, + body={ + "project_id": 42483, + "saved_report_id": 155, + "use_saved_format": True + } + ) + self.assertIsInstance(response_, Response) + + ######################### + # _handle_http_errors() # + ######################### + + def test__handle_http_errors_types(self): + """Test the behaviour for invalid parameters.""" + self.assertRaises(TypeError, _handle_http_errors, response=123) + self.assertRaises(TypeError, _handle_http_errors, + response=requests.Response(), error_message=123) + + def
test__handle_http_errors_valid(self): + """Test the behaviour for status code 200.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "GET" + response_.headers["Content-Type"] = "application/json" + response_._content = (json.dumps({"data": "Custom data"}) + .encode("utf-8")) + self.assertIsNone(_handle_http_errors(response=response_)) + + def test__handle_http_errors_invalid(self): + """Test the behaviour for bad status codes.""" + response_ = Response() + response_.status_code = 502 + response_.url = "https://example.org" + response_.method = "GET" + self.assertRaises(HTTPError, _handle_http_errors, response=response_) + + ########################## + # _parse_json_response() # + ########################## + + def test__parse_json_response_types(self): + """Test the behaviour for invalid parameters.""" + self.assertRaises(TypeError, _parse_json_response, response=123) + self.assertRaises(TypeError, _parse_json_response, + response=requests.Response(), error_message=123) + + def test__parse_json_response_valid(self): + """Test the behaviour for valid parameters and data.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "POST" + response_.headers["Content-Type"] = "application/json" + response_._content = (json.dumps({"data": "Custom data"}) + .encode("utf-8")) + self.assertIsInstance( + _parse_json_response(response=response_), + dict + ) + + def test__parse_json_response_valid_flatten(self): + """Test the behaviour for valid parameters and data.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "POST" + response_.headers["Content-Type"] = "application/json" + response_._content = (json.dumps({"data": "Custom data"}) + .encode("utf-8")) + self.assertIsInstance( + _parse_json_response(response=response_, flatten=True), + pd.DataFrame + ) + + def 
test__parse_json_response_invalid(self): + """Test the behaviour for invalid body data.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "POST" + response_.headers["Content-Type"] = "text/html" + response_._content = "content".encode("utf-8") + self.assertRaises( + JSONDecodeError, + _parse_json_response, + response=response_ + ) + + ######################### + # _parse_csv_response() # + ######################### + + def test__parse_csv_response_types(self): + """Test the behaviour for invalid parameters.""" + self.assertRaises(TypeError, _parse_csv_response, response=123) + self.assertRaises(TypeError, _parse_csv_response, + response=requests.Response(), error_message=123) + + def test__parse_csv_response_valid(self): + """Test the behaviour for valid parameters and data.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "POST" + response_.headers["Content-Type"] = "text/csv" + response_._content = "a,b\n1,2".encode("utf-8") + self.assertIsInstance( + _parse_csv_response(response=response_), + pd.DataFrame + ) + + def test__parse_csv_response_invalid(self): + """Test the behaviour for invalid body data.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "POST" + response_.headers["Content-Type"] = "text/csv" + response_._content = "a,b\n1,2\n3\n4,5,6".encode("utf-8") + self.assertRaises( + Exception, + _parse_csv_response, + response=response_ + ) + + ########################## + # _parse_xlsx_response() # + ########################## + + def test__parse_xlsx_response_types(self): + """Test the behaviour for invalid parameters.""" + self.assertRaises(TypeError, _parse_xlsx_response, response=123) + self.assertRaises(TypeError, _parse_xlsx_response, + response=requests.Response(), error_message=123) + + def test__parse_xlsx_response_valid(self): + """Test the 
behaviour for valid parameters and data.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "POST" + response_.headers["Content-Type"] = ( + "application/vnd.openxmlformats-officedocuments." + + "spreadsheetml.sheet" + ) + dataframe_ = pd.DataFrame({ + 'a': [1, 2], + 'b': [3, 4] + }) + buffer_ = io.BytesIO() + dataframe_.to_excel(buffer_, index=True) # type: ignore[arg-type] + buffer_.seek(0) + response_._content = buffer_.getvalue() + self.assertIsInstance( + _parse_xlsx_response(response=response_), + pd.DataFrame + ) + + def test__parse_xlsx_response_invalid(self): + """Test the behaviour for invalid body data.""" + response_ = Response() + response_.status_code = 200 + response_.url = "https://example.org" + response_.method = "POST" + response_.headers["Content-Type"] = ( + "application/vnd.openxmlformats-officedocuments." + + "spreadsheetml.sheet" + ) + response_._content = b"Not an XLSX content" + self.assertRaises( + Exception, + _parse_xlsx_response, + response=response_ + ) diff --git a/tests/test_client.py b/tests/test_client.py new file mode 100644 index 0000000..c57b165 --- /dev/null +++ b/tests/test_client.py @@ -0,0 +1,150 @@ +import os +import unittest +from unittest.mock import patch +import pandas as pd +from dotenv import load_dotenv + +from pystiller.client import Client + +load_dotenv() + +class TestClient(unittest.TestCase): + + ############## + # __init__() # + ############## + + def test___init___types(self): + """Test the behaviour for invalid data.""" + self.assertRaises(TypeError, Client, distiller_key=123, + distiller_instance_url="https://example.org") + self.assertRaises(ValueError, Client, distiller_key='', + distiller_instance_url="https://example.org") + self.assertRaises(TypeError, Client, distiller_key="DISTILLER_API_KEY", + distiller_instance_url=123) + self.assertRaises(ValueError, Client, + distiller_key="DISTILLER_API_KEY", + distiller_instance_url='') + + 
# NOTE: these are TestClient methods (the class header precedes this span);
# they are shown at column 0 because the source span carries no indentation.
@patch("pystiller.client._env._read_environment_variable")
@patch("pystiller.client._authentication._get_authentication_token")
def test___init__1(self, auth_token_mock, env_reader_mock):
    """Build a Client without arguments while its collaborators are mocked.

    Patch decorators are applied bottom-up, so the authentication mock is
    the first injected argument and the environment-reader mock the second.
    """
    auth_token_mock.return_value = "test"
    env_reader_mock.return_value = "test"
    fake_environment = {
        "DISTILLER_API_KEY": "test",
        "DISTILLER_INSTANCE_URL": "test",
    }
    # clear=True hides every other environment variable inside the block.
    with patch.dict(os.environ, fake_environment, clear=True):
        self._client = Client()
        self.assertIsInstance(self._client, Client)


# This test requires the DISTILLER_API_KEY and DISTILLER_INSTANCE_URL
# environment variables to be set.
@unittest.skipIf(
    os.getenv("SKIP_ONLINE_TESTS") == "true",
    "Skip online tests",
)
def test___init__1_online(self):
    """Build a Client without arguments and without any mocking."""
    self._client = Client()
    self.assertIsInstance(self._client, Client)


@patch("pystiller.client._authentication._get_authentication_token")
def test___init__2(self, auth_token_mock):
    """Build a Client from explicit key and instance URL arguments."""
    auth_token_mock.return_value = "test"
    created = Client(
        distiller_key="DISTILLER_API_KEY",
        distiller_instance_url="DISTILLER_INSTANCE_URL",
    )
    self.assertIsInstance(created, Client)


@patch("pystiller.client._authentication._get_authentication_token")
def test___init___instance_url_trailing_slash(self, auth_token_mock):
    """Check that a trailing slash is dropped from the stored instance URL."""
    auth_token_mock.return_value = "test"
    subject = Client(
        distiller_key="DISTILLER_API_KEY",
        distiller_instance_url="https://example.org/",
    )
    stored_url = subject._distiller_instance_url
    self.assertEqual(stored_url, "https://example.org")


##################
# get_projects() #
##################

@patch("pystiller.client._authentication._get_authentication_token")
@patch("pystiller.client._projects._get_projects")
def test_get_projects(self, projects_mock, auth_token_mock):
    """Check get_projects() yields a DataFrame when the helper is mocked."""
    auth_token_mock.return_value = "test_token"
    projects_mock.return_value = pd.DataFrame()
    subject = Client(
        distiller_key="DISTILLER_API_KEY",
        distiller_instance_url="https://example.org",
        automatic_token_refresh=True,
    )
    self.assertIsInstance(subject.get_projects(), pd.DataFrame)


# This test requires the DISTILLER_API_KEY and DISTILLER_INSTANCE_URL
# environment variables to be set.
# This test performs real requests to the DistillerSR API.
@unittest.skipIf(
    os.getenv("SKIP_ONLINE_TESTS") == "true",
    "Skip online tests",
)
def test_get_projects_online(self):
    """Check get_projects() yields a DataFrame without any mocking."""
    subject = Client()
    self.assertIsInstance(subject.get_projects(), pd.DataFrame)


#################
# get_reports() #
#################

@patch("pystiller.client._authentication._get_authentication_token")
@patch("pystiller.client._datarama._get_reports")
def test_get_reports(self, reports_mock, auth_token_mock):
    """Check get_reports() yields a DataFrame when the helper is mocked."""
    auth_token_mock.return_value = "test_token"
    reports_mock.return_value = pd.DataFrame()
    subject = Client(
        distiller_key="DISTILLER_API_KEY",
        distiller_instance_url="https://example.org",
        automatic_token_refresh=True,
    )
    listing = subject.get_reports(project_id=123)
    self.assertIsInstance(listing, pd.DataFrame)


# This test requires the DISTILLER_API_KEY, DISTILLER_INSTANCE_URL, and
# DISTILLER_PROJECT_ID_TEST environment variables to be set.
# This test performs real requests to the DistillerSR API.
# NOTE: these are TestClient methods (the class header precedes this span);
# they are shown at column 0 because the source span carries no indentation.
@unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true",
                 "Skip online tests")
def test_get_reports_online(self):
    """Check get_reports() yields a DataFrame without any mocking.

    Raises:
        KeyError: If DISTILLER_PROJECT_ID_TEST is not set.
    """
    client_ = Client()
    # os.environ[...] names the missing variable in its KeyError, whereas
    # int(os.getenv(...)) would fail with an opaque TypeError on None.
    project_id_ = int(os.environ["DISTILLER_PROJECT_ID_TEST"])
    reports_ = client_.get_reports(project_id=project_id_)
    self.assertIsInstance(reports_, pd.DataFrame)


################
# get_report() #
################

@patch("pystiller.client._authentication._get_authentication_token")
@patch("pystiller.client._datarama._get_report")
def test_get_report(self, mock_get_report, mock_get_auth):
    """Check get_report() yields a DataFrame when the helper is mocked.

    Patch decorators are applied bottom-up, so the report mock is the first
    injected argument and the authentication mock the second.
    """
    mock_get_report.return_value = pd.DataFrame()
    mock_get_auth.return_value = "test_token"
    client_ = Client(distiller_key="DISTILLER_API_KEY",
                     distiller_instance_url="https://example.org",
                     automatic_token_refresh=True)
    report_ = client_.get_report(project_id=123, report_id=456)
    self.assertIsInstance(report_, pd.DataFrame)


# This test requires the DISTILLER_API_KEY, DISTILLER_INSTANCE_URL,
# DISTILLER_PROJECT_ID_TEST, and DISTILLER_REPORT_ID_TEST environment
# variables to be set.
# This test performs real requests to the DistillerSR API.
@unittest.skipIf(os.getenv("SKIP_ONLINE_TESTS") == "true",
                 "Skip online tests")
def test_get_report_online(self):
    """Check get_report() yields a DataFrame without any mocking.

    Raises:
        KeyError: If DISTILLER_PROJECT_ID_TEST or DISTILLER_REPORT_ID_TEST
            is not set.
    """
    client_ = Client()
    # Indexing os.environ reports exactly which required variable is absent.
    project_id_ = int(os.environ["DISTILLER_PROJECT_ID_TEST"])
    report_id_ = int(os.environ["DISTILLER_REPORT_ID_TEST"])
    report_ = client_.get_report(project_id=project_id_,
                                 report_id=report_id_)
    self.assertIsInstance(report_, pd.DataFrame)