diff --git a/.coveragerc b/.coveragerc index 158768f3..20b55fca 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,4 +1,4 @@ [run] -omit = +omit = gptables/test/* - gptables/examples/* \ No newline at end of file + gptables/examples/* diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..53838a7e --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +ignore = E501, W503, W504 +exclude = .git,.vscode,docs/ diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..5f568e19 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,7 @@ +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners +# +# These owners will be the default owners for everything in +# the repo (*). Unless a later match takes precedence, +# the following will be requested for +# review when someone opens a pull request. +* @shilohd diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 11fc491e..77e46c05 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -6,15 +6,19 @@ labels: enhancement assignees: '' --- +It would help the development team for the requester to supply the following information: -**Is your feature request related to a problem? Please describe.** +**Is your feature request related to a problem? Please describe:** A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -**Describe the solution you'd like** +**Describe the solution you'd like:** A clear and concise description of what you want to happen. 
-**Describe alternatives you've considered** +**Please confirm you've checked our [won't fix](https://github.com/ONSdigital/gptables/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22wontfix%20%3Acry%3A%22) issues** +Yes/no + +**Describe alternatives you've considered:** A clear and concise description of any alternative solutions or features you've considered. -**Additional context** +**Additional context:** Add any other context or screenshots about the feature request here. diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 51ccd506..a8e2ca27 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,5 +1,6 @@ + ### Proposed Changes - - + - ### Related Issues - Related to # @@ -12,12 +13,15 @@ Please indicate items that aren't necessary and why, with comments around incomp - [ ] Version number has been incremented, according to [SemVer][semver] - [ ] Changelog has been updated, listing changes to this version. Use the [keep a changelog][changelog] format - [ ] New features are tested +- [ ] New features follow the Analysis Function Releasing statistics in spreadsheets [guidance][guidance] - [ ] New features are documented using the [numpydoc][numpy-docstrings] docstring format - [ ] Other relevant package documentation is updated -- [ ] For new functionality, examples are included in the docs or a [feature request][feature-request] has -been made for it/them. 
+- [ ] For new functionality, examples are included in the docs or a [feature request][feature-request] has +been made for it/them +- [ ] Required workflows and pre-commits succeed [changelog]: [https://keepachangelog.com/en/1.0.0/] [feature-request]: [https://github.com/best-practice-and-impact/gptables/issues/new?assignees=&labels=enhancement&template=feature_request.md&title=] +[guidance]: https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/ [numpy-docstrings]: [https://numpydoc.readthedocs.io/en/latest/format.html] [semver]: [https://semver.org/spec/v2.0.0.html] diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 49125eee..71d32d9c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,8 +27,8 @@ jobs: run: | python -m pip install --upgrade pip pip install wheel - pip install -e .[testing] - + pip install -e .[dev] + - name: Run pytest run: | pytest --cov-config=.coveragerc --cov=gptables --cov-report=xml --cov-report=term-missing @@ -45,13 +45,13 @@ jobs: linux: - name: Tests on Linux + name: Build and test on Linux runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.9', '3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11', '3.12'] + - steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} @@ -62,17 +62,25 @@ jobs: run: | python -m pip install --upgrade pip pip install wheel - pip install --upgrade-strategy eager -e .[testing] + pip install --upgrade-strategy eager -e .[dev] + - name: Run pre-commit hooks + run: | + pre-commit run --all-files + - name: Build + run: | + pip install build + python -m build - name: Run pytest - run: pytest + run: | + python -m pip install --upgrade setuptools + pytest windows: - - name: Tests on Windows + name: Build and test on Windows runs-on: windows-latest strategy: matrix: - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['3.9', '3.10', '3.11', '3.12'] steps: @@ -91,35 
+99,19 @@ jobs: run: | python -m pip install --upgrade pip pip install wheel - pip install --upgrade-strategy eager -e .[testing] - - name: Run pytest - run: pytest - - build_and_deploy_docs: - name: Build and deploy docs to Pages - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Set up Python 3.11 - uses: actions/setup-python@v4 - with: - python-version: '3.11' - - uses: actions/cache@v4 - with: - path: ~\AppData\Local\pip\Cache - key: ${{ runner.os }}-pip-${{ hashFiles('setup.py') }} - restore-keys: | - ${{ runner.os }}-pip- - - name: Install Python dependencies + pip install --upgrade-strategy eager -e .[dev] + - name: Run pre-commit hooks run: | - python -m pip install --upgrade pip - pip install wheel - pip install --upgrade-strategy eager -e .[docs] - - name: Build the book + pre-commit run --all-files + - name: Build run: | - (cd docs && make clean && make html) - + pip install build + python -m build + - name: Run pytest + run: | + python -m pip install --upgrade setuptools + pytest + publish: name: Publish to PyPi needs: [linux, windows, test-with-cov] @@ -133,7 +125,7 @@ jobs: - name: Checkout source uses: actions/checkout@v4 - name: Set up Python 3.11 - uses: actions/setup-python@v1 + uses: actions/setup-python@v5 with: python-version: '3.11' - name: Install build dependencies @@ -143,4 +135,4 @@ jobs: run: | python -m build - name: Publish - uses: pypa/gh-action-pypi-publish@v1.13.0 \ No newline at end of file + uses: pypa/gh-action-pypi-publish@v1.12.3 diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml new file mode 100644 index 00000000..c1d2d901 --- /dev/null +++ b/.github/workflows/deploy-docs.yml @@ -0,0 +1,28 @@ +name: Deploy docs + +on: + push: + branches: + - 'main' + +jobs: + deploy-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: '3.12' + - name: Install reqs + run: pip install 
.[docs] + - name: Build docs + run: mkdocs build + - name: Deploy to GitHub Pages + uses: JamesIves/github-pages-deploy-action@v4.7.3 + with: + branch: gh-pages + folder: site + +permissions: + contents: write diff --git a/.gitignore b/.gitignore index ce294f0f..e3999dec 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,8 @@ env/ *~lock* docs/build/** gptables/examples/*.xlsx -.vscode/ \ No newline at end of file +gptables/test/actual_workbook.xlsx +.vscode/ +site/ +.venv +*.xlsx diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..bfd346c9 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,43 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-added-large-files + name: Check for files larger than 5 MB + args: [ "--maxkb=5120" ] + - id: end-of-file-fixer + name: Check for a blank line at the end of scripts (auto-fixes) + - id: trailing-whitespace + name: Check for trailing whitespaces (auto-fixes) + - repo: https://github.com/pycqa/isort + rev: 6.0.1 + hooks: + - id: isort + name: isort - Sort Python imports (auto-fixes) + args: [ "--profile", "black", "--filter-files" ] + - repo: https://github.com/psf/black + rev: 24.8.0 # Replace by any tag/version: https://github.com/psf/black/tags + hooks: + - id: black + name: black - consistent Python code formatting (auto-fixes) + language_version: python # Should be a command that runs python3.6+ + - repo: https://github.com/PyCQA/flake8 + rev: 7.1.2 + hooks: + - id: flake8 + name: flake8 - Python linting + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 + hooks: + - id: detect-secrets + name: detect-secrets - Detect secrets in staged code + exclude: .*/tests/.* + - repo: https://github.com/PyCQA/bandit + rev: 1.8.3 + hooks: + - id: bandit + name: bandit - Checks for vulnerabilities + args: ["-c", 
"pyproject.toml"] + additional_dependencies: ["bandit[toml]"] diff --git a/.vscode/settings.json b/.vscode/settings.json index c8e47193..ec90b7b3 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,4 +4,4 @@ ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true -} \ No newline at end of file +} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 4a9dd039..00000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,40 +0,0 @@ -# Contributing - -When contributing to this repository, please first discuss the change you wish -to make via issue, email, or any other method with the owners of this -repository before making a change. - -## Pull/merge request process - -1. Branch from the `dev` branch. If you are implementing a feature name it - `feature/name_of_feature`, if you are implementing a bugfix name it - `bug/issue_name`. If they are associated with a specific issue, you - may use the issue number in place of the name. -1. Update the README.rst and other documentation with details of major changes - to the interface, this includes new environment variables, useful file - locations and container parameters. -1. Once you are ready for review please open a pull/merge request to the - `dev` branch. -1. You may merge the Pull/Merge Request in once you have the sign-off of two - maintainers. -1. If you are merging `dev` to `master`, you must increment the version number - in the VERSION file to the new version that this Pull/Merge Request would - represent. The versioning scheme we use is [SemVer](http://semver.org/). - - -## Code style - -- We name variables using few nouns in lowercase, e.g. `mapping_names` - or `increment`. -- We name functions using verbs in lowercase, e.g. `map_variables_to_names` or - `change_values`. -- We use the [numpydoc](https://numpydoc.readthedocs.io/en/latest/format.html) - format for documenting features using docstrings. - -## Review process - -1. 
When we want to release the package we will request a formal review for any - non-minor changes. -2. The review process follows a similar process to ROpenSci. -3. Reviewers will be requested from associated communities. -4. Only once reviewers are satisfied, will the `dev` branch be released. diff --git a/LICENSE b/LICENSE index 9d4ac131..77526e91 100644 --- a/LICENSE +++ b/LICENSE @@ -20,7 +20,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- +------------------------------------------------------------------------------- For code from the XlsxWriter package, where used: diff --git a/MANIFEST.in b/MANIFEST.in index 33d64ace..3598e229 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,4 +5,4 @@ include requirements.txt recursive-include gptables/examples *.csv *.py recursive-include gptables/theme_pickles *.pickle recursive-include gptables/themes *.yaml -recursive-include gptables/test test*.py \ No newline at end of file +recursive-include gptables/test test*.py diff --git a/README.rst b/README.rst index 2f6371ba..c8d582a4 100644 --- a/README.rst +++ b/README.rst @@ -4,10 +4,6 @@ Good Practice Tables (gptables) .. image:: https://github.com/best-practice-and-impact/gptables/workflows/continuous-integration/badge.svg :target: https://github.com/best-practice-and-impact/gptables/actions :alt: Actions build status - -.. image:: https://readthedocs.org/projects/gptables/badge/?version=latest - :target: https://gptables.readthedocs.io/en/latest/?badge=latest - :alt: Documentation Status .. image:: https://badge.fury.io/py/gptables.svg :target: https://badge.fury.io/py/gptables @@ -15,49 +11,72 @@ Good Practice Tables (gptables) ``gptables`` is an opinionated python package for spreadsheet production. 
-It produces ``.xlsx`` files from your ``pandas`` dataframes or using -``reticulate`` in R. You define the mapping from your data to elements of the -table. It does the rest. +It produces ``.xlsx`` files from your ``pandas`` dataframes. -``gptables`` uses the official `guidance on good practice spreadsheets`_. +``gptables`` uses the Analysis Function `spreadsheet guidance`_. It advocates a strong adherence to the guidance by restricting the range of operations possible. The default theme ``gptheme`` should accommodate most use cases. However, the ``Theme`` object allows development of custom themes, where other formatting is required. -``gptables`` is developed and maintained by the Analysis Standards and Pipelines team at ONS. It can be -installed from `PyPI`_ or `GitHub`_. The source code is maintained on GitHub. -Users may also be interested in `a11ytables`_, an R native equivalent to -``gptables``, and `csvcubed`_, a package for turning data and metadata into +Users may also be interested in `csvcubed`_, a package for turning data and metadata into machine-readable CSV-W files. -5 Simple Steps --------------- +R users should check out `aftables`_, an R native equivalent to ``gptables``. -1. You map your data to the elements of a ``GPTable``. +Requirements +------------ +- Python 3.9+ -2. You can define the format of each element with a custom ``Theme``, or simply use the default - gptheme. +Using an earlier version? Install `gptables` version before 2.0.0. -3. Optionally design a ``Cover`` page to provide information that relates to all of the tables in your Workbook. +Install +------- +gptables is available from `PyPI`_. + +As a user: + +- Using a virtual environment? Use `pip install gptables` in a terminal. +- If you're not using a virtual environment, use `python -m pip install gptables` instead. -4. Optionally upload a ``notes_table`` with information about any notes. +All done! -5. You ``write_workbook`` to win. 
+As a developer: +- Navigate to the directory where this repo is cloned. +- Use `pip install -e .` to install an editable version of the package. +- Use `pip install .[dev]` to install the dependencies for developers. +- For working on docs, also use `pip install .[docs]`. +- Set up pre-commit to run automatically with `pre-commit install`. -**Note**: This package is not intending to create perfectly accessible spreadsheets but will help with the bulk of the work needed. Users of this packages should refer back to the `main spreadsheet guidance `_ or the `spreadsheet accessibility checklist `_ after using it to make sure nothing has been missed. +Usage +----- + +1. Map your data to the elements of a ``GPTable``. + +2. Define the format of each element with a custom ``Theme`` - or simply use the default ``gptheme``. + +3. Optionally design a ``Cover`` page to provide information that relates to all of the tables in your Workbook. + +4. Optionally supply a ``notes_table`` with information about any notes. + +5. Make your gptable with ``write_workbook``! + +**Note**: This package does not create perfectly accessible spreadsheets, but it will help with many requirements. +Users should refer to the Analysis Function `spreadsheet guidance`_ and the `spreadsheet accessibility checklist`_. Contributing ------------ - Found a bug, or would like to suggest a new feature? The best way is to let us know by raising an `issue`_. -Alternatively, please email Analysis Standards at Pipelines at the ONS (ASAP@ons.gov.uk), and let us know if you use the package so we can engage with you as a user. +Alternatively, please email us - the Analysis Standards and Pipelines team at the Office for National Statistics (ASAP@ons.gov.uk). + +Let us know if you use the package. We'd love to know what's working well, and what could be improved! Requests and fixes are managed according to resource capacity, and we aim to acknowledge queries within one working week. 
Please follow up in the case of this taking longer. -.. _`guidance on good practice spreadsheets`: https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/ +.. _`spreadsheet guidance`: https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/ +.. _`spreadsheet accessibility checklist`: https://analysisfunction.civilservice.gov.uk/policy-store/making-spreadsheets-accessible-a-brief-checklist-of-the-basics/ .. _`PyPI`: https://pypi.org/project/gptables/ -.. _`GitHub`: https://github.com/best-practice-and-impact/gptables -.. _`a11ytables`: https://best-practice-and-impact.github.io/aftables/index.html -.. _`csvcubed`: https://gss-cogs.github.io/csvcubed-docs/external/ -.. _`issue`: https://github.com/best-practice-and-impact/gptables/issues \ No newline at end of file +.. _`aftables`: https://best-practice-and-impact.github.io/aftables/index.html +.. _`csvcubed`: https://onsdigital.github.io/csvcubed-docs/external/ +.. _`issue`: https://github.com/best-practice-and-impact/gptables/issues diff --git a/VERSION b/VERSION index 867e5243..26aaba0e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.2.0 \ No newline at end of file +1.2.0 diff --git a/closed_issue_log.md b/closed_issue_log.md deleted file mode 100644 index 95b148ed..00000000 --- a/closed_issue_log.md +++ /dev/null @@ -1,29 +0,0 @@ -# Closed Issue Log - -This log documents feature requests that we do not currently intend to offer in gptables. -This is due to the request being non-compliant with Analysis Function [spreadsheet accessibility guidance][guidance], -or otherwise being out of scope of the package, designed for implementing this guidance. -These issues may be re-opened in future if guidance changes - please contribute to the discussion by heading to -the GitHub issue. - -## [180][issue-180]: Multiple tables in a single spreadsheet (05/08/2025) -Section 13 of the guidance, 'Worksheets with multiple tables', dissuades this. 
- -## [265][issue-265]: Rounding decimals (02/09/2025) -A user requested the ability to round a column to a given number of decimal places. This issue was closed because it can be handled by [pandas][pandas-decimal-rounding]. - -## [136][issue-136]: Add hyperlinks to Theme format elements (05/09/2025) -Section 7 of the guidance, under 'Making hyperlinks accessible' says links should be underlined and in a colour of suitable -contrast. Further advice was sought from the Presentation Champions group, with the feedback that users expect links to be blue. - -## [104][issue-104]: Support table formatting as one unit (05/09/2025) -Interpreted as requesting the ability to add a line above and below a table. Section 7 of the guidance, under -'Other pointers for formatting that should be followed in terms of best practice', mentions that gridlines and borders -should be avoided in favour of keeping things simple. This is also on advice from the Presentation Champions. - -[guidance]: https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/ -[issue-104]: https://github.com/best-practice-and-impact/gptables/issues/104 -[issue-136]: https://github.com/best-practice-and-impact/gptables/issues/136 -[issue-180]: https://github.com/best-practice-and-impact/gptables/issues/180 -[issue-265]: https://github.com/best-practice-and-impact/gptables/issues/265 -[pandas-decimal-rounding]: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.round.html diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d0c3cbf1..00000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". 
-help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api/api_reference.md b/docs/api/api_reference.md new file mode 100644 index 00000000..611ef497 --- /dev/null +++ b/docs/api/api_reference.md @@ -0,0 +1,17 @@ +# API Reference + +

Functions

+ +| Object | Description | +|-----------------------|---------------------------| +| [`produce_workbook()`](functions/produce_workbook.md) | Creates a formatted workbook suitable for further editing. | +| [`write_workbook()`](functions/write_workbook.md) | Creates and Writes out a formatted workbook. | + +

Classes

+| Object | Description | +|-----------------------|---------------------------| +| [`Cover`](classes/cover.md) | Stores information for the cover sheet of a workbook. | +| [`GPTable`](classes/gptable.md) | Stores information about data in a table. | +| [`GPWorkbook`](classes/gpworkbook.md) | Wrapper for `XlsxWriter.Workbook` to support further editing after `produce_workbook()` | +| [`GPWorksheet`](classes/gpworksheet.md) | Wrapper for `XlsxWriter.Worksheet` to support further editing after `produce_workbook()` | +| [`Theme`](classes/theme.md) | Used to set the formatting of various elements throughout the workbook. | diff --git a/docs/api/classes/cover.md b/docs/api/classes/cover.md new file mode 100644 index 00000000..a6f435e1 --- /dev/null +++ b/docs/api/classes/cover.md @@ -0,0 +1,4 @@ +::: gptables.core.cover.Cover + options: + heading: "Cover" + heading_level: 0 diff --git a/docs/api/classes/gptable.md b/docs/api/classes/gptable.md new file mode 100644 index 00000000..211af535 --- /dev/null +++ b/docs/api/classes/gptable.md @@ -0,0 +1,4 @@ +::: gptables.core.gptable.GPTable + options: + heading: "GPTable" + heading_level: 0 diff --git a/docs/api/classes/gpworkbook.md b/docs/api/classes/gpworkbook.md new file mode 100644 index 00000000..1470b4df --- /dev/null +++ b/docs/api/classes/gpworkbook.md @@ -0,0 +1,4 @@ +::: gptables.core.wrappers.GPWorkbook + options: + heading: "GPWorkbook" + heading_level: 0 diff --git a/docs/api/classes/gpworksheet.md b/docs/api/classes/gpworksheet.md new file mode 100644 index 00000000..d70fe4e9 --- /dev/null +++ b/docs/api/classes/gpworksheet.md @@ -0,0 +1,4 @@ +::: gptables.core.wrappers.GPWorksheet + options: + heading: "GPWorksheet" + heading_level: 0 diff --git a/docs/api/classes/theme.md b/docs/api/classes/theme.md new file mode 100644 index 00000000..dc9106e4 --- /dev/null +++ b/docs/api/classes/theme.md @@ -0,0 +1,4 @@ +::: gptables.core.theme.Theme + options: + heading: "Theme" + heading_level: 0 diff --git 
a/docs/api/functions/produce_workbook.md b/docs/api/functions/produce_workbook.md new file mode 100644 index 00000000..7f634aec --- /dev/null +++ b/docs/api/functions/produce_workbook.md @@ -0,0 +1,4 @@ +::: gptables.core.api.produce_workbook + options: + heading: "produce_workbook()" + heading_level: 0 diff --git a/docs/api/functions/write_workbook.md b/docs/api/functions/write_workbook.md new file mode 100644 index 00000000..1432077f --- /dev/null +++ b/docs/api/functions/write_workbook.md @@ -0,0 +1,4 @@ +::: gptables.core.api.write_workbook + options: + heading: "write_workbook()" + heading_level: 0 diff --git a/docs/getting_started/elements.md b/docs/getting_started/elements.md new file mode 100644 index 00000000..40f51a37 --- /dev/null +++ b/docs/getting_started/elements.md @@ -0,0 +1,18 @@ +# Table elements + +A `GPTable` is composed of a number of elements. + +On the table of contents, sheet names are presented alongside the title, subtitles, scope +and instructions associated with the `GPTable`. + +On the data spreadsheet, this information appears the top of the page with the source and legend. +The title is in large bold text. + +The data table is underneath, with the column headings in bold. In the same row on new lines are the units and any table note references. These can be defined in the `GPTables` object. + +Column headings, indices, and data are defined in a `pandas.DataFrame`. Up to the first 3 +columns of the `pandas.DataFrame` can be used as index columns for the data in the rest of the columns. + +![](../static/table_mapping.png) + +The presentation of these elements can be adjusted with the use of [themes](../how_to/custom_theme.md) and [additional formatting](../how_to/additional_formatting.md). 
diff --git a/docs/getting_started/tutorial.md b/docs/getting_started/tutorial.md new file mode 100644 index 00000000..2ce52836 --- /dev/null +++ b/docs/getting_started/tutorial.md @@ -0,0 +1,472 @@ +# Tutorial + +This section aims to demonstrate some basic `gptables` functionality. The code from each section +can be run from the +[examples](https://github.com/ONSdigital/gptables/tree/main/gptables/examples) folder, with more advanced +usage guides found in the how-tos and [API documentation](../api/api_reference.md). + +To install `gptables`, simply use `pip install gptables`. + +## Starting out + +First import `gptables` alongside any other necessary packages and read in the data. + +```python +import pandas as pd +import gptables as gpt + +penguins_data = pd.read_csv("penguins.csv") +``` + +Perform any data preparation, for example cleaning. Then construct the `GPTable` by defining some details +about the data, such as its title and source. The `table` containing the data should be a +`pandas.DataFrame`. + +```python +penguins_table = gpt.GPTable( + table = penguins_data, + table_name = "penguins_statistics", + title = "The Palmer Penguins Dataset", + subtitles = ["This is the first subtitle", + "This is another subtitle"], + scope = "Penguins", + source = "Palmer Station, Antarctica", +) +``` + +As a matter of preference, this can alternatively be achieved using a dictionary of keyword arguments: + +```python +kwargs = { + "table_name": "penguins_statistics", + "title": "The Palmer Penguins Dataset", + "subtitles": ["This is the first subtitle", + "This is another subtitle"], + "scope": "Penguins", + "source": "Palmer Station, Antarctica", +} + +penguins_table = gpt.GPTable(table = penguins_data, **kwargs) +``` + +Each `GPTable` should then be associated with a sheet name using a dictionary. 
+ +```python +penguins_sheets = {"Penguins": penguins_table} +``` + +Finally, use `write_workbook()` with the output path, sheets, and any additional elements to create +and write a formatted Excel workbook. + +```python +gpt.write_workbook( + filename="python_penguins_gptable.xlsx", + sheets=penguins_sheets, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, +) +``` + +The workbook contains a table of contents, with sheet names linking +to the data sheets alongside descriptions of the data. There is a sheet for each dataset, on which +the specified details such as titles are presented in a minimal style with text of a legible +font and size. + +![](../static/getting_started_before_and_after.png) + +The code is combined below in an extendable tab. + +??? "Starting out" + ```python + import pandas as pd + import gptables as gpt + + penguins_data = pd.read_csv("penguins.csv") + + penguins_table = gpt.GPTable( + table = penguins_data, + table_name = "penguins_statistics", + title = "The Palmer Penguins Dataset", + subtitles = ["This is the first subtitle", + "This is another subtitle"], + scope = "Penguins", + source = "Palmer Station, Antarctica", + ) + + penguins_sheets = {"Penguins": penguins_table} + + gpt.write_workbook( + filename="gpt_starting_out.xlsx", + sheets=penguins_sheets, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + ``` + +## Customising the table of contents + +The description column in a table of contents can be customised by passing additional +elements from the `GPTable` into the `contentsheet_options` parameter +of `gptables.write_workbook()`. + +`contentsheet_options` can take `additional_elements`, including `'subtitles'`, `'scope'`, +`'source'`, and `'instructions'` to present more information about individual sheets within +the workbook: + +```python + penguins_table = gpt.GPTable( + ... + instructions="This workbook contains a single sheet. The name is a link to it." 
+ subtitles=["This is the first subtitle", "This is another subtitle"], + scope="Penguins", + source="Palmer Station, Antarctica", + ... + ) + + ... + + gpt.write_workbook( + filename=output_path, + sheets=penguins_sheets, + contentsheet_options={"additional_elements": ["instructions", "subtitles", "scope", "source"]}, + ) +``` + +![](../static/table_of_contents_additional_elements.png) + +`contentsheet_options` also allows for customisation of the table of contents `title`, `subtitles`, +`table_name`, `instructions` and `column_names`. For example: + +```python + gpt.write_workbook( + filename=output_path, + sheets=penguins_sheets, + contentsheet_options={"title": "A title for the table of contents", + "subtitles": ["A subtitle for the table of contents"], + "additional_elements": ["subtitles", "scope"]}, + ) +``` + +![](../static/table_of_contents_customisation.png) + +Set `contentsheet_label = None` inside `gptables.write_workbook()` to disable creating +a table of contents. + +More information can be found in the [function documentation](../api/functions/write_workbook.md). + +## Adding notes + +Notes are useful for adding footnotes, clarifications, or extra information to help users interpret +the data. Notes can be attached to tables by supplying `notes_table` to +`produce_workbook()` or `write_workbook()`. + +Notes appear on a separate worksheet called Notes. They can be referenced in the `title`, `subtitles`, +`scope`, `source`, and `legend` elements using the notation `$$placeholder$$`. These placeholders +are replaced with numbered references in the final output. Notes cannot be added to individual data +cells or column headings. + +```python +penguins_table = gpt.GPTable( + ... + title = "The Palmer Penguins Dataset$$note_about_x$$", + subtitles = ["This is the first subtitle$$note_about_y$$", + "This is another subtitle"], + ... 
+) +``` +For the note table to appear on the notes sheet, it must be provided as a `pandas.DataFrame` to the +`notes_table` argument of `gptables.write_workbook()`. This should contain the text of the placeholder +or reference, the text for the note, and optionally any links to include with the note. + +Below, note references are first created using a dictionary of lists before being converted into a +`pandas.DataFrame`. All lists must be the same length - if a note has no link, use an empty +string (`""`) or `None` at that list position. + +```python +notes = { + "Note reference": ["note_about_x", "note_about_y", "note_about_z", "note_with_no_link"], + "Note text": [ + "This is a note about x linking to google.", + "This is a note about y linking to duckduckgo.", + "This is a note about z linking to the ONS website.", + "This is a note with no link." + ], + "Useful link": [ + "[google](https://www.google.com)", + "[duckduckgo](https://duckduckgo.com/)", + "[ONS](https://www.ons.gov.uk)", + None + ], +} +penguins_notes_table = pd.DataFrame.from_dict(notes) +``` + +When producing the workbook, specify the `notes_table`. +```python +gpt.write_workbook( + ... + notes_table=penguins_notes_table, + ... +) +``` + +The resulting spreadsheet contains a sheet called Notes. In a table, the automatically +generated note numbers are alongside the note text and link (if supplied). The note numbers +correspond to where placeholders were inserted in the title and subtitle. + +![](../static/tutorial_adding_notes.png) + +This is combined into a full example below in an extendable tab. + +??? 
"Adding notes" + + ```python + import pandas as pd + import gptables as gpt + + penguins_data = pd.read_csv("penguins.csv") + + penguins_table = gpt.GPTable(table=penguins_data, + table_name = "penguins_statistics", + title="The Palmer Penguins Dataset$$note_about_x$$", + subtitles = ["This is the first subtitle$$note_about_y$$", + "This is another subtitle"], + scope = "Penguins", + source = "Palmer Station, Antarctica") + + penguins_sheets = {"Penguins": penguins_table} + + notes = { + "Note reference": ["note_about_x", "note_about_y", "note_about_z", "note_with_no_link"], + "Note text": [ + "This is a note about x linking to google.", + "This is a note about y linking to duckduckgo.", + "This is a note about z linking to the ONS website.", + "This is a note with no link." + ], + "Useful link": [ + "[google](https://www.google.com)", + "[duckduckgo](https://duckduckgo.com/)", + "[ONS](https://www.ons.gov.uk)", + None + ], + } + penguins_notes_table = pd.DataFrame.from_dict(notes) + + gpt.write_workbook( + filename="gpt_adding_notes.xlsx", + sheets=penguins_sheets, + notes_table=penguins_notes_table, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + ``` + +The notes sheet `title`, `table_name` and `instructions`can be customised by supplying these to +the `notesheet_options` parameter in `write_workbook()` or `produce_workbook()`. An updated +label can be supplied to `notesheet_label`. + +## Adding a cover sheet + +Cover sheets can be used to provide information that is general to all tables in a workbook. 
+ +To include a cover sheet, supply text elements to the attributes of a [`Cover`](../api/classes/cover.md) object: + +```python +penguins_cover = gpt.Cover( + cover_label = "Cover", + title = "Palmer Penguins Dataset", + intro=[ + "This spreadsheet contains a table of data obtained from the palmerpenguins package", + "This is intended to be a simple example of how to use the gptables package to create a spreadsheet with a cover sheet and data sheets.", + ], + about=[ + "Additional information about your publication can go here", + ], + contact=[ + "Tel: 01234 567890", + "Email: [example@email.address](mailto: example@email.address)", + ], +) +``` +This will automatically create a cover sheet with the subheadings "Introductory information", "About these data", and "Contact" if these attributes are included. + +Additional formatting can be introduced to create further subheadings if required: + +```python +penguins_cover = gpt.Cover( + cover_label = "Cover", + title = "Palmer Penguins Dataset", + intro=[ + "This spreadsheet contains a table of data obtained from the palmerpenguins package", + "This an example of how to use the gptables package to create a spreadsheet with a cover sheet and data sheets.", + ], + about=[ + "Additional information about your publication can go here", + [{"bold": True}, "Publication dates"], + "This data tables in this spreadsheet were originally published at 7:00am 01 January 2025.", + "The next publication will be published at 7:00am 01 January 2026.", + [{"bold": True}, "Methodology notes"], + "Information on methodology can be useful to users of your data", + [{"bold": True}, "Notes, blank cells and units"], + "Some cells in the tables refer to notes which can be found in the notes worksheet. Note markers are presented in square brackets, for example: [note 1].", + "Blank cells indicate no data. 
An explanation of why there is no data is given in the notes worksheet.",
+        "Some column headings give units, when this is the case the units are presented in round brackets to differentiate them from note markers.",
+    ],
+    contact=[
+        "Tel: 01234 567890",
+        "Email: [example@email.address](mailto: example@email.address)",
+    ],
+)
+
+```
+
+Supply the `Cover` to the `cover` argument of `gptables.write_workbook()`:
+
+```python
+gpt.write_workbook(
+    ...
+    cover=penguins_cover,
+    ...
+)
+```
+A cover sheet is created with the supplied information, with the title in large bold text
+followed by the introduction, information about the data, and contact details.
+
+![](../static/cover_sheet.png)
+
+The code is combined with a full example below in an extendable tab.
+
+??? "Adding a cover sheet"
+    ```python
+    import pandas as pd
+    import gptables as gpt
+
+    penguins_data = pd.read_csv("penguins.csv")
+
+    penguins_table = gpt.GPTable(
+        table = penguins_data,
+        table_name = "penguins_statistics",
+        title = "The Palmer Penguins Dataset",
+        subtitles = ["This is the first subtitle",
+                     "This is another subtitle"],
+        scope = "Penguins",
+        source = "Palmer Station, Antarctica",
+    )
+
+    penguins_sheets = {"Penguins": penguins_table}
+
+    penguins_cover = gpt.Cover(
+        cover_label = "Cover",
+        title = "Palmer Penguins Dataset",
+        intro=[
+            "This spreadsheet contains a table of data obtained from the palmerpenguins package",
+            "This is intended to be a simple example of how to use the gptables package to create a spreadsheet with a cover sheet and data sheets.",
+        ],
+        about=[
+            "Additional information about your publication can go here",
+            [{"bold": True}, "Publication dates"],
+            "Date published: 01 January 2025.",
+            "Next release: 01 January 2026.",
+            [{"bold": True}, "Methodology notes"],
+            "Information on methodology can be useful to users of your data",
+            [{"bold": True}, "Notes, blank cells and units"],
+            "Some cells in the tables refer to notes which can be 
found in the notes worksheet. Note markers are presented in square brackets, for example: [note 1].", + "Blank cells indicate no data. An explanation of why there is no data is given in the notes worksheet, see the column headings for which notes you should refer to.", + "Some column headings give units, when this is the case the units are presented in round brackets to differentiate them from note markers.", + ], + contact=[ + "Tel: 01234 567890", + "Email: [example@email.address](mailto: example@email.address)", + ], + ) + + gpt.write_workbook( + filename="gpt_adding_cover.xlsx", + sheets=penguins_sheets, + cover=penguins_cover, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + ``` + +## Adding additional data sheets + +For additional data sheets, construct additional GPTables: + +```python +penguins_table_1 = gpt.GPTable( + table=penguins_data_1, + table_name="penguins_statistics_1", + title="The Palmer Penguins Dataset (Sheet 1)", + subtitles=["This is the first subtitle", "This is another subtitle"], + scope="Penguins", + source="Palmer Station, Antarctica", +) + +penguins_table_2 = gpt.GPTable( + table=penguins_data_2, + table_name="penguins_statistics_2", + title="The Palmer Penguins Dataset (Sheet 2)", + subtitles=["This is the first subtitle for sheet 2", "Another subtitle for sheet 2"], + scope="Penguins", + source="Palmer Station, Antarctica", +) +``` + +Collate the GPTables with their names in a dictionary: + +```python +penguins_sheets = { + "Penguins 1": penguins_table_1, + "Penguins 2": penguins_table_2 + } +``` + +Then, use `gptables.write_workbook()` to create and write out the workbook: + +```python +gpt.write_workbook( + ... + sheets=penguins_sheets, + ... +) +``` + +The code is combined into a full example below in an extendable tab. + +??? 
"Adding additional data sheets" + ```python + import pandas as pd + import gptables as gpt + + penguins_data = pd.read_csv("penguins.csv") + + penguins_data_1 = penguins_data.iloc[:, :10] + penguins_data_2 = pd.concat([penguins_data.iloc[:, :3], penguins_data.iloc[:, 10:]], axis=1) + + penguins_table_1 = gpt.GPTable( + table=penguins_data_1, + table_name="penguins_statistics_1", + title="The Palmer Penguins Dataset (Sheet 1)", + subtitles=["This is the first subtitle", "This is another subtitle"], + scope="Penguins", + source="Palmer Station, Antarctica", + ) + + penguins_table_2 = gpt.GPTable( + table=penguins_data_2, + table_name="penguins_statistics_2", + title="The Palmer Penguins Dataset (Sheet 2)", + subtitles=["This is the first subtitle for sheet 2", "Another subtitle for sheet 2"], + scope="Penguins", + source="Palmer Station, Antarctica", + ) + + penguins_sheets = { + "Penguins 1": penguins_table_1, + "Penguins 2": penguins_table_2 + } + + gpt.write_workbook( + filename="python_penguins_gptable.xlsx", + sheets=penguins_sheets, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + ``` diff --git a/docs/how_to/additional_formatting.md b/docs/how_to/additional_formatting.md new file mode 100644 index 00000000..e3a8cf50 --- /dev/null +++ b/docs/how_to/additional_formatting.md @@ -0,0 +1,216 @@ +# Add additional formatting +There may be specific user needs or organisational reasons requiring +formatting other than the `gptables` defaults. If this is required regularly, consider +the use of a [theme](custom_theme.md). + +A wide range of options are possible +with the `gptable.GPTable(..., additional_formatting = ...)` parameter. See the [XlsxWriter documentation](https://xlsxwriter.readthedocs.io/format.html#format-methods-and-format-properties) for all formatting options. + +!!! 
warning "Consider accessibility implications to formatting changes"
+    Additional formatting changes the `gptables` defaults, which can introduce accessibility issues.
+    Refer to the Releasing statistics in spreadsheets [guidance](https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/) and consider user needs
+    regarding accessibility before adjusting the formatting.
+
+The sample code can be run from the
+[examples](https://github.com/ONSdigital/gptables/tree/main/gptables/examples) folder.
+
+## Using `additional_formatting`
+
+The `gptable.GPTable(..., additional_formatting = ...)` parameter allows for specifying
+columns, rows, and/or cells and the corresponding formatting changes to make.
+
+!!! warning "Formatting conflicts"
+    There are some conflicts between additional formatting options, for example wrapping
+    and shrinking text. Outputs should be reviewed for correctness.
+
+The option of what to format is specified, followed by the specific columns, rows, or cells,
+and then the formatting changes. To change the properties of columns called Species and Island
+to be center-aligned and italic, for example:
+
+```python
+sample_additional_formatting = [
+    {
+        "column": {
+            "columns": ["Species", "Island"],
+            "format": {
+                "align": "center",
+                "italic": True,
+            },
+        }
+    }
+]
+```
+Columns can be referenced by name or number. Rows may only be referenced by number, with `-1`
+corresponding to the last row. Column and row numbers include indices and column headings. Numeric indexing refers to position within the table, not the position in the output Excel sheet. Cell formatting takes highest precedence, followed by row formatting, and finally column formatting.
+
+Multiple selections of columns, rows, and cells can be made in a single `additional_formatting` list.
+ +```python +penguins_additional_formatting = [ + { + "column": { + "columns": ["Species", "Island"], + "format": { + "align": "center", + "italic": True, + }, + } + }, + { + "column": {"columns": [3], "format": {"left": 1}} + }, + { + "row": { + "rows": -1, + "format": { + "bottom": 1, + "indent": 2, + }, + } + }, +] +``` + +This is combined with a basic example below in an extendable tab. The result is +italicisation of two columns, left bordering on the 4th column, and indentation in the final row. + +??? "Using additional formatting" + ```python + import pandas as pd + import gptables as gpt + + penguins_data = pd.read_csv("penguins.csv") + + penguins_additional_formatting = [ + { + "column": { + "columns": ["Species", "Island"], + "format": { + "align": "center", + "italic": True, + }, + } + }, + { + "column": {"columns": [3], "format": {"left": 1}} + }, + { + "row": { + "rows": -1, + "format": { + "bottom": 1, + "indent": 2, + }, + } + }, + ] + + penguins_table = gpt.GPTable( + table = penguins_data, + table_name = "penguins_statistics", + title = "The Palmer Penguins Dataset", + subtitles = ["This is the first subtitle", + "This is another subtitle"], + scope = "Penguins", + source = "Palmer Station, Antarctica", + additional_formatting = penguins_additional_formatting, + ) + + penguins_sheets = {"Penguins": penguins_table} + + wb = gpt.produce_workbook( + filename="gpt_additional_formatting.xlsx", + sheets=penguins_sheets + ) + wb.close() + ``` + +![](../static/howto_additional_formatting.png) + +## Formatting text + +Formatting can also be applied to the text in `title`, `subtitles`, `scope`, `source` +and `legend` elements without using `additional_formatting`. Avoid using text formatting to represent data or important information, as most formatting is neither accessible nor machine readable. 
+
+Instead of a string, provide a list of strings and dictionaries containing valid [XlsxWriter format properties](https://xlsxwriter.readthedocs.io/format.html#format-methods-and-format-properties)
+and values to the relevant parameter. The formatting defined in these dictionaries will be applied to the
+next string in the list. For example:
+
+```python
+formatted_subtitles = [
+    "The first subtitle",
+    [{"bold": True}, "This", " is another subtitle"],
+]
+```
+
+![](../static/howto_additional_formatting_text.png)
+
+This is combined with a basic example below in an extendable tab.
+
+??? "Formatting text"
+    ```python
+    import pandas as pd
+    import gptables as gpt
+
+    penguins_data = pd.read_csv("penguins.csv")
+
+    formatted_subtitles = [
+        "The first subtitle",
+        [{"bold": True}, "This", " is another subtitle"],
+    ]
+
+    penguins_table = gpt.GPTable(
+        table = penguins_data,
+        table_name = "penguins_statistics",
+        title = "The Palmer Penguins Dataset",
+        subtitles = formatted_subtitles,
+        scope = "Penguins",
+        source = "Palmer Station, Antarctica",
+    )
+
+    penguins_sheets = {"Penguins": penguins_table}
+
+    wb = gpt.produce_workbook(
+        filename="additional_formatting_example.xlsx",
+        sheets=penguins_sheets
+    )
+    wb.close()
+    ```
+
+This formatting is applied in addition to the
+formatting of that element specified in the [`Theme`](../api/classes/theme.md).
+
+!!! warning "Formatting of note references and links"
+    Text formatting is not currently supported if the cell also contains note
+    references or links. This may be changed in the future if there is
+    sufficient user need, so please raise an issue if this is functionality
+    you need.
+ + +## Further formatting + +`gptables` outputs can also be built on with the [Format](https://xlsxwriter.readthedocs.io/format.html#the-format-class), [Workbook](https://xlsxwriter.readthedocs.io/workbook.html#the-workbook-class) +and [Worksheet](https://xlsxwriter.readthedocs.io/worksheet.html#the-worksheet-class) classes from +XlsxWriter. + +!!! warning "Competing formatting" + Some formatting will only occur where cells do not already have formatting applied, + for example in the `gptables` global [theme](https://github.com/ONSdigital/gptables/blob/e0dc2348e8172972ddd6ea2f737cb6047f591780/gptables/themes/gptheme.yaml#L1-L4) settings. + + Consult the XlsxWriter [Worksheet class documentation](https://xlsxwriter.readthedocs.io/worksheet.html#the-worksheet-class) as well as the `gptables` [theme how-to](../how_to/custom_theme.md) for more information. + +Worksheet properties can be altered directly, for example setting row height: + +```python +ws = wb.worksheets()[0] +ws.set_row(0, 30) +``` + +Or, by using `Format` objects: + +```python +italic_format = wb.add_format({"italic": True}) +ws.set_column( + 2, 3, 10, italic_format +) +``` diff --git a/docs/how_to/custom_theme.md b/docs/how_to/custom_theme.md new file mode 100644 index 00000000..fb050562 --- /dev/null +++ b/docs/how_to/custom_theme.md @@ -0,0 +1,122 @@ +# Custom themes with `gptables` + +It might be necessary to diverge from the `gptables` defaults for organisational +needs or user requirements. Where this is required regularly or across multiple teams, it can +be helpful for reproducibility and code readability to create a custom theme as opposed to supplying +an `additional_formatting` argument to `GPTable`. + +!!! warning "Consider accessibility implications from custom themes" + Using custom themes changes the `gptables` defaults, which can introduce accessibility issues. 
+    Refer to the Releasing statistics in spreadsheets [guidance](https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/) and consider user needs
+    regarding accessibility before adjusting the formatting.
+
+The sample code can be run from the
+[examples](https://github.com/ONSdigital/gptables/tree/main/gptables/examples) folder.
+
+## Global formatting
+
+A .yaml can reformat settings across the whole workbook from global settings, as well as
+specified elements of the sheet. A basic example is shown below, where the order in which elements
+appear on the datasheet is supplied under `description_order`.
+
+```yaml
+global:
+  font_size: 13
+  font_name: Arial
+  font_color: '#AD0000'
+
+title:
+  font_size: 20
+
+subtitle:
+  font_size: 16
+
+data:
+  text_wrap: 1
+
+description_order:
+  - instructions
+  - source
+  - legend
+  - scope
+```
+
+The path to the theme file or folder is supplied to the theme argument of `gptables.write_workbook()`
+inside of `gptables.Theme()`:
+
+```python
+    gptables.write_workbook(
+        filename=output_path,
+        sheets=sheets,
+        theme=gptables.Theme(theme_path),
+        contentsheet_options={"additional_elements": ["subtitles", "scope"]},
+    )
+```
+
+This is shown to have changed text colours and sizes as compared to the `gptables` defaults:
+
+![](../static/howto_theme_basic.png)
+
+This is combined into an extendable code block below.
+
+???
note "global formatting"
+    ```python
+    import pandas as pd
+    import gptables as gpt
+
+    penguins_data = pd.read_csv("penguins.csv")
+
+    penguins_table = gpt.GPTable(
+        table=penguins_data,
+        table_name="penguins_statistics",
+        title="The Palmer Penguins Dataset",
+        subtitles=["This is the first subtitle",
+                   "This is another subtitle"],
+        scope="Penguins",
+        source="Palmer Station, Antarctica",
+    )
+
+    penguins_sheets = {"Penguins": penguins_table}
+
+    gpt.write_workbook(
+        filename="gpt_custom_theme.xlsx",
+        sheets=penguins_sheets,
+        theme=gpt.Theme("example_theme_basic.yaml"),
+        contentsheet_options={"additional_elements": ["subtitles", "scope"]},
+    )
+    ```
+
+## Element-wise formatting
+
+Theme files can also be used to modify specific elements, such as the titles and subtitles
+on the cover:
+
+```yaml
+global:
+  font_size: 13
+  font_name: Arial
+  font_color: '#AD0000'
+
+cover_title:
+  font_size: 20
+
+cover_subtitle:
+  font_size: 18
+
+data:
+  text_wrap: 1
+
+description_order:
+  - instructions
+  - source
+  - legend
+  - scope
+```
+
+This is shown to have formatted the sizes of the cover elements, with the global settings
+dictating the other font sizes and colour throughout the workbook.
+
+![](../static/howto_theme_cover.png)
+
+Additional options available for element-wise formatting can be found in the
+[theme](https://github.com/ONSdigital/gptables/blob/main/gptables/themes/gptheme.yaml) file.
diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..823dbd1f --- /dev/null +++ b/docs/index.md @@ -0,0 +1,33 @@ +# Good Practice Tables (gptables) + +[![Actions build status](https://github.com/best-practice-and-impact/gptables/workflows/continuous-integration/badge.svg)](https://github.com/best-practice-and-impact/gptables/actions)[![PyPI release](https://badge.fury.io/py/gptables.svg)](https://badge.fury.io/py/gptables) + +`gptables` produces Excel spreadsheets that follow much of the +[Analysis Function guidance](https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/) on +releasing statistics in spreadsheets. This aims to implement digital accessibility standards like +[WCAG 2.2](https://www.w3.org/TR/WCAG22/) as well as follow other good practice, such as the use of notes on cover sheets. `gptables` helps users by creating spreadsheets consistently and more quickly than implementing this manually. + +## Features +Some of the key features of `gptables` outputs are: + +* A minimalist presentation style +* Coloured and underlined hyperlinks +* Default text formatting in legible fonts and sizes +* Formatted tables of contents and cover sheets +* The ability to use custom themes to adapt `gptables` outputs to your needs + +![](static/getting_started_before_and_after.png) + +**Note**: This package does not create perfectly accessible spreadsheets. Users should refer to the releasing statistics in spreadsheets [guidance](https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/) and the [spreadsheet accessibility checklist](https://analysisfunction.civilservice.gov.uk/policy-store/making-spreadsheets-accessible-a-brief-checklist-of-the-basics/) to +ensure the standards are completely met. 
+ +## Get in touch +Get in touch at [ASAP@ons.gov.uk](mailto:ASAP@ons.gov.uk) if you use `gptables` - we'd love to feature your work on the [examples](reference/examples.md) page, and get any feedback on how we make `gptables` even better. + +Got a feature request, or found a bug? Raise an [issue](https://github.com/ONSdigital/gptables/issues) on GitHub. + +## Related Packages +`gptables` is built on top of [`XlsxWriter`](https://xlsxwriter.readthedocs.io/index.html), bringing with it much of the `XlsxWriter` functionality. +Users may also be interested in [aftables](https://best-practice-and-impact.github.io/aftables/), an R native equivalent to +`gptables`, and [csvcubed](https://onsdigital.github.io/csvcubed-docs/external/), a package for turning data and metadata into +machine-readable CSV-W files. diff --git a/docs/info/changelog.md b/docs/info/changelog.md new file mode 100644 index 00000000..f58dab44 --- /dev/null +++ b/docs/info/changelog.md @@ -0,0 +1,313 @@ +# Changelog + +All notable changes to the main branch of this project should be documented +clearly in this file. In progress (or intended changes) can also be listed +below under Unreleased. + +The changelog format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project tries its very best to adhere to +[Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+
+---
+
+## Latest
+
+* **Date:**
+  2025-11-11
+
+**Added**
+
+* Merged pull request templates to create new one
+* Requirements and installation steps to README
+* Pre-commit hooks
+* Code owners file
+* More examples
+
+**Removed**
+
+* `contentsheet` notices are removed (see this changelog for the notice in v1.1.0)
+
+**Changed**
+
+* a11ytables renamed to aftables throughout
+* Updated README
+* Updated pull request template
+* Docs moved from readthedocs to mkdocs
+* Docs are restructured
+* Fixed auto column widths
+
+### v1.2.0
+
+* **Date:**
+  2025-01-07
+
+**Added**
+
+* Column width customisation to cover sheet
+* Incorporated rich text functionality into the cover page by using List[dict, str] to format text in the intro, about, and contact sections.
+* Rich text format lists can now be of length 2.
+* gptables now supports python 3.11
+* Official disclaimer included at the bottom of the README and PyPI index
+* pyproject.toml is now used to manage the packaging
+* Bugfix to allow writing of tables containing only integers
+* New examples to more simply explain things to new users
+* Update documentation for additional formatting
+
+**Removed**
+
+* CI for Python 3.6 - 3.8 on Linux, as no longer supported by GitHub action `setup-python`
+* Dataclasses as a dependency due to no longer supporting Python 3.6
+* setup.py as this has been replaced by the more modern pyproject.toml
+
+**Changed**
+
+* Default theme now includes cover sheet text wrapping set to True
+* CI changed for trusted publishing
+
+### v1.1.0
+
+* **Date:**
+  2022-07-27
+
+**Added**
+
+* New tests for `gptables`
+* Validation for tables with null or whitespace rows, cells or column names
+* Support for note references in `GPTable.instructions` attribute
+* Accessibility checklist based on [Analysis Function guidance](https://analysisfunction.civilservice.gov.uk/policy-store/making-spreadsheets-accessible-a-brief-checklist-of-the-basics/) and [a11ytables 
documentation](https://co-analysis.github.io/a11ytables/articles/checklist.html)
+* Contributors page in documentation
+
+**Changed**
+
+* Validation of GPTable text attributes. Error will be raised if `title` or
+  `instructions` is `None`, or if an entry in the `subtitle` or
+  `legend` lists is `None`.
+* Running package tests now requires pytest 6.2.5+, to support Python 3.10
+* CI configuration to run tests on both Linux and Windows with Python 3.6-3.10
+  and calculate coverage based on core functionality and utilities
+* Restructure and rewording of changelog
+* Package author and maintainer information
+
+**Deprecated**
+
+* `contentsheet` parameter of `write_workbook` will be removed in v2 of
+  gptables. Please use `contentsheet_label` instead.
+* Ability to reference notes within `GPTable.table.columns` will be removed
+  in v2 of gptables. Please use `GPTable.table_notes` to ensure references
+  are correctly placed and ordered.
+
+**Fixed**
+
+* `contentsheet_label` parameter added to `write_workbook`. Previously
+  parameter was included in documentation but was misnamed in function.
+* `auto_width` now functions as expected for columns with links or rich text
+  columns using Python 3.6 and 3.7, as well as for numeric columns using
+  Python 3.6+
+* Trailing whitespace is no longer added when `units` or `table_notes` are
+  added to column headers
+* Providing `table_notes` will no longer break additional formatting
+* Rich text in `instructions` property will no longer raise an error
+* Note order now takes into account custom `description_order` in `Theme`
+* Special character only cell validation now includes underscores
+* Image alt text appears when building user documentation
+* CI deploys documentation in full
+
+### v1.0.0
+
+* **Date:**
+  2022-06-04
+
+**Added**
+
+Cover:
+
+* links formatted using the markdown format of `"[display text](link)"` will be rendered with the display text showing and the link applying for the corresponding cell. 
Links must start with `http://`, `https://`, `ftp://`, `mailto:`, `internal:` or `external:` + +Table of contents: + +* contents page added to workbook by default. Can be disabled or customised by supplying `contentsheet_label` and `contentsheet_options` parameters to `produce_workbook` or `write_workbook`. +* links can be used in text elements passed to `contentsheet_options`, see above + +Notes: + +* notes page added to workbook if `notes_table` is provided. Can be customised by supplying `notesheet_label` and `notesheet_options` parameters to `produce_workbook` or `write_workbook`. +* links can be used in `notes_table` and text elements passed to `notesheet_options`, see above + +Data tables: + +* `GPTable.table` will be marked up as a worksheet table in Excel +* `table_name` property added to `GPTable` class. This must be provided for accessibility. +* `instructions` property added to `GPTable` class. If this is not provided, a default value will be used. +* `table_notes` property added to `GPTable` class. This allows note references to be added to the column header row. If used, they will be positioned below the column name and units. +* validation for `GPTable.table` column names - all columns must be named and the names must be unique +* links can be used in `GPTable.table` and text elements + +Theme: + +* `instructions_format` added to `Theme`. This can be used to customise the format of the `GPTable.instructions` element. + +Examples: + +* example added to demonstrate the use of a custom theme YAML + +**Changed** + +API functions: + +* `auto_width` property of `produce_workbook` and `write_workbook` now defaults to `True` rather than `False` + +Notes: + +* notes are now numbered according to position in workbook, starting from cell A1 of the first data sheet. Previously, notes were ordered independently for each worksheet +* note references in text elements are moved to the end of the text. 
This is to make them more accessible and avoid disrupting the text.
+
+Data tables:
+
+* `units` are now written on a new line with the corresponding column heading cell, instead of above the table
+* `units` property of `GPTable` is now optional, and should be provided as `dict` (`str` no longer supported)
+* `scope` property of `GPTable` is now optional, as this information may be included in title or subtitles
+* `source` property of `GPTable` is now optional, as this information should be included in cover sheet if it is the same across sheets
+* `legend` property of `GPTable` is now optional
+
+Theme:
+
+* default theme changed to be more accessible, in particular, font sizes increased to at least 12pt and font colour set to automatic. Note: compatibility issues with LibreOffice and automatic font colour
+* `footer_order` property of `Theme` replaced by `description_order`, as corresponding metadata have been moved from below to above table. Valid elements are now `instructions`, `source`, `legend` and `scope`.
+
+Examples:
+
+* examples updated to reflect new functionality
+
+**Removed**
+
+API functions:
+
+* `quick_and_dirty` function removed, as it is inaccessible and does not demonstrate good practice
+* `disable_footer_parentheses` removed, as footer is inaccessible and parentheses not good practice
+
+Cover:
+
+* `additional_elements` property removed from `Cover` class. This is because table of contents is now generated on contentsheet not cover.
+
+Notes:
+
+* `notes` and `annotations` properties removed from `GPTable` class. Notes are no longer displayed on data worksheets
+
+Data tables:
+
+* `include_index_column_headings` property removed from `GPTable` class, index column headers now always written, for accessibility
+
+Theme:
+
+* `annotations_format` and `notes_format` options removed from `Theme`, as `annotations` and `notes` no longer written to data worksheets
+* `missing_value` option removed from `Theme`. 
Unavailable or white-space table entries are now written as blank cells, and the user is invited to consider the GSS guidance on symbols and shorthand in spreadsheets
+
+**Fixed**
+
+* incorrect version numbers in changelog
+* minor typos in docs
+
+### v0.4.0
+
+* **Date:**
+  2021-01-30
+
+**Fixed**
+
+* bug where setting a GPTable’s scope to `None` resulted in the units also not being displayed
+* deprecation warning when running tests
+
+**Added**
+
+* `include_index_column_headings` option to `GPTable`, so that users can display index column headers if they wish. Defaults to `False` for backwards compatibility.
+
+### v0.3.2
+
+* **Date:**
+  2020-08-24
+
+**Fixed**
+
+* bug in Cover post_init where `additional_elements` is None (its default value…)
+* more minor typos in docs
+* incorrect version numbers in changelog
+
+### v0.3.1
+
+* **Date:**
+  2020-08-24
+
+**Fixed**
+
+* incorrect `if __name__ == "__main__"` in example files
+* minor typos in docs
+
+### v0.3.0
+
+* **Date:**
+  2020-08-24
+
+**Added**
+
+* `Cover` dataclass, to provide text elements for cover pages. Provided via `cover` parameter of API functions. 
+
+* `write_cover` and associated `GPWorksheet` methods, for writing a cover page as the first sheet in a GPWorkbook
+* additional `Theme` attributes for `Cover` text elements
+* documentation for `Cover` class and example usage
+
+**Fixed**
+
+* loads of typos in documentation
+* broken CI deployment of docs - code includes were not working
+
+### v0.2.0
+
+* **Date:**
+  2020-07-10
+
+**Fixed**
+
+* stacking of parentheses around footer elements when a `GPTable` was used more than once
+* duplication of `missing_value` in legend when multiple missing values were present
+* rst syntax in docs and readme (some bits of Markdown were hanging around)
+
+**Added**
+
+* “quick and dirty” API function, for when you just want tables and you want them now
+* functionality to automatically determine column widths - available via `auto_width` parameter in API functions
+* ability to disable addition of parentheses to footer element text
+
+**Changed**
+
+* removed `num_format` property from `data` element of default theme
+* Updated documentation of examples
+* Completely updated online documentation, so that the package might actually be usable
+
+### v0.1.3
+
+* **Date:**
+  2020-03-06
+
+**Fixed**
+
+* missing files in binary distribution. v0.1.1 and v0.1.2 will be deleted from
+  PyPI to prevent use of broken distributions.
+
+**Added**
+
+* this changelog to the documentation!
+
+**Changed**
+
+* README to reflect description of package. Dropped developer install
+  instructions. 
+ +### v0.1.1 + +* **Date:** + 2020-03-05 + +**Added** + +* gptables package - see README and documentation for usage +* build and deployment of [documentation](https://best-practice-and-impact.github.io/gptables/) +* deployment to [PyPI](https://pypi.org/project/gptables/) diff --git a/docs/info/contributing.md b/docs/info/contributing.md new file mode 100644 index 00000000..1b7b0e6a --- /dev/null +++ b/docs/info/contributing.md @@ -0,0 +1,36 @@ +# Contributing + +When contributing to this repository, please first discuss the change you wish +to make using [GitHub Issues](https://github.com/ONSdigital/gptables/issues). Alternatively, +get in touch with the Analysis Standards and Pipelines Team at the Office for National Statistics +via [email](mailto:ASAP@ons.gov.uk). + +## Pull request process + +1. Branch from the `dev` branch with a descriptive name, e.g. `feature-xyz`, `bug-xyz`, or by referencing an issue number. +1. Update relevant documentation: + - Update Markdown (`.md`) files in the `docs/` folder if necessary. + - Ensure function and class docstrings are clear and complete - documentation is generated from these using `mkdocs` and `mkdocstrings`. + - Update the `README.md` file if needed. +1. Once you are ready for review please open a pull request to the + `dev` branch. + - Please fill in the template as much as possible so the changes are clear to a colleague without + prior knowledge of the changes. + - This will be merged by maintainers following their approval. + +Thank you for contributing to `gptables`! + +## Code style + +- We name variables using few nouns in lowercase, e.g. `mapping_names` + or `increment`. +- We name functions using verbs in lowercase, e.g. `map_variables_to_names` or + `change_values`. +- We use the [numpydoc](https://numpydoc.readthedocs.io/en/latest/format.html) + format for documenting features using docstrings. +- Code formatting and linting are enforced via pre-commit hooks. 
Please install them by running `pre-commit install` in the root directory of the repository. + +## Testing + +- We use pytest for testing. All tests must pass. +- Please ensure that you have added tests for any new features or bug fixes, or raised an issue prompting the team to add this. diff --git a/docs/info/contributors.md b/docs/info/contributors.md new file mode 100644 index 00000000..9819c033 --- /dev/null +++ b/docs/info/contributors.md @@ -0,0 +1,8 @@ +# Contributors + +`gptables` is maintained by the Analysis Standards and Pipelines team at the Office +for National Statistics. + +Our thanks goes to users and stakeholders who have contributed and continue to give feedback on `gptables`. + +If you'd like to contribute contact the team at [ASAP@ons.gov.uk](mailto:ASAP@ons.gov.uk), or get involved in the [GitHub Issue](https://github.com/ONSdigital/gptables/issues) discussions. diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 6247f7e2..00000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -if "%1" == "" goto help - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. 
- echo.If you don't have Sphinx installed, grab it from - echo.http://sphinx-doc.org/ - exit /b 1 -) - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/reference/checklist.md b/docs/reference/checklist.md new file mode 100644 index 00000000..00bb5e6f --- /dev/null +++ b/docs/reference/checklist.md @@ -0,0 +1,72 @@ +# Accessibility checklist + +The tables below indicate your accessibility responsibilities when publishing +statistics in spreadsheets. It is based on the Analysis Function [checklist of +the basics](https://analysisfunction.civilservice.gov.uk/policy-store/making-spreadsheets-accessible-a-brief-checklist-of-the-basics/) and heavily inspired by the [aftables documentation](https://best-practice-and-impact.github.io/aftables/articles/checklist.html). + +If you need further advice on applying accessibility and presentation standards, you can contact your department's [GSS Presentation Champion](https://analysisfunction.civilservice.gov.uk/government-statistical-service-and-statistician-group/champion-networks/presentation-champions/). Presentation Champions work across government to promote best practice in the presentation of statistics and can direct you to appropriate guidance and support. + +#### NOTE +The tables show which checklist items are automatically met by +gptables. This applies to workbooks created using the default `gptheme` +and may not apply if custom themes or additional formatting are used. + +## Table + +| Description | Essential? 
| Status | Explanation | +|--------------------------------------------------------------|--------------|-----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Mark up tables | Essential | Implemented | Tables in a `GPWorkbook`, including notes table and table of contents
are marked as tables by default. | | Give tables meaningful names | Desirable | Partially implemented | Pass a meaningful name to the `GPTable.table_name` property. | | Remove merged cells, split cells and nested tables | Essential | Implemented | Merged cells, split cells and nested tables are not supported by gptables. | | Remove blank rows and columns within tables | Essential | Partially implemented | Blank rows or columns in a table will raise an error. User should
remove them and apply any desired additional formatting. | | All tables should have one tagged header row | Essential | Implemented | The column names in a `GPTable.table` will be tagged as the header. | | Wrap text within cells | Essential | Partially implemented | Using `auto_width = True` (default value) will enable all text to be
visible. This feature is experimental and some customisation may be
desired. | +| Avoid adding filters and freeze panes | Desirable | Implemented | Filters and freeze panes are not supported by gptables. | +| Only leave cells with no data empty in certain circumstances | Essential | Partially implemented | If cells are null or whitespace, users are prompted to explain why in the
`GPTable.instructions` property. If there is more than one reason
for missingness, use the [appropriate shorthand](https://analysisfunction.civilservice.gov.uk/policy-store/symbols-in-tables-definitions-and-help/) and explain in the
`GPTables.legend` property. | +| Avoid hiding rows or columns | Desirable | Implemented | Hiding rows or columns is not supported by gptables. | + +## Footnotes + +| Description | Essential? | Status | Explanation | +|--------------------------------------------------------|--------------|-----------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Do not use symbols or superscript to signpost to notes | Essential | Implemented | Notes marked with `$$note_ref$$` will be formatted as `[note n]`,
where `n` is the order the note appears in the workbook. | +| Use the word ‘note’ when referring to footnotes | Desirable | Implemented | As mentioned above, notes are formatted as `[note n]`. | +| Avoid putting note markers in specific cells | Desirable | Partially implemented | The `$$note_ref$$` functionality is not supported within data cells
in tables. It is the user’s responsibility to not add notes manually to
data cells. | +| Put note text in a notes table on a notes worksheet | Desirable | Implemented | If users provide a `notes_table` when producing or writing a workbook,
a notes worksheet will be created. | + +## Formatting + +| Description | Essential? | Status | Explanation | +|-------------------------------------------------------------------------------------------|--------------|-----------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| All written content needs to meet the accessibility guidelines | Essential | Not implemented | It is the package user’s responsibility to make sure that text follows
the Analysis Function guidance on [making written content accessible](https://analysisfunction.civilservice.gov.uk/policy-store/making-analytical-publications-accessible/#section-3). | +| Links must be accessible | Essential | Partially implemented | Users should provide descriptive hyperlink text using the
`[display text](link)` syntax. | | Format text to make it accessible | Desirable | Implemented | The default theme meets the accessibility guidance on formatting text. | | All worksheets should have descriptive titles which are properly tagged
and formatted | Essential | Partially implemented | Provide descriptive titles to the `GPTable.title` and `subtitles`
properties. Note: heading tagging in Excel does not meet the standard
required of webpage heading tagging. | +| Avoid using symbols in general | Desirable | Partially implemented | An error will be raised if table cells only contain symbols. It is the
user’s responsibility to make sure symbol use within text is appropriate. | +| Do not use headers and footers, floating text boxes or floating toolbars | Essential | Implemented | These components are not supported by gptables. | +| Do not use visual devices to divide data regions | Desirable | Implemented | Using gptables without additional formatting does not use such visual devices. | +| Do not use a background fill | Desirable | Implemented | The gptables default theme does not apply a background fill. | +| Do not use colour as the only way to convey a message | Essential | Implemented | The default theme without additional formatting does not apply colour. | +| When using colour for emphasis check the contrast | Essential | Not implemented | If using colour via additional formatting or a custom theme, it is the
user’s responsibility to check the colour contrast. | +| Avoid images in spreadsheets | Desirable | Implemented | Adding images is not supported by gptables. | +| Remove macros | Desirable | Implemented | Macros are not supported by gptables. | + +## Structure + +| Description | Essential? | Status | Explanation | +|-----------------------------------------------------------|--------------|-------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Give worksheets unique names or numbers | Essential | Implemented | Worksheet names come from the `sheets = {"label": gptable}` property.
If names are duplicated, the final `label: gptable` pair will be used. | +| Remove blank worksheets | Essential | Implemented | Blank worksheets are not supported by gptables. | +| Use cells in column A wisely | Essential | Implemented | `GPTable` attributes are written to column A. Title and subtitles are
first. The order of the remaining descriptive attributes can be
customised by creating a custom theme with a different `description_order`. | +| Position tables against the left-hand edges of each sheet | Essential | Implemented | gptables writes tables starting in column A. | +| Avoid putting content below a table | Desirable | Implemented | Writing content below a table is not supported in gptables>=1.0.0. | +| Avoid worksheets with multiple tables | Desirable | Implemented | Writing multiple tables per sheet is not supported in gptables. | + +## Before publishing + +| Description | Essential? | Status | Explanation | +|-----------------------------------------------------------------------------------|--------------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Run a spelling and grammar check | Essential | Not implemented | gptables does not check spelling and grammar, this is the user’s
responsibility. | +| Use the accessibility checker | Desirable | Not implemented | gptables does not have a built-in accessibility checker. Whilst all
efforts have been taken to make outputs accessible, the final
responsibility sits with the user. | +| Add document information | Essential | Not implemented | gptables does not add title or language information to the document,
this responsibility sits with the user. Note: the document properties
available depend on the user’s operating system and may not meet
the standard required for webpages. | +| Ensure the cursor is in cell A1 of the first worksheet when doing your final save | Essential | Implemented | Workbooks written using gptables will have the cursor in the first cell.
Note: if the workbook is subsequently opened and saved, it is the user’s
responsibility to check that the cursor has not been moved. | diff --git a/docs/reference/examples.md b/docs/reference/examples.md new file mode 100644 index 00000000..869bca31 --- /dev/null +++ b/docs/reference/examples.md @@ -0,0 +1,143 @@ +# gptables in use + +This page provides examples of gptables in use with real data processes. + +If you'd like your work featured here, reach out to us at ASAP@ons.gov.uk! + +## Labour market overview tables +This example replicates the Labour Market overview [accessible +spreadsheet](https://analysisfunction.civilservice.gov.uk/policy-store/further-resources-for-releasing-statistics-in-spreadsheets/) example by the Analysis Function, based +on data from December 2020. + +??? "Labour market overview tables code" + ```python + + from pathlib import Path + import pandas as pd + import gptables as gpt + + # Read data and arrange + parent_dir = Path(__file__).parent + + labour_market_data = pd.read_csv(parent_dir / "survey_data.csv") + labour_market_data.dropna( + axis=0, how="all", inplace=True + ) # Remove empty rows in the data + labour_market_data.dropna( + axis=1, how="all", inplace=True + ) # Remove columns rows in the data + col_names = [ + "Time period and dataset code row", + "Number of people", + "Economically active", + "Employment level", + "Unemployment level", + "Economically inactive", + "Economically active rate", + "Employment rate", + "Unemployment rate", + "Economically inactive rate", + ] + labour_market_data.columns = col_names + + + # Define table elements + table_name = "Labour_market_overview_accessibility_example_Nov21" + title = "Number and percentage of population aged 16 and over in each labour market activity group, UK, seasonally adjusted" + subtitles = [ + "This worksheet contains one table. Some cells refer to notes which can be found on the notes worksheet." 
+ ] + units = { + 1: "thousands", + 2: "thousands", + 3: "thousands", + 4: "thousands", + 5: "thousands", + 6: "%", + 7: "%", + 8: "%", + 9: "%", + } + table_notes = { + 2: "$$note 1$$", + 3: "$$note 2$$", + 4: "$$note 2$$", + 5: "$$note 3$$", + 7: "$$note 4$$", + 8: "$$note 4$$", + 9: "$$note 4$$", + } + scope = "Labour Market" + source = "Source: Office for National Statistics" + index = {2: 0} # Column 0 is a level 2 index + additional_formatting = [ + { + "row": { + "rows": [1], + "format": {"bold": True, "font_size": 14}, + } + } + ] + + # Define our GPTable + survey_table = gpt.GPTable( + table=labour_market_data, + table_name=table_name, + title=title, + subtitles=subtitles, + units=units, + table_notes=table_notes, + scope=scope, + source=source, + index_columns=index, + additional_formatting=additional_formatting, + ) + + sheets = {"sheet 1a": survey_table} + + cover = gpt.Cover( + cover_label="Cover", + title="Labour market overview data tables, UK, December 2020 (accessibility example)", + intro=[ + "This spreadsheet contains a selection of the data tables published alongside the Office for National Statistics' Labour market overview for December 2020. We have edited these data tables and the accompanying cover sheet, table of contents and notes worksheet to meet the legal accessibility regulations. It is intended to be an example of an accessible spreadsheet. The data tables and accompanying information have not been quality assured. Please see the original statistical release if you are looking for accurate data.", + "[Labour market overview, UK: December 2020](https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/bulletins/uklabourmarket/december2020)", + ], + about=[ + [{"bold": True, "font_size": 14}, "Publication dates"], + "The data tables in this spreadsheet were originally published at 7:00am 15 December 2020", + "The next publication was published at 7:00am 26 January 2021.", + [{"bold": True, "font_size": 14}, "Note on weighting methodology"], + "Due to the coronavirus (COVID19) pandemic, all face to face interviewing for the Labour Force Survey was suspended and replaced with telephone interviewing. 
This change in mode for first interviews has changed the non-response bias of the survey, affecting interviews from March 2020 onwards. All data included in this spreadsheet have now been updated and are based on latest weighting methodology.", + "More information about the impact of COVID19 on the Labour Force Survey", + "Dataset identifier codes", + "The four-character identification codes appearing in the tables are the ONS' references for the data series.", + [{"bold": True, "font_size": 14}, "Comparing quarterly changes"], + "When comparing quarterly changes ONS recommends comparing with the previous non-overlapping three-month average time period, for example, compare Apr to Jun with Jan to Mar, not with Mar to May.", + [{"bold": True, "font_size": 14}, "Units, notes and no data"], + "Some cells in the tables refer to notes which can be found in the notes worksheet. Note markers are presented in square brackets, for example: [note 1].", + "Some cells have no data, when this is the case the words 'no data' are presented in square brackets, for example: '[no data]'. An explanation of why there is no data is given in the notes worksheet, see the column headings for which notes you should refer to.", + "Some column headings give units, when this is the case the units are presented in round brackets to differentiate them from note markers.", + [ + {"bold": True, "font_size": 14}, + "Historic publication dates for labour market statistics", + " ", + ], + "The monthly labour market statistics release was first published in April 1998. Prior to April 1998 there was no integrated monthly release and the Labour Force Survey estimates were published separately, on different dates, from other labour market statistics. 
From April 2018 the usual publication day for the release was changed from Wednesday to Tuesday.", + [{"bold": True, "font_size": 14}, "More labour market data"], + "Other labour market datasets are available on the ONS website.", + "Labour market statistics time series dataset on the ONS website.", + ], + contact=[ + "Tel: 01633455400", + "Email: [labour.market@ons.gov.uk](mailto:labour.market@ons.gov.uk)", + ], + ) + + # Notesheet + notes_table = pd.read_csv(parent_dir / "survey_data_notes.csv") + notes_table.dropna(axis=0, how="all", inplace=True) # Remove empty rows in the data + notes_table.dropna(axis=1, how="all", inplace=True) # Remove columns rows in the data + notes_table.columns = ["Note reference", "Note text"] + + output_path = parent_dir / "python_survey_data_gptable.xlsx" + gpt.write_workbook( + filename=output_path, + sheets=sheets, + cover=cover, + notes_table=notes_table, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + auto_width=True, + gridlines="show_all", + cover_gridlines=True, + ) + print("Output written at: ", output_path) + ``` diff --git a/docs/reference/glossary.md b/docs/reference/glossary.md new file mode 100644 index 00000000..b3816415 --- /dev/null +++ b/docs/reference/glossary.md @@ -0,0 +1,19 @@ +# Glossary of Terms + +## Cover +A cover sheet of a spreadsheet. Contains information about the dataset(s) in the Excel workbook, +such as publication dates and methodology notes. Handled by the [`Cover`](../api/classes/cover.md) class. + +## Element +A [part of a table](../getting_started/elements.md), such as a title or subtitle. Relevant when defining values of elements in a `GPTable`. + +## GPTable +A [`Good Practice Table object`](../api/classes/gptable.md), which has data and elements such as title, subtitles, and column units +associated with it. + +## Placeholder +A [note](../getting_started/tutorial.md#adding-notes) reference denoted by double dollar notation, e.g. `$$placeholder_text$$`. 
These are +replaced by numbers when the workbook is written to file. + +## Theme +A [.yaml file](../how_to/custom_theme.md) describing pre-set formatting options. diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 9a44861f..00000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -gptables -sphinx>=2 -sphinx_rtd_theme diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst deleted file mode 100644 index f7efeaeb..00000000 --- a/docs/source/changelog.rst +++ /dev/null @@ -1,311 +0,0 @@ -********* -Changelog -********* - -All notable changes to the master branch of this project should be documented -clearly in this file. In progress (or intended changes) can also be listed -below under Unreleased. - -The changelog format is based on `Keep a Changelog `_, -and this project tries its very best to adhere to -`Semantic Versioning `_. - ------------------------------------------------------------------------- - -Unreleased -=================== -:Date: 2025-04-25 - -**Added** - -* Added a 'closed issue log' - -**Changed** - -* a11ytables renamed to aftables -* Updated pull request template - -Released (PyPI) -=============== - -v1.2.0 ------- -:Date: 2025-01-07 - -**Added** - -* Column width customisation to cover sheet -* Incorporated rich text functionaliy into the cover page by using List[dict, str] to format text in the intro, about, and contact sections. -* Rich text format lists can now be of length 2. 
-* gptables now supports python 3.11 -* Official disclaimer included at the bottom of the README and PyPI index -* pyproject.toml is now used to manage the packaging -* Bugfix to allow writing of tables containing only integers -* New examples to more simply explain things to new users -* Update documentation for additional formatting - -**Removed** - -* CI for Python 3.6 - 3.8 on Linux, as no longer supported by GitHub action ``setup-python`` -* Dataclasses as a dependency due to no longer supporting Python 3.6 -* setup.py as this has been replaced by the more modern pyproject.toml - -**Changed** - -* Default theme now includes cover sheet text wrapping set to True -* CI changed for trusted publishing - - -v1.1.0 ------- -:Date: 2022-07-27 - -**Added** - -* New tests for ``gptables`` -* Validation for tables with null or whitespace rows, cells or column names -* Support for note references in ``GPTable.instructions`` attribute -* Accessibility checklist based on `Analysis Function guidance`_ and `a11ytables documentation`_ -* Contributers page in documentation - -.. _`Analysis Function guidance`: https://analysisfunction.civilservice.gov.uk/policy-store/making-spreadsheets-accessible-a-brief-checklist-of-the-basics/ -.. _`a11ytables documentation`: https://co-analysis.github.io/a11ytables/articles/checklist.html - -**Changed** - -* Validation of GPTable text attributes. Error will be raised if ``title`` or - ``instructions`` is ``None``, or if an entry in the ``subtitle`` or - ``legend`` lists is ``None``. -* Running package tests now requires pytest 6.2.5+, to support Python 3.10 -* CI configuration to run tests on both Linux and Windows with Python 3.6-3.10 - and calculate coverage based on core functionality and utilites -* Restructure and rewording of changelog -* Package author and maintainer information - -**Deprecated** - -* ``contentsheet`` parameter of ``write_workbook`` will be removed in v2 of - gptables. Please use ``contentsheet_label`` instead. 
-* Ability to reference notes within ``GPTable.table.columns`` will be removed - in v2 of gptables. Please use ``GPTable.table_notes`` to ensure references - are correctly placed and ordered. - -**Fixed** - -* ``contentsheet_label`` parameter added to ``write_workbook``. Previously - parameter was included in documentation but was misnamed in function. -* ``auto_width`` now functions as expected for columns with links or rich text - columns using Python 3.6 and 3.7, as well as for numeric columns using - Python 3.6+ -* Trailing whitespace is no longer added when ``units`` or ``table_notes`` are - added to column headers -* Providing ``table_notes`` will no longer break additional formatting -* Rich text in ``instructions`` property will no longer raise an error -* Note order now takes into account custom ``description_order`` in ``Theme`` -* Special character only cell validation now includes underscores -* Image alt text appears when building user documentation -* CI deploys documentation in full - - -v1.0.0 ------- -:Date: 2022-06-04 - -**Added** - -Cover: - -* links formatted using the markdown format of ``"[display text](link)"`` will be rendered with the display text showing and the link applying for the corresponding cell. Links must start with ``http://``, ``https://``, ``ftp://``, ``mailto:``, ``internal:`` or ``external:`` - -Table of contents: - -* contents page added to workbook by default. Can be disabled or customised by supplying ``contentsheet_label`` and ``contentsheet_options`` parameters to ``produce_workbook`` or ``write_workbook``. -* links can be used in text elements passed to ``contentsheet_options``, see above - -Notes: - -* notes page added to workbook if ``notes_table`` is provided. Can be customised by supplying ``notesheet_label`` and ``notesheet_options`` parameters to ``produce_workbook`` or ``write_workbook``. 
-* links can be used in ``notes_table`` and text elements passed to ``notesheet_options``, see above - -Data tables: - -* ``GPTable.table`` will be marked up as a worksheet table in Excel -* ``table_name`` property added to ``GPTable`` class. This must be provided for accessibility. -* ``instructions`` property added to ``GPTable`` class. If this is not provided, a default value will be used. -* ``table_notes`` property added to ``GPTable`` class. This allows note references to be added to the column header row. If used, they will be positioned below the column name and units. -* validation for ``GPTable.table`` column names - all columns must be named and the names must be unique -* links can be used in ``GPTable.table`` and text elements - -Theme: - -* ``instructions_format`` added to ``Theme``. This can be used to customise the format of the ``GPTable.instructions`` element. - -Examples: - -* example added to demonstrate the use of a custom theme YAML - - -**Changed** - -API functions: - -* ``auto_width`` property of ``produce_workbook`` and ``write_workbook`` now defaults to ``True`` rather than ``False`` - -Notes: - -* notes are now numbered according to position in workbook, starting from cell A1 of the first data sheet. Previously, notes were ordered independently for each worksheet -* note references in text elements are moved to the end of the text. This is to make them more accessible and avoid disrupting the text. 
- -Data tables: - -* ``units`` are now written on a new line with the the corresponding column heading cell, instead of above the table -* ``units`` property of ``GPTable`` is now optional, and should be provided as ``dict`` (``str`` no longer supported) -* ``scope`` property of ``GPTable`` is now optional, as this information may be included in title or subtitles -* ``source`` property of ``GPTable`` is now optional, as this information should be included in cover sheet if it is the same across sheets -* ``legend`` property as ``GPTable`` is now optional - -Theme: - -* default theme changed to be more accessible, inparticular, font sizes increased to at least 12pt and font colour set to automatic. Note: compatibility issues with LibreOffice and automatic font colour -* ``footer_order`` property of ``Theme`` replaced by ``description_order``, as corresponding metadata have been moved from below to above table. Valid elements are now ``instructions``, ``source``, ``legend`` and ``scope``. - -Examples: - -* examples updated to reflect new functionality - -**Removed** - -API functions: - -* ``quick_and_dirty`` function removed, as it is inaccessible and does not demonstrate good practice -* ``disable_footer_parentheses`` removed, as footer is inaccessible and parenetheses not good practice - -Cover: - -* ``additional_elements`` property removed from ``Cover`` class. This is because table of contents is now generated on contentsheet not cover. - -Notes: - -* ``notes`` and ``annotations`` properties removed from ``GPTable`` class. Notes are no longer displayed on data worksheets - -Data tables: - -* ``include_index_column_headings`` property removed from ``GPTable`` class, index column headers now always written, for accessibility - -Theme: - -* ``annotations_format`` and ``notes_format`` options removed from ``Theme``, as ``annotations`` and ``notes`` no longer written to data worksheets -* ``missing_value`` option removed from ``Theme``. 
Unavailable or white-space table entries are now written as blank cells, and the user is invited to consider the GSS guidance on symbols and shorthand in spreadsheets - -**Fixed** - -* incorrect version numbers in changelog -* minor typos in docs - - -v0.4.0 ------- -:Date: 2021-01-30 - -**Fixed** - -* bug where setting a GPTable's scope to ``None`` resulted in the units also not being displayed -* depreciation warning when running tests - -**Added** - -* ``include_index_column_headings`` option to ``GPTable``, so that users can display index column headers if they wish. Defaults to ``False`` for backwards compatibility. - - -v0.3.2 ------- -:Date: 2020-08-24 - - -**Fixed** - -* bug in Cover post_init where ``additional_elements`` is None (it's default value...) -* more minor typos in docs -* incorrect version numbers in changelog - - -v0.3.1 ------- -:Date: 2020-08-24 - - -**Fixed** - -* incorrect ``if __name__ == "__main__"`` in example files -* minor typos in docs - - -v0.3.0 ------- -:Date: 2020-08-24 - -**Added** - -* ``Cover`` dataclass, to provide text elements for cover pages. Provided via ``cover`` parameter of API functions. 
-* ``write_cover`` and associated ``GPWorksheet`` methods, for writing a cover page as the first sheet in a GPWorkbook -* additional ``Theme`` attributes for ``Cover`` text elements -* documentation for ``Cover`` class and example usage - -**Fixed** - -* loads of typos in documentation -* broken CI deployment of docs - code includes were not working - - -v0.2.0 ------- -:Date: 2020-07-10 - -**Fixed** - -* stacking of parentheses around footer elements when a ``GPTable`` was used more than once -* duplication of ``missing_value`` in legend when multiple missing values were present -* rst syntax in docs and readme (some bits of Markdown were hanging around) - -**Added** - -* "quick and dirty" API function, for when you just want tables and you want them now -* functionality to automatically determine column widths - available via ``auto_width`` parameter in API functions -* ability to disable addition of parenetheses to footer element text - -**Changed** - -* removed ``num_format`` property from ``data`` element of default theme -* Updated documentation of examples -* Completely updated online documentation, so that the package might actually be usable - - -v0.1.3 ------- -:Date: 2020-03-06 - -**Fixed** - -* missing files in binary distribution. v0.1.1 and v0.1.2 will be deleted from - PyPI to prevent use of broken distributions. - -**Added** - -* this changelog to the documentation! - - -**Changed** - -* README to reflect description of package. Dropped developer install - instructions. 
- - -v0.1.1 ------- -:Date: 2020-03-05 - -**Added** - -* gptables package - see README and documentation for usage -* build and deployment of `documentation `_ -* deployment to `PyPI `_ diff --git a/docs/source/checklist.rst b/docs/source/checklist.rst deleted file mode 100644 index 61824984..00000000 --- a/docs/source/checklist.rst +++ /dev/null @@ -1,251 +0,0 @@ -*********************** -Accessibility checklist -*********************** - -The tables below indicate your accessibility responsibilities when publishing -statistics in spreadsheets. It is based on the Analysis Function `checklist of -the basics`_ and heavily inspired by the `aftables documentation`_. - -.. _`checklist of the basics`: https://analysisfunction.civilservice.gov.uk/policy-store/making-spreadsheets-accessible-a-brief-checklist-of-the-basics/ -.. _`aftables documentation`: https://best-practice-and-impact.github.io/aftables/articles/checklist.html - -.. note:: The tables show which checklist items are automatically met by - gptables. This applies to workbooks created using the default ``gptheme`` - and may not apply if custom themes or additional formatting are used. - -Table ------ - -.. list-table:: - :header-rows: 1 - :widths: 24 17 19 40 - - * - Description - - Essential? - - Status - - Explanation - * - Mark up tables - - Essential - - Implemented - - Tables in a ``GPWorkbook``, including notes table and table of contents - are marked as tables by default. - * - Give tables meaningful names - - Desirable - - Partially implemented - - Pass a meaningful name to the ``GPTable.table_name`` property. - * - Remove merged cells, split cells and nested tables - - Essential - - Implemented - - Merged cell, split cells and nested tables are not supported by gptables. - * - Remove blank rows and columns within tables - - Essential - - Partially implemented - - Blank rows or columns in a column will raise an error. User should - remove them and any apply any desired additional formatting. 
- * - All tables should have one tagged header row - - Essential - - Implemented - - The column names in a ``GPTable.table`` will be tagged as the header. - * - Wrap text within cells - - Essential - - Partially implemented - - Using ``auto_width = True`` (default value) will enable all text to be - visible. This feature is experimental and some customisation may be - desired. - * - Avoid adding filters and freeze panes - - Desirable - - Implemented - - Filters and freeze panes are not supported by gptables. - * - Only leave cells with no data empty in certain circumstances - - Essential - - Partially implemented - - If cells are null or whitespace, users are prompted to explain why in the - ``GPTables.instructions`` property. If there is more than one reason - for missingness, use the `appropriate shorthand`_ and explain in the - ``GPTables.legend`` property. - * - Avoid hiding rows or columns - - Desirable - - Implemented - - Hiding rows or columns is not supported by gptables. - -.. _`appropriate shorthand`: https://analysisfunction.civilservice.gov.uk/policy-store/symbols-in-tables-definitions-and-help/ - - -Footnotes ---------- - -.. list-table:: - :header-rows: 1 - :widths: 24 17 19 40 - - * - Description - - Essential? - - Status - - Explanation - * - Do not use symbols or superscript to signpost to notes - - Essential - - Implemented - - Notes marked with ``$$note_ref$$`` will be formatted as ``[note n]``, - where ``n`` is the order the note appears in the workbook. - * - Use the word 'note' when referring to footnotes - - Desirable - - Implemented - - As mentioned above, notes are formatted as ``[note n]``. - * - Avoid putting note markers in specific cells - - Desirable - - Partially implemented - - The ``$$note_ref$$`` functionality is not supported within data cells - in tables. It is the user's responsibility to not add notes manually to - data cells. 
- * - Put note text in a notes table on a notes worksheet - - Desirable - - Implemented - - If users provide a ``notes_table`` when producing or writing a workbook, - a notes worksheet will be created. - - -Formatting ----------- - -.. list-table:: - :header-rows: 1 - :widths: 24 17 19 40 - - * - Description - - Essential? - - Status - - Explanation - * - All written content needs to meet the accessibility guidelines - - Essential - - Not implemented - - It is the package user's responsibility to make sure that text follows - the Analysis Function guidance on `making written content accessible`_. - * - Links must be accessible - - Essential - - Partially implemented - - Users should provide descriptive hyperlink text using the - ``(display text)[link]`` syntax. - * - Format text to make it accessible - - Desirable - - Implemented - - The default theme meets the accessibility guidance on formatting text. - * - All worksheets should have descriptive titles which are properly tagged - and formatted - - Essential - - Partially implemented - - Provide descriptive titles to the ``GPTable.title`` and ``subtitles`` - properties. Note: heading tagging in Excel does not meet the standard - required of webpage heading tagging. - * - Avoid using symbols in general - - Desirable - - Partially implemented - - An error will be raised if table cells only contain symbols. It is the - user's responsibility to make sure symbol use within text is appropriate. - * - Do not use headers and footers, floating text boxes or floating toolbars - - Essential - - Implemented - - These components are not supported by gptables. - * - Do not use visual devices to divide data regions - - Desirable - - Implemented - - Using gptables without additional formatting does not use such visual devices. - * - Do not use a background fill - - Desirable - - Implemented - - The gptables default theme does not apply a background fill. 
- * - Do not use colour as the only way to convey a message - - Essential - - Implemented - - The default theme without additional formatting does not apply colour. - * - When using colour for emphasis check the contrast - - Essential - - Not implemented - - If using colour via additional formatting or a custom theme, it is the - user's responsibility to check the colour contrast. - * - Avoid images in spreadsheets - - Desirable - - Implemented - - Adding images is not supported by gptables. - * - Remove macros - - Desirable - - Implemented - - Macros are not supported by gptables. - -.. _`making written content accessible`: https://analysisfunction.civilservice.gov.uk/policy-store/making-analytical-publications-accessible/#section-3 - - -Structure ---------- - -.. list-table:: - :header-rows: 1 - :widths: 24 17 19 40 - - * - Description - - Essential? - - Status - - Explanation - * - Give worksheets unique names or numbers - - Essential - - Implemented - - Worksheet names come from the ``sheets = {"label": gptable}`` property. - If names are duplicated, the final ``label: gptable`` pair will be used. - * - Remove blank worksheets - - Essential - - Implemented - - Blank worksheets are not supported by gptables. - * - Use cells in column A wisely - - Essential - - Implemented - - ``GPTable`` attributes are written to column A. Title and subtitles are - first. The order of the remaining descriptive attributes can be - customised by creating a custom theme with a different ``description_order``. - * - Position tables against the left-hand edges of each sheet - - Essential - - Implemented - - gptables writes tables starting in column A. - * - Avoid putting content below a table - - Desirable - - Implemented - - Writing content below a table is not supported in gptables>=1.0.0. - * - Avoid worksheets with multiple tables - - Desirable - - Implemented - - Writing multiple tables per sheet is not supported in gptables. - - -Before publishing ------------------ - -.. 
list-table:: - :header-rows: 1 - :widths: 24 17 19 40 - - * - Description - - Essential? - - Status - - Explanation - * - Run a spelling and grammar check - - Essential - - Not implemented - - gptables does not check spelling and grammar, this is the user's - responsibility. - * - Use the accessibility checker - - Desirable - - Not implemented - - gptables does not have a built-in accessibility checker. Whilst all - efforts have been taken to make outputs accessible, the final - responsibility sits with the user. - * - Add document information - - Essential - - Not implemented - - gptables does not add title or language information to the document, - this responsibility sits with the user. Note: the document properties - available depend on the user's operating system and may not meet - the standard required for webpages. - * - Ensure the cursor is in cell A1 of the first worksheet when doing your final save - - Essential - - Implemented - - Workbooks written using gptables will have the cursor in the first cell. - Note: if the workbook is subsequently opened and saved, it is the user's - responsibility to check that the cursor has not been moved. diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index ef573c27..00000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,98 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# -import os -import sys -import alabaster -sys.path.insert(0, os.path.abspath('../')) - -# -- Project information ----------------------------------------------------- - -project = 'gptables' -copyright = ' Crown Copyright' -author = 'David Foster, Alexander Newton, Rowan Hemsi, Jacob Cole, Dan Shiloh and Jaehee Ryoo' - -# The full version, including alpha/beta/rc tags -with open(os.path.abspath("../../VERSION")) as f: - release = f.read() - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.viewcode', - 'sphinx.ext.todo', - 'sphinx.ext.githubpages', - 'sphinx.ext.napoleon' - ] - - -todo_include_todos = False - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - - -# The master toctree document. -master_doc = 'index' - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. 
-html_theme = 'alabaster' - -custom_fonts = '"Raleway", "HelveticaNeue", "Helvetica Neue", Helvetica, Arial, sans-serif' -html_theme_options = { - "description": "An opinionated python package for spreadsheet production.", - 'fixed_sidebar': 'true', - 'caption_font_family': custom_fonts, - 'font_family': custom_fonts, - 'head_font_family': custom_fonts, - "github_user": "best-practice-and-impact", - "github_repo": "gptables", - "github_button": True, - "github_type": "watch", - "github_count": False, - "sidebar_includehidden": True, - "show_relbar_bottom": True, - "page_width": "60rem", - "sidebar_width": "15rem", - } - -html_show_sourcelink = False - - -html_sidebars = { - '**': [ - 'about.html', - 'navigation.html', - 'relations.html', - 'searchbox.html' - ] - - } - - - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = [] diff --git a/docs/source/contributers.rst b/docs/source/contributers.rst deleted file mode 100644 index b210863b..00000000 --- a/docs/source/contributers.rst +++ /dev/null @@ -1,19 +0,0 @@ -Contributers -============ - -Code was contributed to this package by: - -* David Foster - -* Alexander Newton - -* Rowan Hemsi - -* Jaehee Ryoo - -* Jacob Cole - -* Daniel Shiloh - -Thanks also goes to Hannah Thomas and the users and stakeholders who gave -feedback on how to improve ``gptables``. \ No newline at end of file diff --git a/docs/source/doc.api.rst b/docs/source/doc.api.rst deleted file mode 100644 index e53ed585..00000000 --- a/docs/source/doc.api.rst +++ /dev/null @@ -1,59 +0,0 @@ -API functions -============= - -.. note:: ``auto_width`` functionality is experimental - any feedback is welcome! - It currently does not account for alternative fonts, font sizes or font wrapping. 
- - -Table of contents ------------------ - -By default, the API functions will add a table of contents sheet to your -Excel workbook. This will contain a single table with two columns. The first -column will contain the worksheet label and link for each worksheet in the -workbook. The second column will contain a description of the sheet contents. -By default, this is the title of the ``GPTable`` in that sheet. This -description can be customised by passing additional elements from the -``GPTable`` into the ``contentsheet_options`` parameter. This parameter also -allows for customisation of the table of contents ``title``, ``subtitles``, -``table_name``, ``instructions`` and ``column_names``. - -To customise the worksheet label, pass the new label into the -``contentsheet_label`` parameter. This table of contents functionality can be -disabled by setting this parameter to ``False``. - -See this in practice under :ref:`Example Usage`. - - -Notes sheet ------------ -A notes sheet will be generated if the API functions are provided with a -``notes_table``. The first column of the ``notes_table`` should contain a -meaningful reference for each note. This reference can then be used in the -worksheets - see the GPTable documentation for more details. When the notes -sheet is produced, this column will be replaced by the order the notes are -referenced in throughout the workbook. - -The second column should contain the text for each note. Optional additional -columns can be used for useful links, formatted as ``"[display text](link)"``. - -The notes sheet can be customised using the ``notesheet_options`` parameter. -Values for the ``title``, ``table_name`` and ``instructions`` can be provided -here. To customise the worksheet label, pass the new label into the -``notesheet_label`` parameter. - -If a ``notes_table`` is not provided, the notes sheet will not be generated. - -See this in practice under :ref:`Example Usage`. 
- - -``write_workbook`` function ---------------------------- - -.. autofunction:: gptables.core.api.write_workbook - - -``produce_workbook`` function ------------------------------ - -.. autofunction:: gptables.core.api.produce_workbook diff --git a/docs/source/doc.cover.rst b/docs/source/doc.cover.rst deleted file mode 100644 index 948f0533..00000000 --- a/docs/source/doc.cover.rst +++ /dev/null @@ -1,26 +0,0 @@ -Cover -===== - -Cover sheets can be used to provide information that is general to all tables in a Workbook. - -.. note:: - - Cover sheets are added as the first sheet in the Worbook when written by ``gptables``. - This is important when applying additional formatting to other Worksheets by their index in the Workbook. - -Mapping -------- - -To include a cover sheet, map your text elements to the attributes of a ``Cover`` object and pass this object to the ``cover`` parameter of either :func:`~.core.api.produce_workbook` or :func:`~.core.api.write_workbook`. -Text attributes which take a list (most except for title) will write one element per cell vertically in the Worksheet. - -Formatting ----------- - -Formatting of cover sheet text is managed by the ``cover_`` attributes of the Workbook's :class:`~.core.theme.Theme`. - -``Cover`` Class ---------------- - -.. automodule:: gptables.core.cover - :members: diff --git a/docs/source/doc.gptable.rst b/docs/source/doc.gptable.rst deleted file mode 100644 index 0717c1d0..00000000 --- a/docs/source/doc.gptable.rst +++ /dev/null @@ -1,323 +0,0 @@ -GPTable -======= - -Mapping -------- - -The ``GPTable`` Class is used to map your data and metadata to table elements. -The supported table elements are represented like this in the output `.xlsx` file: - -.. figure:: static/table_mapping.png - :figclass: align-center - :alt: Cells A1 to A6 contain the title, subtitles, instructions, legend, source and scope. These parameters are mapped individually. The next row contains the column headings. 
Within the same row but on a new line are the units. The table note references are within the same row on a new line under the units. In columns 1, 2 and 3 of the next row down are index levels 1, 2 and 3. In the next columns are the data. Column headings, indices and data are supplied as a pandas DataFrame. Units and table note references are mapped individually. - - -Notes ---------------------- - -Notes are text elements that appear on the separately generated ``Notesheet``. - -Notes can be referenced in the ``title``, ``subtitles``, ``scope``, ``source`` -and ``legend`` elements. Notes corresponding to entries in the data can be -referenced using the ``table_notes`` element. This will add a note reference to -the relevant column heading. Note references cannot be added to data cells, as -inserting references here would reduce the usability of the data. We use double -dollar symbols (``$$``) to denote notes in text. For example, a note could be -referenced as ``"My table title $$Reference$$"``. - -References in text are replaced with numbers, in increasing order from the top- -left corner of the first sheet containing a data table. - -See this in practice under :ref:`Example Usage`. - -.. note:: Deprecated in v1.1.0: Ability to reference notes within - ``GPTable.table.columns`` will be removed in v2 of gptables. Please use - ``GPTable.table_notes`` to ensure references are correctly placed and ordered. - -Links ------ - -Links can added to text using the format ``[display text](link)``. Links are -supported in the ``title``, ``subtitles``, ``scope``, ``source``and ``legend`` -elements. They will also be applied to cells within the data table that use -this format. Links should start with one of the following prefixes: -``http://``, ``https://``, ``ftp://``, ``mailto:``, ``internal:`` or -``external:``. For more information about the usage of the local URIs, see the -`XlsxWriter documentation`_. - -.. 
_`XlsxWriter documentation`: https://xlsxwriter.readthedocs.io/worksheet.html#worksheet-write-url - -.. note:: Excel does not support links being applied to specific words within - cells. The link will be applied to the whole cell, not just the - display text. - -Rich Text ---------- - -Rich text is text that contains mixed formatting. You shouldn't use formatting -to represent data or important information, as most formatting is neither -accessible nor machine readable. You can still use to make things look -appealing for sighted people. - -Rich text is supported in the ``title``, ``subtitles``, ``scope``, ``source`` -and ``legend`` elements. Where you would normally provide a string to a -parameter, you can instead provide a list of strings and dictionaries. -Dictionaries in this list should contain valid `XlsxWriter format properties`_ -and values. The formatting defined in these dictionaries will be applied to the -next string in the list. This formatting is applied in addition to the -formatting of that element specified in the :class:`~.core.theme.Theme`. - -.. _`XlsxWriter format properties`: https://xlsxwriter.readthedocs.io/format.html#format-methods-and-format-properties - -``["It is ", {"bold": True}, "inevitable"]`` would give you "It is **inevitable**". - -See this in practice under :ref:`Example Usage`. - -.. note:: Rich text is not currently supported if the cell also contains note - references or links. This may be changed in the future if there is - sufficient user need, so please raise an issue if this is functionality - you need. - - -Additional formatting ---------------------- - -In some cases you may want to apply one-off formatting on specific rows, columns or cells of the data. -As mentioned above, this formatting should not be used to represent data or important information. - -Bespoke formatting can be applied to an individual ``GPTable`` via the ``additional_formatting`` parameter, -when creating a ``GPTable`` instance. 
This parameter takes a list of dictionaries, where each dictionary -defines formatting for one or more rows, columns or cells. - -These dictionaries have a single key indicating the type of selection, from "column", "row" or "cell". -Their value is another dictionary, which specifies the indexing, formatting and whether row and column -indexes are included in the selection. - -Indexing supports selection of columns by name or 0-indexed number, but rows and cells can only -be indexed by number. Numeric indexing refers to position within the data element of the table (column -headings, row indexes and data), not position in the output Excel sheet. - - -This ``additional_formatting`` parameter is best demonstrated by example: - -.. code:: python - - additional_formatting = [ - # Align data center, but not column indexes - {"column": - {"columns": ["some_column", "another_column"], # str, int or list of either - "format": {"align": "center"}, - "include_names": False # Whether to include column headings (optional) - } - }, - - # Align column left, including column index - {"column": - {"columns": [3], - "format": {"left": 1}, - "include_names": True - } - }, - - # Underline the bottom of the table, including row index - {"row": - {"rows": -1, # Numbers only, but can refer to last row using -1 - "format": {"bottom": 1}, # Underline row - "include_names": True # Whether to include row indexes - } - }, - - # A bad example, turning a single cell's font red - {"cell": - {"cells": (3, 3), # tuple or list of tuples (numbers only) - "format": {"font_color": "red"} - } - } - ] - -Formatting methods -^^^^^ - -The following tables show the Excel format categories, along with an example demonstrating the syntax required -for use in gptables. Some formatting methods use indexing to map to Excel's built-in formats. This information -can be found in the applicable sections below. - -^^^^ -Font formatting -^^^^ - -This table demonstrates the font formatting methods available. 
You can find all options -for `underline styles in the XlsxWriter documentation`_. - -.. _`underline styles in the XlsxWriter documentation`: https://xlsxwriter.readthedocs.io/format.html#format-set-underline - -.. list-table:: - :header-rows: 1 - :widths: 19 30 - :align: left - - * - Description - - Example usage - * - Font type - - {"font_name": "Arial"} - * - Font size - - {"font_size": 30} - * - Font colour - - {"font_color": "red"} - * - Bold - - {"bold": True} - * - Italic - - {"italic": True} - * - Underline - - {"underline": 1} - * - Strikeout - - {"font_strikeout": True} - * - Super/Subscript - - | {"font_script": 1} # Superscript - | {"font_script": 2} # Subscript -.. -^^^^^^ -Number formatting -^^^^^^ - -This table demonstrates how to set the numeric format using indexing and string arguments. You can find all -options for `numeric formats in the XlsxWriter documentation`_. - -.. _`numeric formats in the XlsxWriter documentation`: https://xlsxwriter.readthedocs.io/format.html#format-set-num-format - -.. list-table:: - :header-rows: 1 - :widths: 19 30 - :align: left - - * - Description - - Example usage - * - Numeric format - - | {"num_format": 1} # Format index - | {"num_format": "d mmm yyyy"} # Format string -.. - -^^^^^^^^^^^ -Protection formatting -^^^^^^^^^^^ - -This table demonstrates the protection methods available. - -.. list-table:: - :header-rows: 1 - :widths: 19 30 - :align: left - - * - Description - - Example usage - * - Lock cells - - {"locked": True} - * - Hide formulas - - {"hidden": True} -.. - -^^^^^^^^^^^^ -Alignment formatting -^^^^^^^^^^^^ - -This table demonstrates the alignment formatting options available. You can find all options for -`horizontal and vertical alignment in the XlsxWriter documentation`_. - -.. _`horizontal and vertical alignment in the XlsxWriter documentation`: https://xlsxwriter.readthedocs.io/format.html#format-set-align - -.. 
list-table:: - :header-rows: 1 - :widths: 19 30 - :align: left - - * - Description - - Example usage - * - Horizontal align - - {"align": "center"} - * - Vertical align - - {"align": "vcenter"} - * - Rotation - - {"rotation": 30} - * - Text wrap - - {"text_wrap": True} - * - Center across - - {"set_center_across": True} - * - Indentation - - {"indentation":2} - * - Shrink to fit - - {"shrink": True} -.. - -^^^^^^^^^^^^^^^^ -Pattern formatting -^^^^^^^^^^^^^^^^ - -This table demonstrates the pattern formatting options available. - -.. list-table:: - :header-rows: 1 - :widths: 19 30 - :align: left - - * - Description - - Example usage - * - Cell pattern - - {"pattern": 1} - * - Background colour - - {"bg_color": "white"} - * - Foreground colour - - {"fg_color": "white"} -.. - -^^^^^^^^^^^^^^^^^^ -Border formatting -^^^^^^^^^^^^^^^^^^ - -This table demonstrates the border formatting options available. You can find all options -for `border styles in the XlsxWriter documentation`_. - -.. _`border styles in the XlsxWriter documentation`: https://xlsxwriter.readthedocs.io/format.html#format-set-border - -.. list-table:: - :header-rows: 1 - :widths: 19 30 - :align: left - - * - Description - - Example usage - * - Cell border - - {"border": 1} - * - Bottom border - - {"bottom": 1} - * - Top border - - {"top": 1} - * - Left border - - {"left": 1} - * - Right border - - {"right": 1} - * - Border colour - - {"border_color": "red"} - * - Bottom colour - - {"bottom_color":"#FF0000"} - * - Top colour - - {"top_color": "red"} - * - Left colour - - {"left_color": "#FF0000"} - * - Right colour - - {"right_color": "red"} - -.. - -For any formatting beyond this, if the package should support it then please raise an issue -or create a pull request. Otherwise, you will need to modify the underlying -:class:`~.core.wrappers.GPWorkbook` or :class:`~.core.wrappers.GPWorksheet` objects -before they are written to Excel. - -See this in practice under :ref:`Example Usage`. 
- - -``GPTable`` Class ------------------ - -.. automodule:: gptables.core.gptable - :members: GPTable diff --git a/docs/source/doc.theme.rst b/docs/source/doc.theme.rst deleted file mode 100644 index f8dad035..00000000 --- a/docs/source/doc.theme.rst +++ /dev/null @@ -1,54 +0,0 @@ -Theme -===== - -``Theme`` Configuration ------------------------ - -The easiest way to design your own theme is to create a -YAML configuration file. You should take a copy of our default theme -configuration file and adjust it to suit your needs. -When designing a theme, please consult the `Analysis Function guidance`_ to -ensure your new formatting is accessible. - -.. _`Analysis Function guidance`: https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/ - -Most of the top level names in the config file represent table elements or their metadata. -The parameters passed below these names are `XlsxWriter format properties`_, so you -should check out their documentation to find the appropriate properties and valid -options for your formatting. - -``global`` refers to the base format that all others will be built upon. -Any format parameter that is repeated for a specific element will override the global format for that element. -The three ``cover_`` format names refer to elements of the cover page generated from a :class:`~.core.cover.Cover`. - -.. _`XlsxWriter format properties`: https://xlsxwriter.readthedocs.io/format.html#format-methods-and-format-properties - -.. note:: All top levels names must exist in the config file. Where no properties need to be passed, leave empty after the colon. - -The final name in the config file is a special attribute which doed not take -XlsxWriter properties. It does the following: - -* ``description_order`` - specify the order of description elements. - Must contain a list including ``instructions``, ``legend``, ``source`` and ``scope``, - in the order that you would like them to appear. 
- -The configuration file for our default theme looks like this: - -.. literalinclude:: ../../gptables/themes/gptheme.yaml - :language: yaml - - -For minor adjustments to a theme, a deepcopy can be taken before using the -``Theme`` methods below to update the ``Theme``'s attributes. - -``Theme`` objects can altenatively be configured using dictionaries, with the same -structure as the configuration files. - -An example using a personalised theme YAML file can be found under :ref:`Example Usage`. - - -``Theme`` Class ---------------- - -.. automodule:: gptables.core.theme - :members: diff --git a/docs/source/doc.wrappers.rst b/docs/source/doc.wrappers.rst deleted file mode 100644 index bb561229..00000000 --- a/docs/source/doc.wrappers.rst +++ /dev/null @@ -1,31 +0,0 @@ -XlsxWriter wrappers -=================== - -These Classes are only likely used following use -of the :func:`~.core.api.produce_workbook` API function, -which returns a :class:`~.core.wrappers.GPWorkbook` object. - -You may use these objects to carry out modification of any aspects of the -workbook or individual worksheets that are outside of the scope of ``GPTables``. -To see this in practice, see the additional formatting example under `usage`. -Please also see the XlsxWriter documentation on their Workbook_ and Worksheet_ Classes, -which are super-classses of those below, for details on further modfication. - -.. _Workbook: https://xlsxwriter.readthedocs.io/workbook.html -.. _Worksheet: https://xlsxwriter.readthedocs.io/worksheet.html - - -The methods which we've extended these Classes with are not shown here, but feel -free to check out the source code to see how ``gptables`` works under the hood. - - -``GPWorkbook`` Class --------------------- - -.. autoclass:: gptables.core.wrappers.GPWorkbook - - -``GPWorksheet`` Class ---------------------- - -.. 
autoclass:: gptables.core.wrappers.GPWorksheet \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index db375e08..00000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,75 +0,0 @@ -******************************* -Good Practice Tables (gptables) -******************************* - -.. image:: https://github.com/best-practice-and-impact/gptables/workflows/continuous-integration/badge.svg - :target: https://github.com/best-practice-and-impact/gptables/actions - :alt: Actions build status - -.. image:: https://badge.fury.io/py/gptables.svg - :target: https://badge.fury.io/py/gptables - :alt: PyPI release - -``gptables`` produces `.xlsx` files from your ``pandas`` dataframes in -either python or R (using reticulate_). You define the mapping from your -data to elements of the table and ``gptables`` does the rest. - -.. _reticulate: https://rstudio.github.io/reticulate/ - -Table element mapping: - -.. figure:: static/table_mapping.png - :figclass: align-center - :alt: Cells A1 to A6 contain the title, subtitles, instructions, legend, source and scope. These parameters are mapped individually. The next row contains the column headings. Within the same row but on a new line are the units. The table note references are within the same row on a new line under the units. In columns 1, 2 and 3 of the next row down are index levels 1, 2 and 3. In the next columns are the data. Column headings, indices and data are supplied as a pandas DataFrame. Units and table note references are mapped individually. - - -``gptables`` uses the official `guidance on good practice spreadsheets`_. -It advocates a strong adherence to the guidance by restricting the range of possible operations. -The default formatting theme ``gptheme`` accommodates many use cases. -However, the :class:`~.core.theme.Theme` Class allows development of custom themes, where alternative formatting is required. 
- -``gptables`` is developed and maintained by the `Analysis Function`_. It can be -installed from `PyPI`_ or `GitHub`_. The source code is maintained on GitHub. -Users may also be interested in `aftables`_, an R native equivalent to -``gptables``, and `csvcubed`_, a package for turning data and metadata into -machine-readable CSV-W files. - -.. _`guidance on good practice spreadsheets`: https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/ -.. _`Analysis Function`: https://analysisfunction.civilservice.gov.uk/ -.. _`PyPI`: https://pypi.org/project/gptables/ -.. _`GitHub`: https://github.com/best-practice-and-impact/gptables -.. _`aftables`: https://best-practice-and-impact.github.io/aftables/ -.. _`csvcubed`: https://gss-cogs.github.io/csvcubed-docs/external/ - - -5 Simple Steps --------------- - -1. You map your data to the elements of a :class:`~.core.gptable.GPTable`. - -2. You can define the format of each element with a custom :class:`~.core.theme.Theme`, or simply use the default - gptheme. - -3. Optionally design a :class:`~.core.cover.Cover` to provide information that relates to all of the tables in your Workbook. - -4. Optionally upload a ``notes_table`` with information about any notes. - -5. You :func:`~.core.api.write_workbook` to win. - - -**Note**: This package is not intending to create perfectly accessible spreadsheets but will help with the bulk of the work needed. Users of this packages should refer back to the `main spreadsheet guidance `_ or the `spreadsheet accessibility checklist `_ after using it to make sure nothing has been missed. Please email `Analysis.Function@ons.gov.uk `_ if you use the package so we can monitor use and the outputs produced. - - -.. 
toctree:: - :maxdepth: 2 - :hidden: - - doc.api.rst - usage.rst - doc.gptable.rst - doc.theme.rst - doc.cover.rst - doc.wrappers.rst - checklist.rst - changelog.rst - contributers.rst - diff --git a/docs/source/static/table_mapping.png b/docs/source/static/table_mapping.png deleted file mode 100644 index fdbe4eb3..00000000 Binary files a/docs/source/static/table_mapping.png and /dev/null differ diff --git a/docs/source/usage.rst b/docs/source/usage.rst deleted file mode 100644 index fd67f3ca..00000000 --- a/docs/source/usage.rst +++ /dev/null @@ -1,72 +0,0 @@ -.. _Example Usage: - -Example Usage -============= - -This section demonstrates usage of the gptables API functions and core Classes. - -For source code and data used in these examples, please see the -examples_ directory of the package. - -.. _examples: https://github.com/best-practice-and-impact/gptables/tree/main/gptables/examples - -.. todo:: Replace datasets in examples with open online datasets - - -.. automodule:: gptables.examples.penguins_minimal - -.. literalinclude:: ../../gptables/examples/penguins_minimal.py - :language: python - :lines: 16- - -.. automodule:: gptables.examples.penguins_minimal_alternate - -.. literalinclude:: ../../gptables/examples/penguins_minimal_alternate.py - :language: python - :lines: 16- - -.. literalinclude:: ../../gptables/examples/penguins_multiple_sheets.py - :language: python - :lines: 16- - -.. automodule:: gptables.examples.penguins_theme - -.. literalinclude:: ../../gptables/examples/penguins_theme.py - :language: python - :lines: 16- - -.. automodule:: gptables.examples.penguins_cover - -.. literalinclude:: ../../gptables/examples/penguins_cover.py - :language: python - :lines: 14- - -.. automodule:: gptables.examples.penguins_notes - -.. literalinclude:: ../../gptables/examples/penguins_notes.py - :language: python - :lines: 15- - -.. automodule:: gptables.examples.penguins_additional_formatting - -.. 
literalinclude:: ../../gptables/examples/penguins_additional_formatting.py - :language: python - :lines: 25- - - - - -.. automodule:: gptables.examples.survey_data - -.. literalinclude:: ../../gptables/examples/survey_data.py - :language: python - :lines: 8- - - -R Usage -------- - -Use of ``gptables`` in R requires use of python via the `reticulate `_ package. - -However we recommend use of the `aftables `_ -R package, maintained by the Presentation Champions Data Visualisation Tools subgroup. \ No newline at end of file diff --git a/docs/static/cover_sheet.png b/docs/static/cover_sheet.png new file mode 100644 index 00000000..c7bffbd3 Binary files /dev/null and b/docs/static/cover_sheet.png differ diff --git a/docs/static/getting_started_before_and_after.png b/docs/static/getting_started_before_and_after.png new file mode 100644 index 00000000..73c20bef Binary files /dev/null and b/docs/static/getting_started_before_and_after.png differ diff --git a/docs/static/howto_additional_formatting.png b/docs/static/howto_additional_formatting.png new file mode 100644 index 00000000..ec458929 Binary files /dev/null and b/docs/static/howto_additional_formatting.png differ diff --git a/docs/static/howto_additional_formatting_text.png b/docs/static/howto_additional_formatting_text.png new file mode 100644 index 00000000..1c41dbcd Binary files /dev/null and b/docs/static/howto_additional_formatting_text.png differ diff --git a/docs/static/howto_theme_basic.png b/docs/static/howto_theme_basic.png new file mode 100644 index 00000000..254707e8 Binary files /dev/null and b/docs/static/howto_theme_basic.png differ diff --git a/docs/static/howto_theme_cover.png b/docs/static/howto_theme_cover.png new file mode 100644 index 00000000..7994814a Binary files /dev/null and b/docs/static/howto_theme_cover.png differ diff --git a/docs/static/table_mapping.png b/docs/static/table_mapping.png new file mode 100644 index 00000000..44446eb7 Binary files /dev/null and 
b/docs/static/table_mapping.png differ diff --git a/docs/static/table_of_contents_additional_elements.png b/docs/static/table_of_contents_additional_elements.png new file mode 100644 index 00000000..39d698ca Binary files /dev/null and b/docs/static/table_of_contents_additional_elements.png differ diff --git a/docs/static/table_of_contents_customisation.png b/docs/static/table_of_contents_customisation.png new file mode 100644 index 00000000..1f62a334 Binary files /dev/null and b/docs/static/table_of_contents_customisation.png differ diff --git a/docs/static/table_of_contents_default.png b/docs/static/table_of_contents_default.png new file mode 100644 index 00000000..0b4008e2 Binary files /dev/null and b/docs/static/table_of_contents_default.png differ diff --git a/docs/static/tutorial_adding_notes.png b/docs/static/tutorial_adding_notes.png new file mode 100644 index 00000000..f0bc0b89 Binary files /dev/null and b/docs/static/tutorial_adding_notes.png differ diff --git a/gptables/__init__.py b/gptables/__init__.py index 396da316..2a12ba76 100644 --- a/gptables/__init__.py +++ b/gptables/__init__.py @@ -1,17 +1,15 @@ -from gptables.core.theme import Theme +# flake8: noqa + +from gptables.core.api import ( + produce_workbook, + write_workbook, +) from gptables.core.cover import Cover from gptables.core.gptable import GPTable +from gptables.core.theme import Theme from gptables.core.wrappers import GPWorkbook - from gptables.utils.unpickle_themes import gptheme - -from gptables.core.api import ( - # API functions - produce_workbook, - write_workbook, - ) - __doc__ = """ ******************************* Good Practice Tables (gptables) @@ -27,8 +25,7 @@ ``gptables`` is an opinionated python package for spreadsheet production. -It produces ``.xlsx`` files from your ``pandas`` dataframes or using -``reticulate`` in R. You define the mapping from your data to elements of the +It produces ``.xlsx`` files from your ``pandas`` dataframes. 
You define the mapping from your data to elements of the table. It does the rest. ``gptables`` uses the official `guidance on good practice spreadsheets`_ @@ -37,6 +34,8 @@ cases. However, the ``Theme`` object allows development of custom themes, where other formatting is required. +R users should check out `aftables`_, an R native equivalent to ``gptables``. + ``gptables`` is developed and maintained by the `Analysis Function`_. .. _`guidance on good practice spreadsheets`: https://analysisfunction.civilservice.gov.uk/policy-store/releasing-statistics-in-spreadsheets/ diff --git a/gptables/core/api.py b/gptables/core/api.py index d3deb224..94b33d1d 100644 --- a/gptables/core/api.py +++ b/gptables/core/api.py @@ -1,27 +1,33 @@ import warnings -import pandas as pd from pathlib import Path +from typing import Any, Dict, Optional, Union + +import pandas as pd -from gptables import GPWorkbook, GPTable +from gptables.core.cover import Cover +from gptables.core.gptable import GPTable +from gptables.core.theme import Theme +from gptables.core.wrappers import GPWorkbook def produce_workbook( - filename, - sheets, - theme = None, - cover = None, - contentsheet_label = "Contents", - contentsheet_options = {}, - notes_table = None, - notesheet_label = "Notes", - notesheet_options = {}, - auto_width = True, - gridlines = "hide_all", - cover_gridlines = False - ): + filename: str, + sheets: Dict[str, "GPTable"], + theme: Optional["Theme"] = None, + cover: Optional["Cover"] = None, + contentsheet_label: str = "Contents", + contentsheet_options: Optional[Dict[str, Any]] = None, + notes_table: Optional[pd.DataFrame] = None, + notesheet_label: str = "Notes", + notesheet_options: Optional[Dict[str, Any]] = None, + auto_width: Union[bool, Dict[str, bool]] = True, + gridlines: str = "hide_all", + cover_gridlines: bool = False, +) -> GPWorkbook: """ - Produces a GPWorkbook, ready to be written to the specified `.xlsx` file - using the ``.close()`` method. 
+ Produces a formatted workbook. + + Can be written to an `.xlsx` file as specified in `filename` using `.close()`. Parameters ---------- @@ -49,21 +55,25 @@ def produce_workbook( notesheet_options : dict, optional dictionary of notesheet customisation parameters. Valid keys are `table_name`, `title` and `instructions`. - auto_width : bool, optional - indicate if column widths should be automatically determined. True - by default. + auto_width : bool or dict, optional + If bool, applies to all sheets. If dict, should map sheet labels to bools. gridlines : string, optional - option to hide or show gridlines on worksheets. "show_all" - don't - hide gridlines, "hide_printed" - hide printed gridlines only, or + option to hide or show gridlines on worksheets. "show_all" - don't + hide gridlines, "hide_printed" - hide printed gridlines only, or "hide_all" - hide screen and printed gridlines. cover_gridlines : bool, optional - indication if gridlines should apply to the cover worksheet. False + indication if gridlines should apply to the cover worksheet. False by default. 
- + Returns ------- workbook : gptables.GPWorkbook """ + if contentsheet_options is None: + contentsheet_options = {} + if notesheet_options is None: + notesheet_options = {} + if isinstance(filename, Path): filename = filename.as_posix() @@ -82,11 +92,19 @@ def produce_workbook( contentsheet = {} if contentsheet_label is not None: if contentsheet_options: - valid_keys = ["additional_elements", "column_names", - "table_name", "title", "subtitles", "instructions"] + valid_keys = [ + "additional_elements", + "column_names", + "table_name", + "title", + "subtitles", + "instructions", + ] if not all(key in valid_keys for key in contentsheet_options.keys()): - msg = ("Valid `contentsheet_options` keys are 'additional_elements'," - "'column_names', 'table_name', 'title', 'subtitles', 'instructions'") + msg = ( + "Valid `contentsheet_options` keys are 'additional_elements'," + "'column_names', 'table_name', 'title', 'subtitles', 'instructions'" + ) raise ValueError(msg) contents_gptable = wb.make_table_of_contents(sheets, **contentsheet_options) contentsheet = {contentsheet_label: contents_gptable} @@ -103,36 +121,32 @@ def produce_workbook( sheets = {**contentsheet, **notesheet, **sheets} for label, gptable in sheets.items(): ws = wb.add_worksheet(label, gridlines=gridlines) - ws.write_gptable(gptable, auto_width, wb._annotations) - + if isinstance(auto_width, dict): + sheet_auto_width = auto_width.get(label, True) + else: + sheet_auto_width = auto_width + ws.write_gptable(gptable, sheet_auto_width, wb._annotations) + return wb def write_workbook( - filename, - sheets, - theme = None, - cover = None, - contentsheet = None, - contentsheet_label = "Contents", - contentsheet_options = {}, - notes_table = None, - notesheet_label = "Notes", - notesheet_options = {}, - auto_width = True, - gridlines = "hide_all", - cover_gridlines = False - ): - + filename: str, + sheets: Dict[str, "GPTable"], + theme: Optional["Theme"] = None, + cover: Optional["Cover"] = None, + 
contentsheet: Optional[str] = None, + contentsheet_label: str = "Contents", + contentsheet_options: Optional[Dict[str, Any]] = None, + notes_table: Optional[pd.DataFrame] = None, + notesheet_label: str = "Notes", + notesheet_options: Optional[Dict[str, Any]] = None, + auto_width: Union[bool, Dict[str, bool]] = True, + gridlines: str = "hide_all", + cover_gridlines: bool = False, +) -> None: """ - Writes a GPWorkbook to the specified `.xlsx` file. - - This is an alternative main function that will take in data and theme - information. It calls upon the package to write a formatted `.xlsx` - file to the specified path. - - .. note:: Deprecated in v1.1.0: `contentsheet` will be removed - in v2, it is replaced by `contentsheet_label` + Writes a formatted Excel workbook to `filename`. Parameters ---------- @@ -152,7 +166,7 @@ def write_workbook( dictionary of contentsheet customisation parameters. Valid keys are `additional_elements`, `column_names`, `table_name`, `title`, `subtitles` and `instructions` - note_table : pd.DataFrame, optional + notes_table : pd.DataFrame, optional table with notes reference, text and (optional) link columns. If None, notes sheet will not be generated. notesheet_label : str, optional @@ -164,21 +178,22 @@ def write_workbook( indicate if column widths should be automatically determined. True by default. gridlines : string, optional - option to hide or show gridlines on worksheets. "show_all" - don't - hide gridlines, "hide_printed" - hide printed gridlines only, or + option to hide or show gridlines on worksheets. "show_all" - don't + hide gridlines, "hide_printed" - hide printed gridlines only, or "hide_all" - hide screen and printed gridlines. cover_gridlines : bool, optional - indication if gridlines should apply to the cover worksheet. False + indication if gridlines should apply to the cover worksheet. False by default. 
- contentsheet : str - alias for contentsheet_label, deprecated in v1.1.0 Returns ------- None """ - if contentsheet is not None: - contentsheet_label = contentsheet + + if contentsheet_options is None: + contentsheet_options = {} + if notesheet_options is None: + notesheet_options = {} wb = produce_workbook( filename, @@ -192,6 +207,6 @@ def write_workbook( notesheet_options, auto_width, gridlines, - cover_gridlines - ) + cover_gridlines, + ) wb.close() diff --git a/gptables/core/cover.py b/gptables/core/cover.py index 02888d92..ef27741b 100644 --- a/gptables/core/cover.py +++ b/gptables/core/cover.py @@ -3,30 +3,38 @@ from gptables.core.gptable import FormatList + @dataclass -class Cover(): +class Cover: """ - dataclass for storing cover sheet text. + Stores cover sheet properties. Attributes ---------- title : str - cover page title + Cover page title intro : List[str, list], optional - introductory text + Introductory text about : List[str, list], optional - about/notes text + About/notes text contact : List[str, list], optional - contact details text - cover_label : str - cover page tab label, defaults to Cover + Contact details text + cover_label : str + Cover page tab label, defaults to "Cover" width: int - width of the column, defaults to 85 + Width of the column, defaults to 85 """ - - def __init__(self, title: str, intro: List = None, about: List = None, - contact: List = None, cover_label: str = "Cover", width: int = 85): - + + def __init__( + self, + title: str, + intro: List = None, + about: List = None, + contact: List = None, + cover_label: str = "Cover", + width: int = 85, + ) -> None: + self.title = title self.intro = self._parse_formatting(intro) self.about = self._parse_formatting(about) @@ -35,20 +43,23 @@ def __init__(self, title: str, intro: List = None, about: List = None, self.width = width # TODO: Add input validation (e.g. 
empty list) - + @staticmethod - def _parse_formatting(attribute): + def _parse_formatting(attribute) -> List: """Check attribute for a list. If there is a list then cast the list to a FormatList in attribute. Parameters ---------- attribute : List[str, list] - + Returns ------- List[str, FormatList] """ if isinstance(attribute, list): - attribute = [FormatList(text) if isinstance(text, list) else text for text in attribute] + attribute = [ + FormatList(text) if isinstance(text, list) else text + for text in attribute + ] return attribute diff --git a/gptables/core/gptable.py b/gptables/core/gptable.py index ff10bbe0..5caf9fae 100644 --- a/gptables/core/gptable.py +++ b/gptables/core/gptable.py @@ -1,23 +1,23 @@ -import pandas as pd import re +from typing import Any, Dict, List, Optional, Union + +import pandas as pd from xlsxwriter.format import Format + class GPTable: """ - A Good Practice Table. Stores a table and metadata for writing a table - to excel. + A Good Practice Table. - .. note:: Deprecated in v1.1.0: Ability to reference notes within - ``GPTable.table.columns`` will be removed in v2 of gptables. Please use - ``GPTable.table_notes`` to ensure references are correctly placed and ordered. + Stores data, text content, and content metadata for writing a table to Excel. Attributes ---------- table : pandas.DataFrame table to be written to an Excel workbook table_name : str - name for table. Should be unique with no spaces and always begin with a - letter, an underscore character, or a backslash. Use letters, numbers, + name for table. Should be unique with no spaces and always begin with a + letter, an underscore character, or a backslash. Use letters, numbers, periods, and underscore characters for the rest of the name. 
title : str title of the table @@ -42,52 +42,57 @@ class GPTable: table-specific formatting for columns, rows or individual cells """ - def __init__(self, - table, - table_name, - title, - scope=None, - source=None, - units=None, - table_notes=None, - subtitles=[], - instructions="", - legend=[], - index_columns={2:0}, - additional_formatting=[], - ): - + def __init__( + self, + table: pd.DataFrame, + table_name: str, + title: str, + scope: Optional[str] = None, + source: Optional[str] = None, + units: Optional[Dict[Any, Any]] = None, + table_notes: Optional[Dict[Any, Any]] = None, + subtitles: Optional[List[Any]] = [], + instructions: str = "", + legend: Optional[List[Any]] = [], + index_columns: Optional[Dict[int, int]] = None, + additional_formatting: Optional[List[Dict[str, Any]]] = [], + ) -> None: + # Attributes self.title = None self.subtitles = [] - + self.units = None # str or {units (str):column index (int)} dict self.table_notes = None # str or {units (str):column index (int)} dict - + self._VALID_INDEX_LEVELS = [1, 2, 3] self.index_levels = 0 self.index_columns = {} # {index level (int): column index (int)} - self._column_headings = set() # Non-index column headings + self._column_headings = set() # Non-index column headings self.table = pd.DataFrame() self.table_name = None self.data_range = [0] * 4 - + self.scope = None self.source = None self.legend = [] self._annotations = [] - + self.additional_formatting = [] - + + if additional_formatting is None: + additional_formatting = [] + + self.additional_formatting = additional_formatting + # Valid format labels from XlsxWriter self._valid_format_labels = [ - attr.replace("set_", "") - for attr in Format().__dir__() - if attr.startswith('set_') - and callable(getattr(Format(), attr)) - ] - - # Call methods to set attributes + attr.replace("set_", "") + for attr in Format().__dir__() + if attr.startswith("set_") and callable(getattr(Format(), attr)) + ] + + # Call methods to set attributes 
self.set_title(title) self.set_subtitles(subtitles) self.set_instructions(instructions) @@ -98,22 +103,35 @@ def __init__(self, self.set_source(source) self.set_legend(legend) self._set_data_range() - - def set_table(self, new_table, new_index_columns = None, new_units = None, new_table_notes = None): + def set_table( + self, + new_table: pd.DataFrame, + new_index_columns: Optional[Dict[int, int]] = None, + new_units: Optional[Dict[Any, Any]] = None, + new_table_notes: Optional[Dict[Any, Any]] = None, + ) -> None: """ Set the `table`, `index_columns`, `units` and `table_notes` attributes. Overwrites existing values for these attributes. """ if not isinstance(new_table, pd.DataFrame): raise TypeError("`table` must be a pandas DataFrame") - + + if any("$$" in str(h) for h in new_table.columns): + raise ValueError( + "Notes inside column headers are no longer supported. " + "Use GPTable.table_notes for column notes instead." + ) + default_index = pd.Index(range(new_table.shape[0])) if not all(new_table.index == default_index) and not new_table.empty: - msg = ("`table` index must not contain index data. It can be reset" - " before adding to a GPTable (see DataFrame.reset_index())." - " Please ensure that index data is stored in the first 1-3" - " columns of `table` and is indicated in `index_columns`.") + msg = ( + "`table` index must not contain index data. It can be reset" + " before adding to a GPTable (see DataFrame.reset_index())." + " Please ensure that index data is stored in the first 1-3" + " columns of `table` and is indicated in `index_columns`." 
+ ) raise ValueError(msg) self.table = new_table.reset_index(drop=True) @@ -131,10 +149,9 @@ def set_table(self, new_table, new_index_columns = None, new_units = None, new_t if new_table_notes is None: new_table_notes = self.table_notes - self.set_table_notes(new_table_notes) - + self.table_notes = new_table_notes - def set_index_columns(self, new_index_columns): + def set_index_columns(self, new_index_columns: Dict[int, int]) -> None: """ Set the `index_columns` attribute. Overwrites any existing values. A dict must be supplied. This dict should map index level to a @@ -143,44 +160,50 @@ def set_index_columns(self, new_index_columns): """ if isinstance(new_index_columns, dict): # Check if levels and values are valid - valid_levels = all(level in self._VALID_INDEX_LEVELS for level in new_index_columns.keys()) + valid_levels = all( + level in self._VALID_INDEX_LEVELS for level in new_index_columns.keys() + ) if not valid_levels: - msg = ("`index_columns` dictionary keys must be valid index" - f" levels: {self._VALID_INDEX_LEVELS}") + msg = ( + "`index_columns` dictionary keys must be valid index" + f" levels: {self._VALID_INDEX_LEVELS}" + ) raise ValueError(msg) - + if not all(isinstance(col, int) for col in new_index_columns.values()): # Convert col name to numeric index for key, value in new_index_columns.items(): col_iloc = self.table.columns.get_loc(value) new_index_columns.update({key: col_iloc}) - + column_indexes = [col for col in new_index_columns.values()] - + valid_columns = all(self._valid_column_index(col) for col in column_indexes) if not valid_columns: - msg = ("Out of range - `index_columns` dictionary values must" - "be valid, 0-indexed column numbers") + msg = ( + "Out of range - `index_columns` dictionary values must" + "be valid, 0-indexed column numbers" + ) raise ValueError(msg) - + self.index_levels = len(new_index_columns.keys()) - + self.index_columns = new_index_columns self._set_column_headings() else: - msg = ("`index_columns` must be a 
dict mapping a valid index level" - " to a 0-indexed column number") + msg = ( + "`index_columns` must be a dict mapping a valid index level" + " to a 0-indexed column number" + ) raise ValueError(msg) - - def _valid_column_index(self, column_index): + def _valid_column_index(self, column_index: int) -> bool: """ Check if `column_index` is valid, given the `table` shape. """ return column_index in range(self.table.shape[1]) - - def _set_column_headings(self): # TODO: check custom formatting in headers + def _set_column_headings(self) -> None: # TODO: check custom formatting in headers """ Sets the `column_headings` attribute to the set of column indexes that are not assigned to `index_columns`. @@ -188,48 +211,44 @@ def _set_column_headings(self): # TODO: check custom formatting in headers index_cols = set(self.index_columns.values()) self._column_headings = {x for x in range(self.table.shape[1])} - index_cols - - def _validate_all_column_names_have_text(self): + def _validate_all_column_names_have_text(self) -> None: """ Validate that all column names in header row have text. """ for column_name in self.table.columns: if pd.isna(column_name): - msg = ("Null column name found in table data - column names must all have text") + msg = "Null column name found in table data - column names must all have text" raise ValueError(msg) elif len(column_name) > 0: continue else: - msg = ("Empty column name found in table data - column names must all have text") + msg = "Empty column name found in table data - column names must all have text" raise ValueError(msg) - - def _validate_no_duplicate_column_names(self): + def _validate_no_duplicate_column_names(self) -> None: """ Validate that there are no duplicate column names in table data. 
""" if len(self.table.columns) != len(set(self.table.columns)): - msg = ("Duplicate column names found in table data - column names must be unique") + msg = "Duplicate column names found in table data - column names must be unique" raise ValueError(msg) - - def set_table_name(self, new_table_name): + def set_table_name(self, new_table_name: str) -> None: """ Set the `table_name` attribute. """ if not isinstance(new_table_name, str): - msg = ("table_name should be provided as a string") + msg = "table_name should be provided as a string" raise TypeError(msg) elif len(new_table_name) != len("".join(new_table_name.split())): - msg = ("Whitespace found in table_name, remove or replace with underscores") + msg = "Whitespace found in table_name, remove or replace with underscores" raise ValueError(msg) else: self.table_name = new_table_name - - def set_title(self, new_title): + def set_title(self, new_title: Any) -> None: """ Set the `title` attribute. """ @@ -240,8 +259,7 @@ def set_title(self, new_title): self.title = new_title - - def add_subtitle(self, new_subtitle): + def add_subtitle(self, new_subtitle: Any) -> None: """ Add a single subtitle to the existing list of `subtitles`. """ @@ -252,8 +270,9 @@ def add_subtitle(self, new_subtitle): self.subtitles.append(new_subtitle) - - def set_subtitles(self, new_subtitles, overwrite=True): + def set_subtitles( + self, new_subtitles: Optional[List[Any]], overwrite: bool = True + ) -> None: """ Set a list of subtitles to the `subtitles` attribute. Overwrites existing ist of subtitles by default. 
If `overwrite` is False, new list @@ -263,22 +282,27 @@ def set_subtitles(self, new_subtitles, overwrite=True): new_subtitles = [] if not isinstance(new_subtitles, (list)): - msg =("`subtitles` must be provided as a list containing strings" - " and/or lists of strings and format dictionaries" - " (rich text)") + msg = ( + "`subtitles` must be provided as a list containing strings" + " and/or lists of strings and format dictionaries" + " (rich text)" + ) raise TypeError(msg) for text in new_subtitles: self._validate_text(text, "subtitles") - new_subtitles = [FormatList(text) if isinstance(text, list) else text for text in new_subtitles] + new_subtitles = [ + FormatList(text) if isinstance(text, list) else text + for text in new_subtitles + ] if overwrite: self.subtitles = new_subtitles else: self.subtitles += new_subtitles - - def set_instructions(self, new_instructions): + + def set_instructions(self, new_instructions: Any) -> None: """ Set `instructions` attribute. """ @@ -291,12 +315,11 @@ def set_instructions(self, new_instructions): else: self.instructions = new_instructions - - def set_scope(self, new_scope): + def set_scope(self, new_scope: Any) -> None: """ Set the `scope` attribute. """ - if new_scope == None: + if new_scope is None: new_scope = "" return @@ -307,13 +330,19 @@ def set_scope(self, new_scope): self.scope = new_scope - - def set_units(self, new_units): # TODO: custom formatting in units? + def set_units( + self, new_units: Optional[Dict[Any, Any]] + ) -> None: # TODO: custom formatting in units? """ Adds units to column headers. Units should be in the format {column: units_text}. Column can be column name or 0-indexed column - number in `table`. - """ + number in `table`. + """ + if any("$$" in str(h) for h in self.table.columns): + raise ValueError( + "Notes inside column headers are no longer supported. " + "Use GPTable.table_notes for column notes instead." 
+ ) if isinstance(new_units, dict) and len(new_units) > 0: for value in new_units.values(): self._validate_text(value, "units") @@ -329,24 +358,34 @@ def set_units(self, new_units): # TODO: custom formatting in units? new_units[n] = new_units.pop(unmodified_headers[n]) # Convert numeric keys to column names - new_headers_keys = [headers[key] if isinstance(key, int) else key for key in new_units.keys()] - new_headers_values = [f"{key}\n({value})" for key, value in zip(new_headers_keys, new_units.values())] + new_headers_keys = [ + headers[key] if isinstance(key, int) else key + for key in new_units.keys() + ] + new_headers_values = [ + f"{key}\n({value})" + for key, value in zip(new_headers_keys, new_units.values()) + ] new_headers = dict(zip(new_headers_keys, new_headers_values)) - self.table = self.table.rename(columns = new_headers) + self.table = self.table.rename(columns=new_headers) if len(self.additional_formatting) > 0: self._update_column_names_in_additional_formatting(new_headers) - elif not new_units is None: - msg = ("`units` attribute must be a dictionary or None" - " ({column: units_text})") - + elif new_units is not None: + msg = ( + "`units` attribute must be a dictionary or None" + " ({column: units_text})" + ) + raise TypeError(msg) self.units = new_units - def _update_column_names_in_additional_formatting(self, col_names): + def _update_column_names_in_additional_formatting( + self, col_names: Dict[Any, Any] + ) -> None: """ Parameters ---------- @@ -362,53 +401,29 @@ def _update_column_names_in_additional_formatting(self, col_names): format = list(dictionary.values())[0] # new_name if name==old_name else name for name in col_names - format["columns"] = [col_names[name] if name in list(col_names.keys()) else name for name in format["columns"]] + format["columns"] = [ + col_names[name] if name in list(col_names.keys()) else name + for name in format["columns"] + ] self.additional_formatting = formatting_list - def set_table_notes(self, 
new_table_notes): # TODO: custom formatting in column headers? + def set_table_notes( + self, new_table_notes: Optional[Dict[Any, Any]] + ) -> None: # TODO: custom formatting in column headers? """ Adds note references to column headers. `table_notes` should be in the format {column: "$$note_reference$$"}. Column can be column name or 0-indexed column number in `table`. - """ - if isinstance(new_table_notes, dict) and len(new_table_notes) > 0: - for value in new_table_notes.values(): - self._validate_text(value, "table_notes") - - headers = self.table.columns.values.tolist() - - # Check if units have already been added to headers... - unmodified_headers = [header.split("\n")[0] for header in headers] - - # ...if so, apply any notes applied to headers without units, to headers with units - for n in range(len(unmodified_headers)): - if unmodified_headers[n] in list(new_table_notes.keys()): - new_table_notes[n] = new_table_notes.pop(unmodified_headers[n]) - - # Convert numeric keys to column names - new_headers_keys = [headers[key] if isinstance(key, int) else key for key in new_table_notes.keys()] - new_headers_values = [f"{key}\n{value}" for key, value in zip(new_headers_keys, new_table_notes.values())] - new_headers = dict(zip(new_headers_keys, new_headers_values)) - - self.table = self.table.rename(columns = new_headers) - - if len(self.additional_formatting) > 0: - self._update_column_names_in_additional_formatting(new_headers) - - elif not new_table_notes is None: - msg = ("`table_notes` attribute must be a dictionary or None" - " ({column: '$$note_reference$$'})") - raise TypeError(msg) + """ self.table_notes = new_table_notes - - def set_source(self, new_source): + def set_source(self, new_source: Any) -> None: """ Set the source attribute to the specified str. 
""" - if new_source == None: + if new_source is None: new_source = "" return @@ -418,9 +433,8 @@ def set_source(self, new_source): new_source = FormatList(new_source) self.source = new_source - - def add_legend(self, new_legend): + def add_legend(self, new_legend: Any) -> None: """ Add a single legend entry to the existing `legend` list. """ @@ -430,41 +444,44 @@ def add_legend(self, new_legend): new_legend = FormatList(new_legend) self.legend.append(new_legend) - - def set_legend(self, new_legend, overwrite=True): + def set_legend( + self, new_legend: Optional[List[Any]], overwrite: bool = True + ) -> None: """ Set a list of legend entries to the `legend` attribute. Overwrites - existing legend entries by default. If overwrite is False, new entries + existing legend entries by default. If overwrite is False, new entries are appended to the `legend` list. """ if new_legend is None: self.legend = [] return if not isinstance(new_legend, list): - msg = ("`legend` must be provided as a list of text elements") + msg = "`legend` must be provided as a list of text elements" raise TypeError(msg) for text in new_legend: self._validate_text(text, "legend") - new_legend = [FormatList(text) if isinstance(text, list) else text for text in new_legend] + new_legend = [ + FormatList(text) if isinstance(text, list) else text for text in new_legend + ] if overwrite: self.legend = new_legend else: self.legend += new_legend - def _set_annotations(self, description_order): + def _set_annotations(self, description_order: List[str]) -> None: """ Set a list of note references to the `_annotations` attribute. 
""" elements = [ - "title", - "subtitles", - *description_order, - "units", - "table_notes", - ] + "title", + "subtitles", + *description_order, + "units", + "table_notes", + ] ordered_refs = [] @@ -480,13 +497,12 @@ def _set_annotations(self, description_order): # remove duplicates from ordered_refs and assign to self._annotations self._annotations = list(dict.fromkeys(ordered_refs)) - - def _get_references_from_attr(self, data): + def _get_references_from_attr(self, data: Any) -> List[str]: """ Finds references in a string or list/dict of strings. Works recursively on list elements and dict values. Other types are ignored. Returns ordered list of references from attribute. - + Parameters ---------- data : string or list/dict of strings @@ -514,12 +530,12 @@ def _get_references_from_attr(self, data): return ordered_refs # Deprecated as of v1.1.0 - instead use `table_notes` to add references to column headers - def _get_references_from_table(self): + def _get_references_from_table(self) -> List[str]: """ Get note references in the table column headings and index columns. """ table = self.table - + ordered_refs = [] column_references = self._get_references_from_attr(table.columns.to_list()) ordered_refs.extend(column_references) @@ -527,18 +543,19 @@ def _get_references_from_table(self): index_columns = self.index_columns.values() for col in index_columns: index_column = table.iloc[:, col] - index_column_references = self._get_references_from_attr(index_column.to_list()) + index_column_references = self._get_references_from_attr( + index_column.to_list() + ) ordered_refs.extend(index_column_references) return ordered_refs - @staticmethod - def _get_references(string): + def _get_references(string: str) -> List[str]: """ Given a single string, return occurrences of note references (denoted by flanking dollar signs [$$reference$$]). 
- + Parameters ---------- string : str @@ -556,66 +573,71 @@ def _get_references(string): return ordered_refs - - def set_additional_formatting(self, new_formatting): + def set_additional_formatting(self, new_formatting: List[Dict[str, Any]]) -> None: """ Set a dictionary of additional formatting to be applied to this table. """ if not isinstance(new_formatting, list): - msg = ("`additional_formatting` must be a list of dictionaries") + msg = "`additional_formatting` must be a list of dictionaries" raise TypeError(msg) keys = [key for item in new_formatting for key in item.keys()] for key in keys: if key not in ["column", "row", "cell"]: - msg = (f"`{key}` is not a supported format type. Please use" - " `column`, `row` or `cell`") + msg = ( + f"`{key}` is not a supported format type. Please use" + " `column`, `row` or `cell`" + ) raise ValueError(msg) - + self._validate_format_labels(new_formatting) - + self.additional_formatting = new_formatting - - def _validate_format_labels(self, format_list): + def _validate_format_labels(self, format_list: List[Dict[str, Any]]) -> None: """ Validate that format labels are valid property of XlsxWriter Format. """ - labels = [label - for item in format_list - for key in item.keys() - for label in item[key]["format"] - ] + labels = [ + label + for item in format_list + for key in item.keys() + for label in item[key]["format"] + ] for label in labels: if label not in self._valid_format_labels: - msg = (f"`{label}` is not a valid XlsxWriter Format property") + msg = f"`{label}` is not a valid XlsxWriter Format property" raise ValueError(msg) - - def _set_data_range(self): + def _set_data_range(self) -> None: """ Get the top-left and bottom-right cell reference of the table data. 
""" - #TODO: ugly code - row_offset = sum([ - int(self.title is not None), - int(self.scope is not None), - int(self.source is not None), - ]) + 1 #corresponds to instructions which are included by default + # TODO: ugly code + row_offset = ( + sum( + [ + int(self.title is not None), + int(self.scope is not None), + int(self.source is not None), + ] + ) + + 1 + ) # corresponds to instructions which are included by default if self.subtitles is not None: row_offset += len(self.subtitles) if self.legend is not None: row_offset += len(self.legend) - + self.data_range = [ row_offset, 0, self.table.shape[0] + row_offset, - self.table.shape[1] - 1 + self.table.shape[1] - 1, ] @staticmethod - def _validate_text(obj, attr): + def _validate_text(obj: Any, attr: str) -> None: """ Validate that an object contains valid text elements. These are either strings or list of strings and dictionaries. @@ -626,27 +648,33 @@ def _validate_text(obj, attr): if isinstance(obj, list): for element in obj: if not isinstance(element, (str, dict)): - msg = (f"{attr} text should be provided as strings or" - " lists of strings and dictionaries (rich-text)." - f" {type(element)} are not valid rich text" - " elements.") + msg = ( + f"{attr} text should be provided as strings or" + " lists of strings and dictionaries (rich-text)." + f" {type(element)} are not valid rich text" + " elements." + ) raise TypeError(msg) else: - msg = (f"{attr} text should be provided as strings or lists of" - f" strings and dictionaries (rich-text). {type(obj)} are" - " not valid text elements.") + msg = ( + f"{attr} text should be provided as strings or lists of" + f" strings and dictionaries (rich-text). {type(obj)} are" + " not valid text elements." + ) raise TypeError(msg) + class FormatList: """ Class for storing list of alternating string and dictionary objects. Dictionaries specify additional formatting to be applied to the following string. 
""" - def __init__(self, list): - self.list = list + + def __init__(self, items: List[Union[str, Dict[str, Any]]]) -> None: + self.list = items self._set_string_property() - def _set_string_property(self): + def _set_string_property(self) -> None: string = "" for entry in self.list: if isinstance(entry, str): diff --git a/gptables/core/theme.py b/gptables/core/theme.py index e0cf93d1..5441f8c3 100644 --- a/gptables/core/theme.py +++ b/gptables/core/theme.py @@ -1,34 +1,34 @@ -from xlsxwriter.format import Format -from gptables.core.gptable import GPTable -import yaml from functools import wraps +from typing import Any, Callable, Dict, List, Optional, Union + +import yaml +from xlsxwriter.format import Format + -def validate_single_format(f): +def validate_single_format(f: Callable) -> Callable: @wraps(f) - def wrapper(cls, format_dict): + def wrapper(cls, format_dict: Dict[str, Any]) -> Any: """ Decorator to validate that input is a dictionary dictionary. """ if not isinstance(format_dict, dict): actual_type = type(format_dict) - msg = ("Formats must be supplied as a dictionary, not" - f"{actual_type}") + msg = "Formats must be supplied as a dictionary, not" f"{actual_type}" raise ValueError(msg) - + for fmt in format_dict.keys(): cls._validate_format_label(fmt) return f(cls, format_dict) + return wrapper + class Theme: """ - A class that defines a set of format attributes for use in xlsxwriter. + Defines format attributes for table elements for use with XlsxWriter. - This class associates a dict of format attributes with table elements. - - See XlsxWriter - `format properties `_ - for valid options. + For valid values, see XlsxWriter `Format` properties: + __. 
Attributes ---------- @@ -41,41 +41,38 @@ class Theme: title_format : dict subtitle_format : dict - + instructions_format : dict scope_format : dict - + column_heading_format : dict - + index_1_format : dict - + index_2_format : dict - + index_3_format : dict - + data_format : dict source_format : dict - + legend_format : dict description_order : list """ - def __init__( - self, - config=None, - ): + def __init__(self, config: Optional[Union[Dict[str, Any], str]] = None) -> None: """ Initialise theme object. Parameters ---------- config : dict or .yaml/.yml file - theme specification + theme specification """ - ## Formats + # Formats self._format_attributes = [ "cover_title_format", "cover_subtitle_format", @@ -91,34 +88,31 @@ def __init__( "data_format", "source_format", "legend_format", - ] - + ] + for attr in self._format_attributes: setattr(self, attr, {}) - - ## Other attributes + + # Other attributes self.description_order = [] - + # Valid Them format attributes self._valid_attrs = [ - x.replace("_format", "") - for x in self._format_attributes - ] + ["global"] + x.replace("_format", "") for x in self._format_attributes + ] + ["global"] # Valid XlsxWriter Format attributes self._valid_format_labels = [ - attr.replace("set_", "") - for attr in Format().__dir__() - if attr.startswith('set_') - and callable(getattr(Format(), attr)) - ] - + attr.replace("set_", "") + for attr in Format().__dir__() + if attr.startswith("set_") and callable(getattr(Format(), attr)) + ] + if config: self.apply_config(config) - @staticmethod - def _parse_config(config): + def _parse_config(config: Union[Dict[str, Any], str]) -> Dict[str, Any]: """ Parse yaml configuration to dictionary. 
""" @@ -127,17 +121,16 @@ def _parse_config(config): raise ValueError("Theme configuration files must be YAML") with open(config, "r") as file: cfg = yaml.safe_load(file) - + elif isinstance(config, dict): cfg = config - + else: raise ValueError("Theme configuration must be a dict or YAML file") - - return cfg + return cfg - def _validate_config(self, config): + def _validate_config(self, config: Dict[str, Any]) -> None: """ Assert that format dictionary lower level keys are valid XlsxWriter Format attributes. @@ -147,29 +140,27 @@ def _validate_config(self, config): attr_config = config[attr] or {} for fmt in attr_config.keys(): self._validate_format_label(fmt) - - def _validate_format_label(self, format_name): + def _validate_format_label(self, format_name: str) -> None: """ Assert that format is a valid XlsxWriter Format attribute. """ if format_name not in self._valid_format_labels: raise ValueError(f"`{format_name}` is not a valid format label") - - def apply_config(self, config): + def apply_config(self, config: Union[Dict[str, Any], str]) -> None: """ Update multiple Theme attributes using a YAML or dictionary config. This enables extension of build in Themes. """ cfg = self._parse_config(config) self._validate_config(cfg) - + # Update all when global used if "global" in cfg.keys(): default_format = cfg.pop("global") self._update_all_formats(default_format) - + # Update with individual methods for key, value in cfg.items(): if key == "description_order": @@ -179,9 +170,8 @@ def apply_config(self, config): getattr(self, "update_" + key + "_format")(value) else: raise ValueError(f"`{key}` is not a valid Theme attribute") - - def _update_all_formats(self, global_dict): + def _update_all_formats(self, global_dict: Dict[str, Any]) -> None: """ Updates all theme attributes with a global format dictionary. 
""" @@ -189,185 +179,163 @@ def _update_all_formats(self, global_dict): if attr.endswith("_format"): getattr(self, "update_" + attr)(global_dict) - @validate_single_format - def update_column_heading_format(self, format_dict): + def update_column_heading_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `column_heading_format` attribute. Where keys already exist, existing items are replaced. """ self.column_heading_format.update(format_dict) - @validate_single_format - def update_index_1_format(self, format_dict): + def update_index_1_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `index_1_format` attribute. Where keys already exist, existing items are replaced. """ self.index_1_format.update(format_dict) - - @validate_single_format - def update_index_2_format(self, format_dict): + @validate_single_format + def update_index_2_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `index_2_format` attribute. Where keys already exist, existing items are replaced. """ self.index_2_format.update(format_dict) - @validate_single_format - def update_index_3_format(self, format_dict): + def update_index_3_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `index_3_format` attribute. Where keys already exist, existing items are replaced. """ self.index_3_format.update(format_dict) - @validate_single_format - def update_data_format(self, format_dict): + def update_data_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `data_format` attribute. Where keys already exist, existing items are replaced. """ self.data_format.update(format_dict) - @validate_single_format - def update_cover_title_format(self, format_dict): + def update_cover_title_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `cover_title_format` attribute. Where keys already exist, existing items are replaced. 
""" self.cover_title_format.update(format_dict) - - + @validate_single_format - def update_cover_subtitle_format(self, format_dict): + def update_cover_subtitle_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `cover_subtitle_format` attribute. Where keys already exist, existing items are replaced. """ self.cover_subtitle_format.update(format_dict) - @validate_single_format - def update_cover_text_format(self, format_dict): + def update_cover_text_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `cover_text_format` attribute. Where keys already exist, existing items are replaced. """ self.cover_text_format.update(format_dict) - @validate_single_format - def update_title_format(self, format_dict): + def update_title_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `title_format` attribute. Where keys already exist, existing items are replaced. """ self.title_format.update(format_dict) - @validate_single_format - def update_subtitle_format(self, format_dict): + def update_subtitle_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `subtitle_format` attribute. Where keys already exist, existing items are replaced. """ self.subtitle_format.update(format_dict) - @validate_single_format - def update_instructions_format(self, format_dict): + def update_instructions_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `instructions_format` attribute. Where keys already exist, existing items are replaced. """ self.instructions_format.update(format_dict) - - @validate_single_format - def update_scope_format(self, format_dict): + @validate_single_format + def update_scope_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `scope_format` attribute. Where keys already exist, existing items are replaced. 
""" self.scope_format.update(format_dict) - @validate_single_format - def update_location_format(self, format_dict): + def update_location_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `location_format` attribute. Where keys already exist, existing items are replaced. """ self.location_format.update(format_dict) - @validate_single_format - def update_source_format(self, format_dict): + def update_source_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `source_format` attribute. Where keys already exist, existing items are replaced. """ self.source_format.update(format_dict) - @validate_single_format - def update_legend_format(self, format_dict): + def update_legend_format(self, format_dict: Dict[str, Any]) -> None: """ Update the `legend_format` attribute. Where keys already exist, existing items are replaced. """ self.legend_format.update(format_dict) - - def update_description_order(self, order_list): + def update_description_order(self, order_list: List[str]) -> None: """ Update the `description_order` attribute. Overrides existing order. """ if not isinstance(order_list, list): - msg = ("`description_order` must be a list of description element names") + msg = "`description_order` must be a list of description element names" raise TypeError(msg) valid_elements = ["instructions", "source", "legend", "scope"] if not all(element in valid_elements for element in order_list): - msg = (f"`description_order` elements must be in {valid_elements}") + msg = f"`description_order` elements must be in {valid_elements}" raise ValueError(msg) self.description_order = order_list - - def print_attributes(self): + def print_attributes(self) -> None: """ Print all current format attributes and values to the console. 
""" obj_attr = [ - attr for attr in self.__dir__() - if not attr.startswith('_') - and not callable(getattr(self, attr)) - ] + attr + for attr in self.__dir__() + if not attr.startswith("_") and not callable(getattr(self, attr)) + ] for attr in obj_attr: print(attr, ":", getattr(self, attr)) - - def __eq__(self, other): + def __eq__(self, other: object) -> bool: """ Comparison operator, for testing. """ # don't attempt to compare against unrelated types if not isinstance(other, Theme): return False - + obj_attr = [ - attr for attr in self.__dir__() - if not attr.startswith('_') - and not callable(getattr(self, attr)) - ] - return all([ - getattr(self, attr) == getattr(other, attr) - for attr in obj_attr - ]) - \ No newline at end of file + attr + for attr in self.__dir__() + if not attr.startswith("_") and not callable(getattr(self, attr)) + ] + return all([getattr(self, attr) == getattr(other, attr) for attr in obj_attr]) diff --git a/gptables/core/wrappers.py b/gptables/core/wrappers.py index 57068361..93e68284 100644 --- a/gptables/core/wrappers.py +++ b/gptables/core/wrappers.py @@ -1,26 +1,28 @@ -import os import re import warnings -import pandas as pd -import numpy as np from copy import deepcopy +from math import ceil +import numpy as np +import pandas as pd +from xlsxwriter.utility import cell_autofit_width from xlsxwriter.workbook import Workbook from xlsxwriter.worksheet import Worksheet from gptables.core.cover import Cover +from gptables.utils.unpickle_themes import gptheme +from .gptable import FormatList, GPTable from .theme import Theme -from .gptable import GPTable, FormatList -from gptables.utils.unpickle_themes import gptheme class GPWorksheet(Worksheet): """ - Wrapper for an XlsxWriter Worksheet object. Provides a method for writing + Wrapper for an `XlsxWriter.Worksheet` object. Provides a method for writing a good practice table (GPTable) to a Worksheet. 
""" - def write_cover(self, cover): + + def write_cover(self, cover: "Cover") -> None: """ Write a cover page to the Worksheet. Uses text from a Cover object and details of the Workbook contents. @@ -38,24 +40,33 @@ def write_cover(self, cover): pos = self._write_element(pos, cover.title, theme.cover_title_format) if cover.intro is not None: - pos = self._write_element(pos, "Introductory information", theme.cover_subtitle_format) + pos = self._write_element( + pos, "Introductory information", theme.cover_subtitle_format + ) pos = self._write_element_list(pos, cover.intro, theme.cover_text_format) if cover.about is not None: - pos = self._write_element(pos, "About these data", theme.cover_subtitle_format) + pos = self._write_element( + pos, "About these data", theme.cover_subtitle_format + ) pos = self._write_element_list(pos, cover.about, theme.cover_text_format) if cover.contact is not None: pos = self._write_element(pos, "Contact", theme.cover_subtitle_format) pos = self._write_element_list(pos, cover.contact, theme.cover_text_format) - + self.set_column(0, 0, cover.width) - def write_gptable(self, gptable, auto_width, reference_order=[]): + def write_gptable( + self, + gptable: "GPTable", + auto_width: bool, + reference_order: list = [], + ) -> None: """ Write data from a GPTable object to the worksheet using the workbook Theme object for formatting. 
- + Parameters ---------- gptable : gptables.GPTable @@ -70,11 +81,11 @@ def write_gptable(self, gptable, auto_width, reference_order=[]): """ if not isinstance(gptable, GPTable): raise TypeError("`gptable` must be a gptables.GPTable object") - - if len(gptable._annotations)>0 and len(reference_order)==0: + + if len(gptable._annotations) > 0 and len(reference_order) == 0: msg = "reference_order must be provided if gptable contains annotations" raise ValueError(msg) - + theme = self.theme # Write each GPTable element using appropriate Theme attr @@ -85,39 +96,32 @@ def write_gptable(self, gptable, auto_width, reference_order=[]): gptable = deepcopy(gptable) - pos = self._write_element( - pos, - gptable.title, - theme.title_format - ) + pos = self._write_element(pos, gptable.title, theme.title_format) - pos = self._write_element_list( - pos, - gptable.subtitles, - theme.subtitle_format - ) + pos = self._write_element_list(pos, gptable.subtitles, theme.subtitle_format) description = theme.description_order for element in description: pos = getattr(self, "_write_" + element)( - pos, - getattr(gptable, element), - getattr(theme, element + "_format") - ) + pos, getattr(gptable, element), getattr(theme, element + "_format") + ) pos = self._write_table_elements( - pos, - gptable, - auto_width, - ) - + pos, + gptable, + auto_width, + ) - def _reference_annotations(self, gptable, reference_order): + def _reference_annotations( + self, + gptable: "GPTable", + reference_order: list, + ) -> None: """ Replace note references with numbered references and move to end of element. Acts on `title`, `subtitles`, `table` and `notes` attributes of a GPTable. References are numbered from top left of spreadsheet, working across each row. 
- + Parameters ---------- gptable : gptables.GPTable @@ -132,52 +136,70 @@ def _reference_annotations(self, gptable, reference_order): description_order = self.theme.description_order elements = [ - "title", - "subtitles", - *description_order, - ] + "title", + "subtitles", + *description_order, + ] # Loop through elements, replacing references in strings for attr in elements: attr_current = getattr(gptable, attr) setattr( - gptable, - attr, - self._replace_reference_in_attr( - attr_current, - reference_order - ) - ) + gptable, + attr, + self._replace_reference_in_attr(attr_current, reference_order), + ) self._reference_table_annotations(gptable, reference_order) - - def _reference_table_annotations(self, gptable, reference_order): + def _reference_table_annotations( + self, + gptable: "GPTable", + reference_order: list, + ) -> None: """ Reference annotations in the table column headings and index columns. """ - table = getattr(gptable, "table") - - table.columns = self._replace_reference_in_attr( - [x for x in table.columns], - reference_order - ) - - index_columns = gptable.index_columns.values() + table = gptable.table.copy() - for col in index_columns: - table.iloc[:, col] = table.iloc[:, col].apply( - lambda x: self._replace_reference_in_attr(x, reference_order) - ) + notes = getattr(gptable, "table_notes", {}) or {} + if notes: + headers = list(table.columns) + rename_map = {} - setattr(gptable, "table", table) + for key, note_token in notes.items(): + + idx = key if isinstance(key, int) else table.columns.get_loc(key) + old = headers[idx] + rendered = self._replace_reference_in_attr(note_token, reference_order) + + if not ( + isinstance(old, str) + and old.splitlines() + and old.splitlines()[-1] == rendered + ): + new = f"{old}\n{rendered}" + rename_map[old] = new + if rename_map: + table = table.rename(columns=rename_map) + + index_columns = gptable.index_columns.values() + for col in index_columns: + table.iloc[:, col] = table.iloc[:, col].apply( + lambda 
x: self._replace_reference_in_attr(x, reference_order) + ) + gptable.table = table - def _replace_reference_in_attr(self, data, reference_order): + def _replace_reference_in_attr( + self, + data: object, + reference_order: list, + ) -> object: """ Replaces references in a string or list/dict of strings. Works recursively on list elements and dict values. Other types are returned without modification. - + Parameters ---------- data : any type @@ -195,35 +217,27 @@ def _replace_reference_in_attr(self, data, reference_order): data = self._replace_reference(data, reference_order) if isinstance(data, list): for n in range(len(data)): - data[n] = self._replace_reference_in_attr( - data[n], - reference_order - ) + data[n] = self._replace_reference_in_attr(data[n], reference_order) if isinstance(data, dict): for key in data.keys(): - data[key] = self._replace_reference_in_attr( - data[key], - reference_order - ) + data[key] = self._replace_reference_in_attr(data[key], reference_order) if isinstance(data, FormatList): data_list = data.list for n in range(len(data_list)): data_list[n] = self._replace_reference_in_attr( - data_list[n], - reference_order - ) + data_list[n], reference_order + ) data = FormatList(data_list) return data - @staticmethod - def _replace_reference(string, reference_order): + def _replace_reference(string: str, reference_order: list) -> str: """ Given a single string, record occurrences of new references (denoted by flanking dollar signs [$$reference$$]) and replace with number reference reflecting order of detection. 
- + Parameters ---------- string : str @@ -245,15 +259,14 @@ def _replace_reference(string, reference_order): return string - - def _parse_urls(self, sheet): + def _parse_urls(self, sheet: object) -> None: """ Convert markdown URL formatting into URL, string tuple - + Parameters ---------- sheet : gptables.GPTable, gptables.Cover - object containing data with urls + object containing data with urls """ if isinstance(sheet, GPTable): elements = [ @@ -263,7 +276,7 @@ def _parse_urls(self, sheet): "source", "scope", "units", - ] + ] elif isinstance(sheet, Cover): elements = [ "title", @@ -276,16 +289,16 @@ def _parse_urls(self, sheet): for attr in elements: attr_current = getattr(sheet, attr) setattr( - sheet, - attr, - self._replace_url_in_attr( - attr_current, - ) - ) + sheet, + attr, + self._replace_url_in_attr( + attr_current, + ), + ) if isinstance(sheet, GPTable): self._parse_table_urls(sheet) - - def _parse_table_urls(self, gptable): + + def _parse_table_urls(self, gptable: "GPTable") -> None: """ Parse URLs in table. """ @@ -301,13 +314,13 @@ def _parse_table_urls(self, gptable): table.iloc[r, c] = cell setattr(gptable, "table", table) - - def _replace_url_in_attr(self, data): + + def _replace_url_in_attr(self, data: object) -> object: """ Replaces urls in a string or list/dict of strings. Works recursively on list elements and dict values. Other types are returned without modification. 
- + Parameters ---------- data : any type @@ -318,24 +331,23 @@ def _replace_url_in_attr(self, data): if isinstance(data, list): for n in range(len(data)): data[n] = self._replace_url_in_attr( - data[n], - ) + data[n], + ) if isinstance(data, dict): for key in data.keys(): data[key] = self._replace_url_in_attr( - data[key], - ) + data[key], + ) return data - @staticmethod - def _replace_url(string): + def _replace_url(string: str) -> object: """ - Given a single string, record occurrences of markdown - style urls (formatted as `"[url](display_text)"`) and + Given a single string, record occurrences of markdown + style urls (formatted as `"[url](display_text)"`) and replace with tuples of `(url, string)` - + Parameters ---------- string : str @@ -346,16 +358,18 @@ def _replace_url(string): string or dict if no markdown style urls found, returns sting if found, return dictionary with key `string` and value `url`, - where markdown style url in `string` is replaced with `display_text` + where markdown style url in `string` is replaced with `display_text` """ - f_url_pattern = r"\[.+\]\(.+\)" # "[display_text](url)" + f_url_pattern = r"\[.+\]\(.+\)" # "[display_text](url)" f_urls = re.findall(f_url_pattern, string) - + if len(f_urls) == 0: return string - + if len(f_urls) > 1: - msg = "More than one link found in cell. Excel only permits one link per cell" + msg = ( + "More than one link found in cell. Excel only permits one link per cell" + ) raise ValueError(msg) else: f_url = f_urls[0] @@ -367,11 +381,10 @@ def _replace_url(string): return {string: url} - - def _write_element(self, pos, element, format_dict): + def _write_element(self, pos: list, element: object, format_dict: dict) -> list: """ Write a single text element of a GPTable to the GPWorksheet. 
- + Parameters ---------- element : str or list @@ -389,14 +402,15 @@ def _write_element(self, pos, element, format_dict): if element: self._smart_write(*pos, element, format_dict) pos[0] += 1 - - return pos + return pos - def _write_element_list(self, pos, element_list, format_dict): + def _write_element_list( + self, pos: list, element_list: list, format_dict: dict + ) -> list: """ Writes a list of elements row-wise. - + Parameters ---------- element_list : list @@ -415,51 +429,49 @@ def _write_element_list(self, pos, element_list, format_dict): if element_list: for element in element_list: pos = self._write_element(pos, element, format_dict) - - return pos + return pos - def _write_instructions(self, pos, element, format_dict): + def _write_instructions( + self, pos: list, element: object, format_dict: dict + ) -> list: """ Alias for writting description elements by name. """ return self._write_element(pos, element, format_dict) - - def _write_source(self, pos, element, format_dict): + def _write_source(self, pos: list, element: object, format_dict: dict) -> list: """ Alias for writting description elements by name. """ return self._write_element(pos, element, format_dict) - - def _write_scope(self, pos, element, format_dict): + def _write_scope(self, pos: list, element: object, format_dict: dict) -> list: """ Alias for writting description elements by name. """ return self._write_element(pos, element, format_dict) - - def _write_legend(self, pos, element_list, format_dict): + def _write_legend(self, pos: list, element_list: list, format_dict: dict) -> list: """ Alias for writting description elements by name. """ return self._write_element_list(pos, element_list, format_dict) - - def _write_notes(self, pos, element_list, format_dict): + def _write_notes(self, pos: list, element_list: list, format_dict: dict) -> list: """ Alias for writting description elements by name. 
""" return self._write_element_list(pos, element_list, format_dict) - - def _write_table_elements(self, pos, gptable, auto_width): + def _write_table_elements( + self, pos: list, gptable: "GPTable", auto_width: bool + ) -> list: """ Writes the table and units elements of a GPTable. Uses the Workbook Theme, plus any additional formatting associated with the GPTable. - + Parameters ---------- gptable : gptables.GPTable @@ -476,113 +488,108 @@ def _write_table_elements(self, pos, gptable, auto_width): new position to write next element from """ # Convert whitespace only cells to None - gptable.table.replace({r'^\s*$': None}, inplace=True, regex=True) + gptable.table.replace({r"^\s*$": None}, inplace=True, regex=True) if gptable.table.isna().values.all(): - msg = (f""" + msg = f""" {gptable.table_name} contains only null or whitespace cells. Please provide alternative table containing data. - """) + """ raise ValueError(msg) if gptable.table.isna().all(axis=1).any(): - msg = (f""" + msg = f""" Empty or null row found in {gptable.table_name}. Please remove blank rows before passing data to GPTable. - """) + """ raise ValueError(msg) if gptable.table.isna().values.any(): - msg = (f""" + msg = f""" Empty or null cell found in {gptable.table_name}. The reason for missingness should be included in the `GPTable.instructions` attribute. There should only be one reason otherwise a shorthand should be provided in the `instructions` or `legend` attribute. 
Guidance on shorthand can be found at: https://analysisfunction.civilservice.gov.uk/policy-store/symbols-in-tables-definitions-and-help/ - """) + """ warnings.warn(msg) # Raise error if any table element is only special characters - if gptable.table.astype("string").stack().str.contains('^[^a-zA-Z0-9]*$').any(): - msg = (f""" + if gptable.table.astype("string").stack().str.contains("^[^a-zA-Z0-9]*$").any(): + msg = f""" Cell found in {gptable.table_name} containing only special characters, replace with alphanumeric characters before inputting to GPTable. Guidance on symbols in tables can be found at: https://analysisfunction.civilservice.gov.uk/policy-store/symbols-in-tables-definitions-and-help/ - """) + """ raise ValueError(msg) # Get theme theme = self.theme - + # Reset position to left col on next row pos[1] = 0 - - ## Create data array + + # Create data array index_levels = gptable.index_levels index_columns = [col for col in gptable.index_columns.values()] data = pd.DataFrame(gptable.table, copy=True) - + # Create row containing column headings data.loc[-1] = data.columns data.index = data.index + 1 data.sort_index(inplace=True) - - ## Create formats array + + # Create formats array # pandas.DataFrame did NOT want to hold dictionaries, so be wary - formats = pd.DataFrame().reindex_like(data) + formats = pd.DataFrame().reindex_like(data).astype(object) dict_row = [{} for n in range(formats.shape[1])] for row in range(formats.shape[0]): dict_row = [{} for n in range(formats.shape[1])] formats.iloc[row] = dict_row - - ## Add Theme formatting to formats dataframe + + # Add Theme formatting to formats dataframe format_headings_from = 0 self._apply_format( - formats.iloc[0, format_headings_from:], - theme.column_heading_format - ) - - self._apply_format( - formats.iloc[1:, index_levels:], - theme.data_format - ) - + formats.iloc[0, format_headings_from:], theme.column_heading_format + ) + + self._apply_format(formats.iloc[1:, index_levels:], theme.data_format) + 
index_level_formats = [ - theme.index_1_format, - theme.index_2_format, - theme.index_3_format - ] + theme.index_1_format, + theme.index_2_format, + theme.index_3_format, + ] for level, col in gptable.index_columns.items(): self._apply_format( formats.iloc[1:, col], - index_level_formats[level - 1] # Account for 0-indexing - ) + index_level_formats[level - 1], # Account for 0-indexing + ) self._apply_column_alignments(data, formats, index_columns) - ## Add additional table-specific formatting from GPTable + # Add additional table-specific formatting from GPTable self._apply_additional_formatting( - formats, - gptable.additional_formatting, - gptable.index_levels - ) - - ## Write table + formats, gptable.additional_formatting, gptable.index_levels + ) + + # Write table pos = self._write_array(pos, data, formats) - ## Set columns widths + # Set columns widths if auto_width: widths = self._calculate_column_widths(data, formats) self._set_column_widths(widths) self._mark_data_as_worksheet_table(gptable, formats) - - return pos + return pos - def _apply_column_alignments(self, data_table, formats_table, index_columns): + def _apply_column_alignments( + self, data_table: pd.DataFrame, formats_table: pd.DataFrame, index_columns: list + ) -> None: """ Add column alignment to format based on datatype @@ -598,41 +605,51 @@ def _apply_column_alignments(self, data_table, formats_table, index_columns): # look for shorthand notation, usually a few letters in square brackets # will also find note markers eg [Note 1] # Using np.nan instead on None for backwards compatibility with pandas <=1.4 - data_table_copy = data_table.replace( - regex=r"\[[\w\s]+\]", - value = np.nan, - ) + with pd.option_context("future.no_silent_downcasting", True): + data_table_copy = data_table.replace( + regex=r"\[[\w\s]+\]", + value=np.nan, + ).infer_objects(copy=False) + if ( + data_table_copy.columns.to_list() + == data_table_copy.iloc[0, :].to_list() + ): + # drop first row which contains column 
names + data_table_copy = data_table_copy.iloc[1:] data_table_copy = data_table_copy.convert_dtypes() column_types = data_table_copy.dtypes + if index_columns == [] and pd.api.types.is_numeric_dtype( + column_types[data_table.columns[0]] + ): + column_types[data_table.columns[0]] = "str" for column in data_table.columns: if data_table.columns.get_loc(column) in index_columns: alignment_dict = {"align": "left"} elif pd.api.types.is_numeric_dtype(column_types[column]): - alignment_dict = {"align" : "right"} + alignment_dict = {"align": "right"} else: alignment_dict = {"align": "left"} self._apply_format(formats_table[column], alignment_dict) - def _apply_additional_formatting( - self, - formats_table, - additional_formatting, - index_levels - ): + self, + formats_table: pd.DataFrame, + additional_formatting: list, + index_levels: int, + ) -> None: """ Apply row, column and cell formatting to dataframe of formats. """ for item in additional_formatting: fmt_type = list(item.keys())[0] format_desc = item[fmt_type] - + if fmt_type == "cell": formatting = format_desc["format"] cell_ilocs = format_desc["cells"] @@ -640,24 +657,16 @@ def _apply_additional_formatting( cell_ilocs = [cell_ilocs] for row, col in cell_ilocs: formats_table_slice = formats_table.iloc[row, col] - - self._apply_format( - formats_table_slice, - formatting - ) - return None - - if fmt_type == "column": + + elif fmt_type == "column": cols_iloc = [ - formats_table.columns.get_loc(col) - if isinstance(col, str) - else col - for col in format_desc["columns"] - ] + formats_table.columns.get_loc(col) if isinstance(col, str) else col + for col in format_desc["columns"] + ] row_start = 0 if "include_names" in format_desc.keys(): row_start = 0 if format_desc["include_names"] else 1 - + formats_table_slice = formats_table.iloc[row_start:, cols_iloc] formatting = format_desc["format"] @@ -666,21 +675,19 @@ def _apply_additional_formatting( col_start = 0 if "include_names" in format_desc.keys(): col_start = 0 if 
format_desc["include_names"] else index_levels - + formats_table_slice = formats_table.iloc[rows_iloc, col_start:] formatting = format_desc["format"] - - self._apply_format( - formats_table_slice, - formatting - ) + self._apply_format(formats_table_slice, formatting) - def _write_array(self, pos, data, formats): + def _write_array( + self, pos: list, data: pd.DataFrame, formats: pd.DataFrame + ) -> list: """ Write a two-dimensional array to the current Worksheet, starting from the specified position. - + Parameters ---------- data : pandas.DataFrame @@ -690,7 +697,7 @@ def _write_array(self, pos, data, formats): to each cell of data pos : list the position of the top left cell to start writing the array from - + Returns ------- pos : list @@ -698,7 +705,7 @@ def _write_array(self, pos, data, formats): """ if data.shape != formats.shape: raise ValueError("data and formats arrays must be of equal shape") - + rows, cols = data.shape for row in range(rows): for col in range(cols): @@ -706,18 +713,16 @@ def _write_array(self, pos, data, formats): cell_format_dict = formats.iloc[row, col] self._smart_write( - pos[0] + row, - pos[1] + col, - cell_data, - cell_format_dict - ) - + pos[0] + row, pos[1] + col, cell_data, cell_format_dict + ) + pos = [pos[0] + rows, 0] - - return pos + return pos - def _mark_data_as_worksheet_table(self, gptable, formats_dataframe): + def _mark_data_as_worksheet_table( + self, gptable: "GPTable", formats_dataframe: pd.DataFrame + ) -> None: """ Marks the data to be recognised as a Worksheet Table in Excel. 
@@ -738,25 +743,29 @@ def _mark_data_as_worksheet_table(self, gptable, formats_dataframe): ] column_headers = [ - {'header': header, 'header_format': header_format} + {"header": header, "header_format": header_format} for header, header_format in zip(column_list, formats_list) ] - self.add_table(*data_range, - {'header_row': True, - 'autofilter': False, - 'columns': column_headers, - 'style': None, - 'name': gptable.table_name - }) - + self.add_table( + *data_range, + { + "header_row": True, + "autofilter": False, + "columns": column_headers, + "style": None, + "name": gptable.table_name, + }, + ) - def _smart_write(self, row, col, data, format_dict, *args): + def _smart_write( + self, row: int, col: int, data: object, format_dict: dict, *args + ) -> None: """ Depending on the input data, this function will write rich strings or use the standard `write()` method. For rich strings, the base format is merged with each rich format supplied within data. - + Parameters ---------- row : int @@ -772,7 +781,7 @@ def _smart_write(self, row, col, data, format_dict, *args): (first) value as URL. format_dict : dict Dictionary containing base format for the string. 
- + Returns ------- None @@ -785,7 +794,9 @@ def _smart_write(self, row, col, data, format_dict, *args): self._smart_write(row, col, data, format_dict, *args) elif any([isinstance(element, FormatList) for element in data]): - self._write_with_newlines_and_custom_formats(wb, row, col, data, format_dict, *args) + self._write_with_newlines_and_custom_formats( + wb, row, col, data, format_dict, *args + ) else: self._write_with_newlines(wb, row, col, data, format_dict, *args) @@ -801,7 +812,7 @@ def _smart_write(self, row, col, data, format_dict, *args): elif isinstance(data, dict): self._write_dict_as_url(wb, row, col, data, format_dict, *args) - + elif pd.isna(data): self.write_blank(row, col, None, wb.add_format(format_dict)) @@ -809,8 +820,9 @@ def _smart_write(self, row, col, data, format_dict, *args): # Write handles all other write types dynamically self.write(row, col, data, wb.add_format(format_dict), *args) - - def _write_with_newlines_and_custom_formats(self, wb, row, col, data, format_dict, *args): + def _write_with_newlines_and_custom_formats( + self, wb: Workbook, row: int, col: int, data: list, format_dict: dict, *args + ) -> None: """ Take list of FormatList (and str), join with newline characters and smart write """ @@ -829,37 +841,36 @@ def _write_with_newlines_and_custom_formats(self, wb, row, col, data, format_dic element_stings = [item for item in element if isinstance(item, str)] first_string = element_stings[0] new_string = "\n" + first_string - element_with_newline = [new_string if item == first_string else item for item in element] + element_with_newline = [ + new_string if item == first_string else item for item in element + ] else: element_with_newline = ["\n" + str(element)] data_with_newlines.extend(element_with_newline) self._write_with_custom_formats( - wb, - row, - col, - FormatList(data_with_newlines), - format_dict, - *args + wb, row, col, FormatList(data_with_newlines), format_dict, *args ) - - def _write_with_newlines(self, wb, row, 
col, data, format_dict, *args): + def _write_with_newlines( + self, wb: Workbook, row: int, col: int, data: list, format_dict: dict, *args + ) -> None: """ Take list of str, join with newline character and write """ data_string = "\n".join(data) - self.write( - row, - col, - data_string, - wb.add_format(format_dict), - *args - ) - + self.write(row, col, data_string, wb.add_format(format_dict), *args) - def _write_with_custom_formats(self, wb, row, col, data, format_dict, *args): + def _write_with_custom_formats( + self, + wb: Workbook, + row: int, + col: int, + data: "FormatList", + format_dict: dict, + *args, + ) -> None: data_with_custom_formats = [] for item in data.list: # Convert dicts to Format (with merge onto base format) @@ -871,74 +882,65 @@ def _write_with_custom_formats(self, wb, row, col, data, format_dict, *args): data_with_custom_formats.append(item) data_with_all_formats = [] - for n in range(len(data_with_custom_formats)-1): + for n in range(len(data_with_custom_formats) - 1): data_with_all_formats.append(data_with_custom_formats[n]) if isinstance(data_with_custom_formats[n], str): - if isinstance(data_with_custom_formats[n+1], str): + if isinstance(data_with_custom_formats[n + 1], str): data_with_all_formats.append(wb.add_format(format_dict)) data_with_all_formats.append(data_with_custom_formats[-1]) self.write_rich_string( - row, - col, - *data_with_all_formats, - wb.add_format(format_dict), - *args + row, col, *data_with_all_formats, wb.add_format(format_dict), *args ) - - def _write_dict_as_url(self, workbook, row, col, data, format_dict, *args): + def _write_dict_as_url( + self, + workbook: Workbook, + row: int, + col: int, + data: dict, + format_dict: dict, + *args, + ) -> None: url = list(data.values())[0] display_text = list(data.keys())[0] url_format = format_dict.copy() - url_format.update({"underline": True, "font_color": "blue"}) + url_format.update( + {"underline": True, "font_color": "blue"} + ) # blue == #0000FF - passes WCAG AA 
contrast check self.write_url( - row, - col, - url, - workbook.add_format(url_format), - display_text, - *args + row, col, url, workbook.add_format(url_format), display_text, *args ) - @staticmethod - def _apply_format(format_table_slice, format_dict): + def _apply_format(format_table_slice: object, format_dict: dict) -> None: """ Update all cells of a given dataframe slice with the format dictionary. Handles dict, series or dataframes. """ if isinstance(format_table_slice, pd.Series): - (format_table_slice - .apply(lambda d: d.update(format_dict)) - ) + (format_table_slice.apply(lambda d: d.update(format_dict))) elif isinstance(format_table_slice, pd.DataFrame): # Vectorised for 2D - (format_table_slice - .apply(np.vectorize(lambda d: d.update(format_dict))) - ) + (format_table_slice.apply(np.vectorize(lambda d: d.update(format_dict)))) elif isinstance(format_table_slice, dict): format_table_slice.update(format_dict) - - def _set_column_widths(self, widths): + def _set_column_widths(self, widths: list) -> None: """ Set the column widths using a list of widths. """ for col_number in range(len(widths)): - self.set_column( - col_number, - col_number, - widths[col_number] - ) - + self.set_column_pixels(col_number, col_number, widths[col_number]) - def _calculate_column_widths(self, table, formats_table): + def _calculate_column_widths( + self, table: pd.DataFrame, formats_table: pd.DataFrame + ) -> list: """ - Calculate Excel column widths using maximum length of strings - and the maximum font size in each column of the data table. + Calculate Excel column widths using xlsxwriter's cell_autofit_width for each cell, + and take the maximum per column. 
Parameters ---------- @@ -947,124 +949,91 @@ def _calculate_column_widths(self, table, formats_table): formats_table: pd.DataFrame formats table to retrieve font size from - Returns + Returns ------- col_widths : list width to apply to Excel columns """ cols = table.shape[1] - max_lengths = [ - table.iloc[:, col].apply(self._longest_line_length).max() - for col in range(cols) - ] - - max_font_sizes = [ - formats_table.iloc[:, col] - .apply(lambda x: x.get("font_size") or 10).max() - for col in range(cols) - ] - - col_widths = [ - self._excel_string_width(l, f) - for l, f in zip(max_lengths, max_font_sizes) - ] + col_widths = [] + for col in range(cols): + cell_widths = [] + for row in range(table.shape[0]): + cell_val = table.iloc[row, col] + longest_line = self._get_longest_line(cell_val) + format_dict = formats_table.iloc[row, col] + scaling_factor = self._get_scaling_factor(format_dict, longest_line) + width = ceil(cell_autofit_width(longest_line) * scaling_factor) + cell_widths.append(width) + col_widths.append(max(cell_widths) if cell_widths else 0) return col_widths - - @staticmethod - def _excel_string_width(string_len, font_size): - """ - Calculate the rough length of a string in Excel character units. - This crude estimate does not account for font name or other font format - (e.g. wrapping). - - Parameters - ---------- - string_len : int - length of string to calculate width in Excel for - font_size : int - size of font - - Returns - ------- - excel_width : float - width of equivalent string in Excel - """ - if string_len == 0: - excel_width = 0 - else: - excel_width = string_len * ((font_size * 0.12) - 0.09) - - return excel_width - - - def _longest_line_length(self, cell_val): - """ - Calculate the length of the longest line within a cell. - If the cell contains a string, the longest length between line breaks is returned. - If the cell contains a float or integer, the longest length is calculated from the cell_value cast to a string. 
- If the cell contains a link formatted as {display_text: link}, the longest length is calculated from the display text. - If the cell contains a list of strings, the length of the longest string in the list is returned. - Expects new lines to be marked with "\n", "\r\n" or new lines in multiline strings. + def _get_scaling_factor(self, format_dict: dict, text: str) -> float: + """Return scaling factor for width based on font size, bold formatting, + and capitalisation.""" + font_size = ( + format_dict.get("font_size", 11) if isinstance(format_dict, dict) else 11 + ) + bold = ( + format_dict.get("bold", False) if isinstance(format_dict, dict) else False + ) - Parameters - ---------- - cell_val: - cell value + if text and isinstance(text, str): + num_upper = sum(1 for c in text if c.isupper()) + upper_ratio = num_upper / len(text) if len(text) > 0 else 0 + else: + upper_ratio = 0 + capitalisation_factor = 1.0 + 0.15 * upper_ratio + return (font_size / 11) * (1.1 if bold else 1.0) * capitalisation_factor - Returns - ------- - max_length: int - the length of the longest line within the string - """ - split_strings = """ -|\r\n|\n""" + def _get_longest_line(self, cell_val: object) -> str: + """Return the longest line in a cell value split by newline.""" + cell_val_str = self._get_cell_string(cell_val) + return max(cell_val_str.split("\n"), key=len) + def _get_cell_string(self, cell_val: object) -> str: + """Return the contents from any cell value as a string.""" if isinstance(cell_val, str): - max_length = max([len(line) for line in re.split(split_strings, cell_val)]) + return cell_val elif isinstance(cell_val, (float, int)): - max_length = self._longest_line_length(str(cell_val)) + return str(cell_val) elif isinstance(cell_val, dict): - max_length = self._longest_line_length(list(cell_val)[0]) + return "\n".join([self._get_cell_string(k) for k in cell_val.keys()]) elif isinstance(cell_val, FormatList): - max_length = self._longest_line_length(cell_val.string) + return 
self._get_cell_string(cell_val.string) elif isinstance(cell_val, list): - if isinstance(cell_val[0], (dict, FormatList)): - max_length = self._longest_line_length(cell_val[0]) - else: - max_length = max([len(line) for line in cell_val]) + return "\n".join([self._get_cell_string(item) for item in cell_val]) else: - max_length = 0 - - return max_length + return str(cell_val) if cell_val else "" class GPWorkbook(Workbook): """ - Wrapper for and XlsxWriter Workbook object. The Worksheets class has been - replaced by an alternative with a method for writting GPTable objects. + Wrapper for an `XlsxWriter.Workbook` object. """ - def __init__(self, filename=None, options={}): + def __init__(self, filename: str = None, options: dict = {}) -> None: super(GPWorkbook, self).__init__(filename=filename, options=options) self.theme = None self._annotations = None # Set default theme self.set_theme(gptheme) - def add_worksheet(self, name=None, gridlines="hide_all"): + def add_worksheet( + self, name: str = None, gridlines: str = "hide_all" + ) -> "GPWorksheet": """ Overwrite add_worksheet() to create a GPWorksheet object. - + Parameters ---------- name : str (optional) name of the the worksheet to be created gridlines : string, optional - option to hide or show gridlines on worksheets. "show_all" - don't - hide gridlines, "hide_printed" - hide printed gridlines only, or - "hide_all" - hide screen and printed gridlines. - + option to hide or show gridlines on worksheets. "show_all" - don't + hide gridlines, "hide_printed" - hide printed gridlines only, or + "hide_all" - hide screen and printed gridlines. 
+ Returns ------- worksheet : gptables.GPWorksheet @@ -1073,36 +1042,34 @@ def add_worksheet(self, name=None, gridlines="hide_all"): worksheet = super(GPWorkbook, self).add_worksheet(name, GPWorksheet) worksheet.theme = self.theme worksheet._workbook = self # Create reference to wb, for formatting - - worksheet.hide_gridlines({ - "show_all": 0, - "hide_printed": 1, - "hide_all": 2 - }[gridlines] + + worksheet.hide_gridlines( + {"show_all": 0, "hide_printed": 1, "hide_all": 2}[gridlines] ) - - return worksheet + return worksheet - def set_theme(self, theme): + def set_theme(self, theme: "Theme") -> None: """ Sets the theme for all GPTable objects written to the Workbook. - + Parameters ---------- theme : gptables.Theme a Theme object containing the formatting to be applied to GPTable objects written to Worksheets within this Workbook - + Returns ------- None """ if not isinstance(theme, Theme): - raise TypeError(f"`theme` must be a gptables.Theme object, not: {type(theme)}") + raise TypeError( + f"`theme` must be a gptables.Theme object, not: {type(theme)}" + ) self.theme = theme - def _update_annotations(self, sheets): + def _update_annotations(self, sheets: dict) -> None: ordered_refs = [] for gptable in sheets.values(): gptable._set_annotations(self.theme.description_order) @@ -1113,14 +1080,14 @@ def _update_annotations(self, sheets): def make_table_of_contents( self, - sheets, - additional_elements = None, - column_names = None, - table_name = None, - title = None, - subtitles = None, - instructions = None, - ): + sheets: dict, + additional_elements: list = None, + column_names: list = None, + table_name: str = None, + title: str = None, + subtitles: list = None, + instructions: str = None, + ) -> "GPTable": """ Generate table of contents from sheet and optional customisation parameters. @@ -1144,9 +1111,9 @@ def make_table_of_contents( description of the page layout defaults to "This worksheet contains one table." 
- Return + Returns ------ - gpt.GPTable + gptables.GPTable """ if column_names is None: column_names = ["Sheet name", "Table description"] @@ -1160,12 +1127,14 @@ def make_table_of_contents( if additional_elements is not None: valid_elements = ["subtitles", "scope", "source", "instructions"] if not all(element in valid_elements for element in additional_elements): - msg = ("Cover `additional_elements` list can only contain" - "'subtitles', 'scope', 'source' and 'instructions'") + msg = ( + "Cover `additional_elements` list can only contain" + "'subtitles', 'scope', 'source' and 'instructions'" + ) raise ValueError(msg) contents_dict = {} - for label, gptable in sheets.items(): + for label, gptable in sheets.items(): contents_entry = [] contents_entry.append(self._strip_annotation_references(gptable.title)) @@ -1173,28 +1142,38 @@ def make_table_of_contents( for element in additional_elements: content = getattr(gptable, element) if element == "subtitles": - [contents_entry.append(self._strip_annotation_references(element)) for element in content] + [ + contents_entry.append( + self._strip_annotation_references(element) + ) + for element in content + ] else: - contents_entry.append(self._strip_annotation_references(content)) + contents_entry.append( + self._strip_annotation_references(content) + ) link = {label: f"internal:'{label}'!A1"} contents_dict[label] = [link, contents_entry] - contents_table = pd.DataFrame.from_dict(contents_dict, orient="index").reset_index(drop=True) + contents_table = pd.DataFrame.from_dict( + contents_dict, orient="index" + ).reset_index(drop=True) contents_table.columns = column_names return GPTable( - table=contents_table, - table_name=table_name, - title=title, - subtitles=subtitles, - instructions=instructions + table=contents_table, + table_name=table_name, + title=title, + subtitles=subtitles, + instructions=instructions, + index_columns={2: 0}, ) @staticmethod - def _strip_annotation_references(text): + def 
_strip_annotation_references(text: object) -> object: """ Strip annotation references (as $$ $$) from a str or list text element. """ @@ -1202,28 +1181,27 @@ def _strip_annotation_references(text): if isinstance(text, str): no_annotations = re.sub(pattern, "", text) elif isinstance(text, FormatList): - no_annotations = FormatList([ - re.sub(pattern, "", part) - if isinstance(part, str) else part - for part in text.list - ]) - elif isinstance(text, list): # TODO: this shouldn't get used - check and delete + no_annotations = FormatList( + [ + re.sub(pattern, "", part) if isinstance(part, str) else part + for part in text.list + ] + ) + elif isinstance(text, list): # TODO: this shouldn't get used - check and delete no_annotations = [ - re.sub(pattern, "", part) - if isinstance(part, str) else part + re.sub(pattern, "", part) if isinstance(part, str) else part for part in text - ] - - return no_annotations + ] + return no_annotations def make_notesheet( self, - notes_table, - table_name = None, - title = None, - instructions = None, - ): + notes_table: pd.DataFrame, + table_name: str = None, + title: str = None, + instructions: str = None, + ) -> "GPTable": """ Generate notes table sheets from notes table and optional customisation parameters. @@ -1239,11 +1217,10 @@ def make_notesheet( description of the page layout defaults to "This worksheet contains one table." 
- Return + Returns ------ - gpt.GPTable + gptables.GPTable """ - # set defaults if table_name is None: table_name = "notes_table" @@ -1257,22 +1234,24 @@ def make_notesheet( ordered_refs = self._annotations order_df = pd.DataFrame({"order": ordered_refs}) - + notes = notes_table.copy() notes = notes.rename(columns={notes.columns[0]: "order"}) ordered_notes = order_df.merge(notes, on="order", how="left") - + unreferenced_notes = notes[~notes["order"].isin(ordered_notes["order"])] if not unreferenced_notes.empty: - warnings.warn(f"The following notes are not referenced: {list(unreferenced_notes['order'])}") + warnings.warn( + f"The following notes are not referenced: {list(unreferenced_notes['order'])}" + ) ordered_notes = pd.concat([ordered_notes, unreferenced_notes]) # replace note references with note number - ordered_notes = (ordered_notes - .reset_index() + ordered_notes = ( + ordered_notes.reset_index() .rename(columns={"index": "Note number"}) .drop(columns=["order"]) ) @@ -1281,9 +1260,9 @@ def make_notesheet( ordered_notes["Note number"] = ordered_notes["Note number"] + 1 return GPTable( - table=ordered_notes, - table_name=table_name, - title=title, + table=ordered_notes, + table_name=table_name, + title=title, instructions=instructions, - index_columns={} + index_columns={}, ) diff --git a/gptables/examples/how_to_custom_theme.py b/gptables/examples/how_to_custom_theme.py new file mode 100644 index 00000000..7debdd75 --- /dev/null +++ b/gptables/examples/how_to_custom_theme.py @@ -0,0 +1,30 @@ +from pathlib import Path + +import pandas as pd + +import gptables as gpt + +parent_dir = Path(__file__).parents[1] +penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") + +penguins_table = gpt.GPTable( + table=penguins_data, + table_name="penguins_statistics", + title="The Palmer Penguins Dataset", + subtitles=["This is the first subtitle", "This is another subtitle"], + scope="Penguins", + source="Palmer Station, Antarctica", +) + +penguins_sheets = 
{"Penguins": penguins_table} + +if __name__ == "__main__": + output_path = parent_dir / "gpt_custom_theme.xlsx" + theme_path = str(Path(__file__).parent.parent / "themes/example_theme_basic.yaml") + gpt.write_workbook( + filename=output_path, + sheets=penguins_sheets, + theme=gpt.Theme(theme_path), + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + print("Output written at: ", output_path) diff --git a/gptables/examples/how_to_theme_cover.py b/gptables/examples/how_to_theme_cover.py new file mode 100644 index 00000000..02a9c955 --- /dev/null +++ b/gptables/examples/how_to_theme_cover.py @@ -0,0 +1,54 @@ +from pathlib import Path + +import pandas as pd + +import gptables as gpt + +parent_dir = Path(__file__).parents[1] +penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") + +penguins_table = gpt.GPTable( + table=penguins_data, + table_name="penguins_statistics", + title="The Palmer Penguins Dataset", + subtitles=["This is the first subtitle", "This is another subtitle"], + scope="Penguins", + source="Palmer Station, Antarctica", +) + +penguins_sheets = {"Penguins": penguins_table} + +penguins_cover = gpt.Cover( + cover_label="Cover", + title="Palmer Penguins Dataset", + intro=[ + "This spreadsheet contains a table of data obtained from the palmerpenguins package", + "This is intended to be a simple example of how to use the gptables package to create a spreadsheet with a cover sheet and data sheets.", + ], + about=[ + "Additional information about your publication can go here", + [{"bold": True}, "Publication dates"], + "Date published: 01 January 2025.", + "Next release: 01 January 2026.", + [{"bold": True}, "Methodology notes"], + "Information on methodology can be useful to users of your data", + [{"bold": True}, "Notes, blank cells and units"], + "Some cells in the tables refer to notes which can be found in the notes worksheet. 
Note markers are presented in square brackets, for example: [note 1].", + "Blank cells indicate no data. An explanation of why there is no data is given in the notes worksheet, see the column headings for which notes you should refer to.", + "Some column headings give units, when this is the case the units are presented in round brackets to differentiate them from note markers.", + ], + contact=[ + "Tel: 01234 567890", + "Email: [example@email.address](mailto: example@email.address)", + ], +) + +if __name__ == "__main__": + output_path = parent_dir / "gpt_theme_cover.xlsx" + theme_path = str(Path(__file__).parent.parent / "themes/example_theme_cover.yaml") + gpt.write_workbook( + filename=output_path, + sheets=penguins_sheets, + theme=gpt.Theme(theme_path), + cover=penguins_cover, + ) diff --git a/gptables/examples/howto_additional_formatting.py b/gptables/examples/howto_additional_formatting.py new file mode 100644 index 00000000..c910e1dd --- /dev/null +++ b/gptables/examples/howto_additional_formatting.py @@ -0,0 +1,53 @@ +from pathlib import Path + +import pandas as pd + +import gptables as gpt + +parent_dir = Path(__file__).parents[1] +penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") + +formatted_subtitles = [ + "The first subtitle", + [{"bold": True}, "This", " is another subtitle"], +] + +sample_additional_formatting = [ + { + "column": { + "columns": ["Species", "Island"], + "format": { + "align": "center", + "italic": True, + }, + } + }, + {"column": {"columns": [3], "format": {"left": 1}}}, + { + "row": { + "rows": -1, + "format": { + "bottom": 1, + "indent": 2, + }, + } + }, +] + +penguins_table = gpt.GPTable( + table=penguins_data, + table_name="penguins_statistics", + title="The Palmer Penguins Dataset", + subtitles=formatted_subtitles, + scope="Penguins", + source="Palmer Station, Antarctica", + additional_formatting=sample_additional_formatting, +) + +penguins_sheets = {"Penguins": penguins_table} + +if __name__ == "__main__": + 
output_path = parent_dir / "gpt_additional_formatting.xlsx" + wb = gpt.produce_workbook(filename=output_path, sheets=penguins_sheets) + wb.close() + print("Output written at: ", output_path) diff --git a/gptables/examples/penguins_additional_formatting.py b/gptables/examples/penguins_additional_formatting.py deleted file mode 100644 index fce3e67b..00000000 --- a/gptables/examples/penguins_additional_formatting.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Penguins - Additional Formatting Example ------------------------------------- - -This example demonstrates additional formatting that is not supported in -the ``gptable.Theme``. - -Specific columns, rows and cells of the table elements (indexes, column headings and data) -can be formatted using the ``gptable.GPTable(..., additional_formatting = ...)`` parameter. -This parameter takes a list of dictionaries, allowing you to select as many rows, columns -or cells as you like. - -As with all formatting, supported arguments are desribed in the -`XlsxWriter documentation `_. - -Any formatting not possible through this means can be achieved using -``XlsxWriter`` `Workbook `_ -and `Worksheet `_ functionality. -A ``gptable.GPWorkbook`` object is returned when using the -``gptables.produce_workbook`` API function. -The ``GPWorkbook.worksheets()`` function returns a list of ``GPWorksheet`` objects, -which can also be modified. 
-""" - -import gptables as gpt -import pandas as pd -import numpy as np -from pathlib import Path - -## Read data and arrange -parent_dir = Path(__file__).parents[1] - -penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") - -#Any data processing could go here as long as you end with a Pandas dataframe that you want to write in a spreadsheet - -## Define table elements -penguins_table_name = "penguins_statistics" -penguins_title = "Penguins" - -#Individual words/phrases can have formatting applied without the use of the additional_formatting argument -penguins_subtitles = [ - "The first subtitle", - [{"bold": True}, "Just", " another subtitle"] - ] -penguins_units = {key: "mm" for key in range(2,5)} -penguins_scope = "Penguins" - -## Define additional formatting -# Columns can be referenced by name or number -# Rows may only be referenced by number -# Column and row numbers refer to the table elements, including indexes and column headings -penguins_additional_formatting = [ - { - "column": { - "columns": ["Species", "Island"], # str, int or list of either - "format": {"align": "center","italic":True}, #The "Species" and "Island" columns are centre-aligned and made italic - } - }, - {"column": {"columns": [3], "format": {"left": 1}}}, #Gives the fourth column a left border - { - "row": { - "rows": -1, # Numbers only, but can refer to last row using -1 - "format": {"bottom": 1, "indent":2}, # Give the last row a border at the bottom of each cell and indents two levels - } - }, - ] - -kwargs = { - "table_name": penguins_table_name, - "title": penguins_title, - "subtitles": penguins_subtitles, - "units": penguins_units, - "scope": penguins_scope, - "source": None, - "additional_formatting": penguins_additional_formatting, - } - -## Define our GPTable -penguins_table = gpt.GPTable(table=penguins_data, **kwargs) - -## Use produce workbook to return GPWorkbook -if __name__ == "__main__": - output_path = parent_dir / 
"python_penguins_additional_formatting_gptable.xlsx" - wb = gpt.produce_workbook( - filename=output_path, sheets={"Penguins": penguins_table} - ) - - # Carry out additional modifications on the GPWorkbook or GPWorksheets - # This supports all `XlsxWriter` package functionality - ws = wb.worksheets()[0] - ws.set_row(0, 30) # Set the height of the first row - - #To format cells using the set_row or set_column functions we must use a workbook to create a format object - italic_format=wb.add_format({"italic":True}) - ws.set_column(2,3,10,italic_format) #Sets the width of the third and fourth column and makes them italic - - #Note that the first two arguments of set_column are the first and last columns (inclusive) you want to format as opposed - #to set_row which only affects a single row at a time (the first argument). - - # Finally use the close method to save the output - - wb.close() - print("Output written at: ", output_path) - \ No newline at end of file diff --git a/gptables/examples/penguins_cover.py b/gptables/examples/penguins_cover.py deleted file mode 100644 index 5799063d..00000000 --- a/gptables/examples/penguins_cover.py +++ /dev/null @@ -1,66 +0,0 @@ -""" -Penguins - Cover Page ------------------ - -This example demonstrates use of the ``gptables.Cover`` class to create a cover page. This example also -demonstrates the usage of the ``index_columns``. - -A gptables cover page contains a range of custom text elements, along with a hyperlinked table of contents. -Text elements are defined as a ``gptables.Cover`` instance, which is passed to the ``cover`` parameter of ``gptables.write_workbook()`` or ``gptables.produce_workbook()``. 
-""" - -import gptables as gpt -import pandas as pd -import numpy as np -from pathlib import Path - -## Read data -parent_dir = Path(__file__).parents[1] - -penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") - -#Any data processing could go here as long as you end with a Pandas dataframe that you want to write in a spreadsheet - -## Define table elements -penguins_table_name = "penguins_statistics" -penguins_title = "The Penguins Dataset" -penguins_subtitles = [ - "This is the first subtitle", - "Just another subtitle" - ] -penguins_scope = "Penguins" -penguins_source = "Palmer Station, Antarctica" - -kwargs = { - "table_name": penguins_table_name, - "title": penguins_title, - "subtitles": penguins_subtitles, - "scope": penguins_scope, - "source": penguins_source, - "index_columns": {2: 0} # The level 2 index from our Pandas dataframe is put in the first (zeroth with Python indexing) column of the spreadsheet - } - -## Define our GPTable -penguins_table = gpt.GPTable(table=penguins_data, table_name="penguins_statistics", **kwargs) - -penguins_sheets = { - "Penguins": penguins_table -} - -penguins_cover = gpt.Cover( - cover_label="Cover", - title="A Workbook containing two copies of the data", - intro=["This is some introductory information", "And some more"], - about=["Even more info about my data", "And a little more"], - contact=["John Doe", "Tel: 345345345", "Email: [john.doe@snailmail.com](mailto:john.doe@snailmail.com)"], - ) - -## Use write_workbook to win! 
-if __name__ == "__main__": - output_path = parent_dir / "python_penguins_cover_gptable.xlsx" - gpt.write_workbook( - filename=output_path, - sheets=penguins_sheets, - cover=penguins_cover, - ) - print("Output written at: ", output_path) diff --git a/gptables/examples/penguins_data_end_to_end.py b/gptables/examples/penguins_data_end_to_end.py new file mode 100644 index 00000000..b33cbe66 --- /dev/null +++ b/gptables/examples/penguins_data_end_to_end.py @@ -0,0 +1,96 @@ +""" +Penguins - End-to-End Example +-------------------------------------------- + +This example demonstrates a full workflow: +- Data loading +- Cleaning (handling missing values, renaming columns, recoding values) +- Rounding measurement columns +- Converting between wide and long formats +- Preparing data for gptables +- Applying additional formatting + +""" + +from pathlib import Path + +import pandas as pd + +import gptables as gpt + +# Read data +parent_dir = Path(__file__).parents[1] +penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") + +# --- Data Cleaning --- +# Drop rows with missing values in columns other than "Comments" +cols_to_check = [col for col in penguins_data.columns if col != "Comments"] +cleaned = penguins_data.dropna(subset=cols_to_check) + +# Rename columns +cleaned = cleaned.rename( + columns={ + "Culmen Length (mm)": "Bill Length (mm)", + "Culmen Depth (mm)": "Bill Depth (mm)", + } +) + +# Recode Sex column +cleaned["Sex"] = cleaned["Sex"].replace({"MALE": "M", "FEMALE": "F"}) + +# --- Rounding --- +# Round measurement columns to nearest integer +measurement_cols = [ + "Bill Length (mm)", + "Bill Depth (mm)", + "Flipper Length (mm)", + "Body Mass (g)", +] +for col in measurement_cols: + if col in cleaned.columns: + cleaned[col] = cleaned[col].round(0).astype("Int64") + +# --- Wide to Long Conversion --- +# Example: melt measurements into a long format +long_df = pd.melt( + cleaned, + id_vars=["Species", "Island", "Sex"], + value_vars=[ + "Bill Length (mm)", 
+ "Bill Depth (mm)", + "Flipper Length (mm)", + "Body Mass (g)", + ], + var_name="Measurement", + value_name="Value", +) +# --- Prepare for gptables --- +table_name = "penguins_long_format" +title = "Penguins Dataset (Long Format Example)" +subtitles = ["Demonstrates data cleaning, rounding, and wide-to-long conversion."] +scope = "Penguins" +source = "Palmer Station, Antarctica" + +# Additional formatting: highlight 'Value' column +additional_formatting = [ + {"column": {"columns": ["Value"], "format": {"bg_color": "#DDEEFF"}}} +] + +penguins_table = gpt.GPTable( + table=long_df, + table_name=table_name, + title=title, + subtitles=subtitles, + scope=scope, + source=source, + additional_formatting=additional_formatting, +) + +if __name__ == "__main__": + output_path = parent_dir / "python_penguins_end_to_end_gptable.xlsx" + gpt.write_workbook( + filename=output_path, + sheets={"Penguins (Long Format)": penguins_table}, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + print("Output written at: ", output_path) diff --git a/gptables/examples/penguins_minimal.py b/gptables/examples/penguins_minimal.py deleted file mode 100644 index 27441f9c..00000000 --- a/gptables/examples/penguins_minimal.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -Penguins - Minimal Example ----------------------- - -This example demonstrates use of the ``gptables.write_workbook`` function. -This API function is designed for production of consistently structured and formatted tables. - -Summary statistics from the penguins dataset are used to build a ``gptables.GPTable`` -object. Elements of metadata are provided to the corresponding parameters of the class. -Where you wish to provide no metadata in required parameters, use ``None``. - -Table formatting can be defined as a ``gptable.Theme``, which is passed to the API functions -using the ``theme`` parameter. Or you can rely on our default - gptheme. 
-""" - -import gptables as gpt -import pandas as pd -import numpy as np -from pathlib import Path - -## Read data -parent_dir = Path(__file__).parents[1] - -penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") - -#Any data processing could go here as long as you end with a Pandas dataframe that you want to write in a spreadsheet - -## Define table elements -penguins_table_name = "penguins_statistics" -penguins_title = "The Penguins Dataset" -penguins_subtitles = [ - "This is the first subtitle", - "Just another subtitle" - ] -penguins_scope = "Penguins" -penguins_source = "Palmer Station, Antarctica" - -## Define our GPTable -penguins_table = gpt.GPTable(table=penguins_data, table_name=penguins_table_name, title=penguins_title, subtitles=penguins_subtitles, - scope=penguins_scope, source=penguins_source) - -#Every table must be associated to a sheet name for writing -penguins_sheets = {"Penguins": penguins_table} - -## Use write_workbook to win! -if __name__ == "__main__": - output_path = parent_dir / "python_penguins_gptable.xlsx" - gpt.write_workbook( - filename=output_path, - sheets=penguins_sheets, - contentsheet_options={"additional_elements": ["subtitles", "scope"]} - ) - print("Output written at: ", output_path) diff --git a/gptables/examples/penguins_minimal_alternate.py b/gptables/examples/penguins_minimal_alternate.py deleted file mode 100644 index 9803059d..00000000 --- a/gptables/examples/penguins_minimal_alternate.py +++ /dev/null @@ -1,53 +0,0 @@ -""" -Penguins - Minimal Example ----------------------- - -This example demonstrates another way to use the ``gptables.write_workbook`` function. -This code is equivalent to that in the above example. 
-""" - -import gptables as gpt -import pandas as pd -import numpy as np -from pathlib import Path - -## Read data -parent_dir = Path(__file__).parents[1] - -penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") - -#Any data processing could go here as long as you end with a Pandas dataframe that you want to write in a spreadsheet - -## Define table elements -penguins_table_name = "penguins_statistics" -penguins_title = "The Penguins Dataset" -penguins_subtitles = [ - "This is the first subtitle", - "Just another subtitle" - ] -penguins_scope = "Penguins" -penguins_source = "Palmer Station, Antarctica" - -#Use kwargs to pass these to the appropriate parameters -kwargs = { - "table_name": penguins_table_name, - "title": penguins_title, - "subtitles": penguins_subtitles, - "scope": penguins_scope, - "source": penguins_source, - } - -penguins_table = gpt.GPTable(table=penguins_data, **kwargs) - -#Every table must be associated to a sheet name for writing -penguins_sheets = {"Penguins": penguins_table} - -## Use write_workbook to win! -if __name__ == "__main__": - output_path = parent_dir / "python_penguins_gptable.xlsx" - gpt.write_workbook( - filename=output_path, - sheets=penguins_sheets, - contentsheet_options={"additional_elements": ["subtitles", "scope"]} - ) - print("Output written at: ", output_path) diff --git a/gptables/examples/penguins_multiple_sheets.py b/gptables/examples/penguins_multiple_sheets.py deleted file mode 100644 index d1fcc867..00000000 --- a/gptables/examples/penguins_multiple_sheets.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -Penguins - Multiple Pages ------------------ - -This example demonstrates how to create a workbook with multiple sheets. Note that it -will auto-generate a table of contents. 
-""" - -import gptables as gpt -import pandas as pd -import numpy as np -from pathlib import Path -from copy import deepcopy - -## Read data -parent_dir = Path(__file__).parents[1] - -penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") - -#Any data processing could go here as long as you end with a Pandas dataframe that you want to write in a spreadsheet - -## Define table elements -penguins_table_name = "penguins_statistics" -penguins_title = "The Penguins Dataset" -penguins_subtitles = [ - "This is the first subtitle", - "Just another subtitle" - ] -penguins_scope = "Penguins" -penguins_source = "Palmer Station, Antarctica" - -kwargs = { - "table_name": penguins_table_name, - "title": penguins_title, - "subtitles": penguins_subtitles, - "scope": penguins_scope, - "source": penguins_source - } - -## Define our GPTable -penguins_table = gpt.GPTable(table=penguins_data, table_name="penguins_statistics", **kwargs) - -penguins_table_copy = deepcopy(penguins_table) -penguins_table_copy.set_title("A copy of the first sheet") -penguins_table_copy.set_table_name("penguins_statistics_copy") #All tables in a single workbook must have a unique name - -penguins_sheets = { - "Penguins": penguins_table, - "Copy of Penguins": penguins_table_copy -} - -## Use write_workbook to win! -if __name__ == "__main__": - output_path = parent_dir / "python_penguins_cover_gptable.xlsx" - gpt.write_workbook( - filename=output_path, - sheets=penguins_sheets - ) - print("Output written at: ", output_path) diff --git a/gptables/examples/penguins_notes.py b/gptables/examples/penguins_notes.py deleted file mode 100644 index 3f3915af..00000000 --- a/gptables/examples/penguins_notes.py +++ /dev/null @@ -1,77 +0,0 @@ -""" -Penguins - Notes Example ----------------------- - -This example demonstrates how to include notes in a GPTable. Notes cannot -be included in data cells but may appear either in column headers or in text such -as titles, subtitles, etc. 
- -Placeholders for notes are put in using the notation, $$note$$. The actual note text -must be provided as a Pandas dataframe to the notes_table argument of the ``gptables.write_workbook`` function. -This dataframe should contain the text of the placeholder, the actual text you want in the note and (optionally) -any hyperlinks you want in the note. -""" - -import gptables as gpt -import pandas as pd -import numpy as np -from pathlib import Path - -## Read data -parent_dir = Path(__file__).parents[1] - -penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") - -#Any data processing could go here as long as you end with a Pandas dataframe that you want to write in a spreadsheet - -## Define table elements - -penguins_table_name = "penguins_statistics" - -#Notes are added by using $$note$$ in text -penguins_title = "The Penguins Dataset$$noteabouty$$" -penguins_subtitles = [ - "This is the first subtitle$$noteaboutx$$", - "Just another subtitle" - ] - -#Notes can also be included in column headers, see below -penguins_table_notes = {"species": "$$noteaboutx$$", 2: "$$noteaboutz$$"} #Columns can be referenced either by index or by name -penguins_units = {2:"mm", "bill_depth_mm":"mm",4:"mm","body_mass_g":"g"} #As above for column referencing -penguins_scope = "Penguins" -penguins_source = "Palmer Station, Antarctica" - -kwargs = { - "table_name": penguins_table_name, - "title": penguins_title, - "subtitles": penguins_subtitles, - "units": penguins_units, - "table_notes": penguins_table_notes, - "scope": penguins_scope, - "source": penguins_source, - } - -## Define our GPTable -penguins_table = gpt.GPTable(table=penguins_data, **kwargs) - -penguins_sheets = {"Penguins": penguins_table} - -# Notesheet - Note that the ordering of each list only matters with respect to the other lists in the "notes" dictionary. 
-# GPTables will use the "Note reference" list to ensure the "Note text" is assigned correctly -notes = { - "Note reference": ["noteaboutz", "noteaboutx", "noteabouty"], - "Note text": ["This is a note about z linking to google.", "This is a note about x linking to duckduckgo.", "This is a note about y linking to the ONS website."], - "Useful link": ["[google](https://www.google.com)", "[duckduckgo](https://duckduckgo.com/)", "[ONS](https://www.ons.gov.uk)"], - } -penguins_notes_table = pd.DataFrame.from_dict(notes) - -## Use write_workbook to win! -if __name__ == "__main__": - output_path = parent_dir / "python_penguins_gptable.xlsx" - gpt.write_workbook( - filename=output_path, - sheets=penguins_sheets, - notes_table=penguins_notes_table, - contentsheet_options={"additional_elements": ["subtitles", "scope"]} - ) - print("Output written at: ", output_path) diff --git a/gptables/examples/penguins_theme.py b/gptables/examples/penguins_theme.py deleted file mode 100644 index d87cdf6f..00000000 --- a/gptables/examples/penguins_theme.py +++ /dev/null @@ -1,59 +0,0 @@ -""" -Penguins - Theme Example ----------------------- - -This example demonstrates how to use a custom theme in the production of a workbook. - -Summary statistics from the penguins dataset are used to build a ``gptables.GPTable`` -object. Elements of metadata are provided to the corresponding parameters of the class. -Where you wish to provide no metadata in required parameters, use ``None``. - -The theme parameter must take either a directory or a yaml file in the ``gptables.write_workbook`` function. -The yaml file used in this example can be found in the themes folder as ''penguins_test_theme.yaml''. 
-""" - -import gptables as gpt -import pandas as pd -import numpy as np -from pathlib import Path - -## Read data -parent_dir = Path(__file__).parents[1] - -penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") - -#Any data processing could go here as long as you end with a Pandas dataframe that you want to write in a spreadsheet - -## Define table elements -penguins_table_name = "penguins_statistics" -penguins_title = "The Penguins Dataset" -penguins_subtitles = [ - "This is the first subtitle", - "Just another subtitle" - ] -penguins_scope = "Penguins" -penguins_source = "Palmer Station, Antarctica" - -kwargs = { - "table_name": penguins_table_name, - "title": penguins_title, - "subtitles": penguins_subtitles, - "scope": penguins_scope, - "source": penguins_source, - } -penguins_table = gpt.GPTable(table=penguins_data, **kwargs) - -penguins_sheets = {"Penguins": penguins_table} - -## Use write_workbook to win! -# Simply pass the filepath of the yaml file containing your theme to the GPTables Theme class and then to write_workbook -if __name__ == "__main__": - output_path = parent_dir / "python_penguins_gptable.xlsx" - theme_path = str(Path(__file__).parent.parent / "themes/penguins_test_theme.yaml") - gpt.write_workbook( - filename=output_path, - sheets=penguins_sheets, - theme = gpt.Theme(theme_path), - contentsheet_options={"additional_elements": ["subtitles", "scope"]} - ) - print("Output written at: ", output_path) diff --git a/gptables/examples/penguins_wide_vs_long.py b/gptables/examples/penguins_wide_vs_long.py new file mode 100644 index 00000000..a3c39f4a --- /dev/null +++ b/gptables/examples/penguins_wide_vs_long.py @@ -0,0 +1,76 @@ +""" +Penguins - Wide vs Long Data Example +------------------------------------ + +This example demonstrates: +- The difference between wide and long data formats +- How to convert between them using pandas +- How to use gptables with both formats + +See https://www.statology.org/long-vs-wide-data/ for an 
+overview of wide vs long data formats. +Also see https://tidyr.tidyverse.org/articles/tidy-data.html +for more on tidy data principles. +""" + +from pathlib import Path + +import pandas as pd + +import gptables as gpt + +# Read data +parent_dir = Path(__file__).parents[1] +penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") + +# --- Wide Format --- +# The original penguins_data is in wide format: each measurement is a separate column + +wide_table = gpt.GPTable( + table=penguins_data, + table_name="penguins_wide", + title="Penguins Dataset (Wide Format)", + subtitles=[ + "Each measurement is a separate column.", + "Wide format is common for spreadsheets and some analyses.", + ], + scope="Penguins", + source="Palmer Station, Antarctica", +) + +# --- Long Format --- +# Convert to long format using melt +# This example combines measurements into a single column +long_df = pd.melt( + penguins_data, + id_vars=["Species", "Island", "Sex"], + value_vars=[ + "Culmen Length (mm)", + "Culmen Depth (mm)", + "Flipper Length (mm)", + "Body Mass (g)", + ], + var_name="Measurement", + value_name="Value", +) + +long_table = gpt.GPTable( + table=long_df, + table_name="penguins_long", + title="Penguins Dataset (Long Format)", + subtitles=[ + "Measurements are stacked in a single column.", + "Long format is preferred for tidy data and many analyses.", + ], + scope="Penguins", + source="Palmer Station, Antarctica", +) + +if __name__ == "__main__": + output_path = parent_dir / "python_penguins_wide_long_gptable.xlsx" + gpt.write_workbook( + filename=output_path, + sheets={"Wide Format": wide_table, "Long Format": long_table}, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + print("Output written at: ", output_path) diff --git a/gptables/examples/survey_data.csv b/gptables/examples/survey_data.csv index bc757ad5..465d7f3f 100644 --- a/gptables/examples/survey_data.csv +++ b/gptables/examples/survey_data.csv @@ -1,19 +1,19 @@ Time period and 
dataset code row,"Number of people -(thousands)","Economically active +(thousands)","Economically active (thousands) -[note 1]","Employment level -(thousands) -[note 2]","Unemployment level -(thousands) -[note 2]","Economically inactive -(thousands) -[note 3]","Economically active rate +[note 1]","Employment level +(thousands) +[note 2]","Unemployment level +(thousands) +[note 2]","Economically inactive +(thousands) +[note 3]","Economically active rate (%) ","Employment rate (%) -[note 4]","Unemployment rate +[note 4]","Unemployment rate (%) -[note 4]","Economically inactive rate +[note 4]","Economically inactive rate (%) [note 4]" Dataset identifier code,MGSL,MGSF,MGRZ,MGSC,MGSI,MGWG,MGSR,MGSX,YBTC diff --git a/gptables/examples/survey_data.py b/gptables/examples/survey_data.py index f7b07ee9..13d0ee4f 100644 --- a/gptables/examples/survey_data.py +++ b/gptables/examples/survey_data.py @@ -1,53 +1,78 @@ """ Labour market overview, UK: December 2020 - Real Survey Data Example -------------------------------------------------------------------- -This example demonstrates how to replicate the Labour Market overview accessible +This example demonstrates how to replicate the Labour Market overview accessible example found at https://analysisfunction.civilservice.gov.uk/policy-store/further-resources-for-releasing-statistics-in-spreadsheets/ """ -import gptables as gpt -import pandas as pd -import numpy as np from pathlib import Path -## Read data and arrange +import pandas as pd + +import gptables as gpt + +# Read data and arrange parent_dir = Path(__file__).parent labour_market_data = pd.read_csv(parent_dir / "survey_data.csv") -labour_market_data.dropna(axis=0, how="all", inplace=True) # Remove empty rows in the data -labour_market_data.dropna(axis=1, how="all", inplace=True) # Remove columns rows in the data -col_names = ["Time period and dataset code row", - "Number of people", - "Economically active", - "Employment level", - "Unemployment level", - "Economically 
inactive", - "Economically active rate", - "Employment rate", - "Unemployment rate", - "Economically inactive rate"] +labour_market_data.dropna( + axis=0, how="all", inplace=True +) # Remove empty rows in the data +labour_market_data.dropna( + axis=1, how="all", inplace=True +) # Remove columns rows in the data +col_names = [ + "Time period and dataset code row", + "Number of people", + "Economically active", + "Employment level", + "Unemployment level", + "Economically inactive", + "Economically active rate", + "Employment rate", + "Unemployment rate", + "Economically inactive rate", +] labour_market_data.columns = col_names -## Define table elements +# Define table elements table_name = "Labour_market_overview_accessibility_example_Nov21" title = "Number and percentage of population aged 16 and over in each labour market activity group, UK, seasonally adjusted" subtitles = [ "This worksheet contains one table. Some cells refer to notes which can be found on the notes worksheet." - ] -units = {1:"thousands", 2:"thousands", 3:"thousands", 4:"thousands", - 5:"thousands", 6:"%", 7:"%", 8:"%", 9:"%"} -table_notes = {2:"$$note 1$$", 3:"$$note 2$$", 4:"$$note 2$$",5: "$$note 3$$", - 7:"$$note 4$$", 8:"$$note 4$$", 9:"$$note 4$$"} +] +units = { + 1: "thousands", + 2: "thousands", + 3: "thousands", + 4: "thousands", + 5: "thousands", + 6: "%", + 7: "%", + 8: "%", + 9: "%", +} +table_notes = { + 2: "$$note 1$$", + 3: "$$note 2$$", + 4: "$$note 2$$", + 5: "$$note 3$$", + 7: "$$note 4$$", + 8: "$$note 4$$", + 9: "$$note 4$$", +} scope = "Labour Market" source = "Source: Office for National Statistics" index = {2: 0} # Column 0 is a level 2 index -additional_formatting = [{ +additional_formatting = [ + { "row": { "rows": [1], "format": {"bold": True, "font_size": 14}, } - }] + } +] # or use kwargs to pass these to the appropriate parameters @@ -61,10 +86,10 @@ "source": source, "index_columns": index, "additional_formatting": additional_formatting, - } +} -## Define our 
GPTable +# Define our GPTable survey_table = gpt.GPTable(table=labour_market_data, **kwargs) sheets = {"sheet 1a": survey_table} @@ -72,48 +97,58 @@ cover = gpt.Cover( cover_label="Cover", title="Labour market overview data tables, UK, December 2020 (accessibility example)", - intro=["This spreadsheet contains a selection of the data tables published alongside the Office for National Statistics' Labour market overview for December 2020. We have edited these data tables and the accompanying cover sheet, table of contents and notes worksheet to meet the legal accessibility regulations. It is intended to be an example of an accessible spreadsheet. The data tables and accompanying information have not been quality assured. Please see the original statistical release if you are looking for accurate data.", - "[Labour market overview, UK: December 2020](https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/bulletins/uklabourmarket/december2020)"], - about=[[{"bold": True, "font_size": 14}, "Publication dates"], - "This data tables in this spreadsheet were originally published at 7:00am 15 December 2020", - "The next publication was published at 7:00am 26 January 2021.", - [{"bold": True, "font_size": 14}, "Note on weighting methodology"], - "Due to the coronavirus (COVID19) pandemic, all face to face interviewing for the Labour Force Survey was suspended and replaced with telephone interviewing. This change in mode for first interviews has changed the non-response bias of the survey, affecting interviews from March 2020 onwards. 
All data included in this spreadsheet have now been updated and are based on latest weighting methodology.", - "More information about the impact of COVID19 on the Labour Force Survey", - "Dataset identifier codes", - "The four-character identification codes appearing in the tables are the ONS' references for the data series.", - [{"bold": True, "font_size": 14}, "Comparing quarterly changes"], - "When comparing quarterly changes ONS recommends comparing with the previous non-overlapping three-month average time period, for example, compare Apr to Jun with Jan to Mar, not with Mar to May.", - [{"bold": True, "font_size": 14}, "Units, notes and no data"], - "Some cells in the tables refer to notes which can be found in the notes worksheet. Note markers are presented in square brackets, for example: [note 1].", - "Some cells have no data, when this is the case the words 'no data' are presented in square brackets, for example: '[no data]'. An explanation of why there is no data is given in the notes worksheet, see the column headings for which notes you should refer to.", - "Some column headings give units, when this is the case the units are presented in round brackets to differentiate them from note markers.", - [{"bold": True, "font_size": 14}, "Historic publication dates for labour market statistics", " "], - "The monthly labour market statistics release was first published in April 1998. Prior to April 1998 there was no integrated monthly release and the Labour Force Survey estimates were published separately, on different dates, from other labour market statistics. From April 2018 the usual publication day for the release was changed from Wednesday to Tuesday.", - [{"bold": True, "font_size": 14}, "More labour market data"], - "Other labour market datasets are available on the ONS website.", - "Labour market statistics time series dataset on the ONS website." 
+ intro=[ + "This spreadsheet contains a selection of the data tables published alongside the Office for National Statistics' Labour market overview for December 2020. We have edited these data tables and the accompanying cover sheet, table of contents and notes worksheet to meet the legal accessibility regulations. It is intended to be an example of an accessible spreadsheet. The data tables and accompanying information have not been quality assured. Please see the original statistical release if you are looking for accurate data.", + "[Labour market overview, UK: December 2020](https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/employmentandemployeetypes/bulletins/uklabourmarket/december2020)", + ], + about=[ + [{"bold": True, "font_size": 14}, "Publication dates"], + "This data tables in this spreadsheet were originally published at 7:00am 15 December 2020", + "The next publication was published at 7:00am 26 January 2021.", + [{"bold": True, "font_size": 14}, "Note on weighting methodology"], + "Due to the coronavirus (COVID19) pandemic, all face to face interviewing for the Labour Force Survey was suspended and replaced with telephone interviewing. This change in mode for first interviews has changed the non-response bias of the survey, affecting interviews from March 2020 onwards. 
All data included in this spreadsheet have now been updated and are based on latest weighting methodology.", + "More information about the impact of COVID19 on the Labour Force Survey", + "Dataset identifier codes", + "The four-character identification codes appearing in the tables are the ONS' references for the data series.", + [{"bold": True, "font_size": 14}, "Comparing quarterly changes"], + "When comparing quarterly changes ONS recommends comparing with the previous non-overlapping three-month average time period, for example, compare Apr to Jun with Jan to Mar, not with Mar to May.", + [{"bold": True, "font_size": 14}, "Units, notes and no data"], + "Some cells in the tables refer to notes which can be found in the notes worksheet. Note markers are presented in square brackets, for example: [note 1].", + "Some cells have no data, when this is the case the words 'no data' are presented in square brackets, for example: '[no data]'. An explanation of why there is no data is given in the notes worksheet, see the column headings for which notes you should refer to.", + "Some column headings give units, when this is the case the units are presented in round brackets to differentiate them from note markers.", + [ + {"bold": True, "font_size": 14}, + "Historic publication dates for labour market statistics", + " ", + ], + "The monthly labour market statistics release was first published in April 1998. Prior to April 1998 there was no integrated monthly release and the Labour Force Survey estimates were published separately, on different dates, from other labour market statistics. 
From April 2018 the usual publication day for the release was changed from Wednesday to Tuesday.", + [{"bold": True, "font_size": 14}, "More labour market data"], + "Other labour market datasets are available on the ONS website.", + "Labour market statistics time series dataset on the ONS website.", + ], + contact=[ + "Tel: 01633455400", + "Email: [labour.market@ons.gov.uk](mailto:labour.market@ons.gov.uk)", ], - contact=["Tel: 01633455400", "Email: [labour.market@ons.gov.uk](mailto:labour.market@ons.gov.uk)"], ) -## Notesheet +# Notesheet notes_table = pd.read_csv(parent_dir / "survey_data_notes.csv") -notes_table.dropna(axis=0, how="all", inplace=True) # Remove empty rows in the data -notes_table.dropna(axis=1, how="all", inplace=True) # Remove columns rows in the data -notes_table.columns = ['Note reference', 'Note text'] +notes_table.dropna(axis=0, how="all", inplace=True) # Remove empty rows in the data +notes_table.dropna(axis=1, how="all", inplace=True) # Remove columns rows in the data +notes_table.columns = ["Note reference", "Note text"] -## Use write_workbook to win! +# Use write_workbook to win! if __name__ == "__main__": output_path = parent_dir / "python_survey_data_gptable.xlsx" gpt.write_workbook( - filename=output_path, + filename=output_path, sheets=sheets, cover=cover, notes_table=notes_table, contentsheet_options={"additional_elements": ["subtitles", "scope"]}, auto_width=True, gridlines="show_all", - cover_gridlines=True - ) - print("Output written at: ", output_path) \ No newline at end of file + cover_gridlines=True, + ) + print("Output written at: ", output_path) diff --git a/gptables/examples/survey_data_notes.csv b/gptables/examples/survey_data_notes.csv index f7020fe9..a954260f 100644 --- a/gptables/examples/survey_data_notes.csv +++ b/gptables/examples/survey_data_notes.csv @@ -1,21 +1,21 @@ -Note number ,Note text -note 1,The economically active population is defined as those in employment plus those who are unemployed. 
+Note number ,Note text +note 1,The economically active population is defined as those in employment plus those who are unemployed. note 2,"We publish two levels of employment and unemployment, one for those aged 16 and over and one for those aged 16 to 64. The headline levels are for those aged 16 and over. They measure all people in work or actively seeking and able to work. " note 3,"We publish two levels of economic inactivity, one for those aged 16 and over and one for those aged 16 to 64. The headline level is for those aged 16 to 64. The inactivity level for those aged 16 and over is less meaningful as it includes elderly people who have retired from the labour force. " -note 4,"The headline employment rate is the number of people aged 16 to 64 in employment divided by the population aged 16 to 64. +note 4,"The headline employment rate is the number of people aged 16 to 64 in employment divided by the population aged 16 to 64. -The headline unemployment rate is the number of unemployed people (aged 16 and over) divided by the economically active population (aged 16 and over). +The headline unemployment rate is the number of unemployed people (aged 16 and over) divided by the economically active population (aged 16 and over). -The headline inactivity rate is the number of economically inactive people aged 16 to 64 divided by the population aged 16 to 64. +The headline inactivity rate is the number of economically inactive people aged 16 to 64 divided by the population aged 16 to 64. -The employment and inactivity rates for those aged 16 and over are affected by the inclusion of the retired population in the denominators and are therefore less meaningful than the rates for those aged from 16 to 64. +The employment and inactivity rates for those aged 16 and over are affected by the inclusion of the retired population in the denominators and are therefore less meaningful than the rates for those aged from 16 to 64. 
However, for the unemployment rate for those aged 16 and over, no such effect occurs as the denominator for the unemployment rate is the economically active population which only includes people in work or actively seeking and able to work. " -note 5,The total series does not equal the sum of the 'UK' and 'Non UK' series as it includes people who do not state their country of birth or nationality. +note 5,The total series does not equal the sum of the 'UK' and 'Non UK' series as it includes people who do not state their country of birth or nationality. note 6,A more detailed country breakdown for employment is available at Dataset EMP06. -note 7,EU27 refers to the 27 member states of the European Union. +note 7,EU27 refers to the 27 member states of the European Union. note 8,"This series consists of those people whose main reason for being economically inactive was because they were students. It does not include all students as some students will be included in the employment and unemployment estimates. " note 9,"As the estimates are not seasonally adjusted, it is best practice to only compare the same quarter for different years e.g. compare Jan to Mar 2018 with Jan to Mar 2019 but do not compare Jul to Sep 2018 with Jan to Mar 2019." -note 10,Data for workers is only available from January 1986. +note 10,Data for workers is only available from January 1986. note 11,Due to rounding the working days lost for the public and private sector may not add up to the working days lost. 
diff --git a/gptables/examples/tutorial_adding_a_cover.py b/gptables/examples/tutorial_adding_a_cover.py new file mode 100644 index 00000000..f7dd9af6 --- /dev/null +++ b/gptables/examples/tutorial_adding_a_cover.py @@ -0,0 +1,54 @@ +from pathlib import Path + +import pandas as pd + +import gptables as gpt + +parent_dir = Path(__file__).parents[1] +penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") + +penguins_table = gpt.GPTable( + table=penguins_data, + table_name="penguins_statistics", + title="The Palmer Penguins Dataset", + subtitles=["This is the first subtitle", "This is another subtitle"], + scope="Penguins", + source="Palmer Station, Antarctica", +) + +penguins_sheets = {"Penguins": penguins_table} + +penguins_cover = gpt.Cover( + cover_label="Cover", + title="Palmer Penguins Dataset", + intro=[ + "This spreadsheet contains a table of data obtained from the palmerpenguins package", + "This is intended to be a simple example of how to use the gptables package to create a spreadsheet with a cover sheet and data sheets.", + ], + about=[ + "Additional information about your publication can go here", + [{"bold": True}, "Publication dates"], + "Date published: 01 January 2025.", + "Next release: 01 January 2026.", + [{"bold": True}, "Methodology notes"], + "Information on methodology can be useful to users of your data", + [{"bold": True}, "Notes, blank cells and units"], + "Some cells in the tables refer to notes which can be found in the notes worksheet. Note markers are presented in square brackets, for example: [note 1].", + "Blank cells indicate no data. 
An explanation of why there is no data is given in the notes worksheet, see the column headings for which notes you should refer to.", + "Some column headings give units, when this is the case the units are presented in round brackets to differentiate them from note markers.", + ], + contact=[ + "Tel: 01234 567890", + "Email: [example@email.address](mailto: example@email.address)", + ], +) + +if __name__ == "__main__": + output_path = parent_dir / "gpt_adding_cover.xlsx" + gpt.write_workbook( + filename=output_path, + sheets=penguins_sheets, + cover=penguins_cover, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + print("Output written at: ", output_path) diff --git a/gptables/examples/tutorial_adding_datasheets.py b/gptables/examples/tutorial_adding_datasheets.py new file mode 100644 index 00000000..7086cc38 --- /dev/null +++ b/gptables/examples/tutorial_adding_datasheets.py @@ -0,0 +1,47 @@ +from pathlib import Path + +import pandas as pd + +import gptables as gpt + +parent_dir = Path(__file__).parents[1] +penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") + +penguins_data_1 = penguins_data.iloc[:, :10] +penguins_data_2 = pd.concat( + [penguins_data.iloc[:, :3], penguins_data.iloc[:, 10:]], axis=1 +) + + +penguins_table_1 = gpt.GPTable( + table=penguins_data_1, + table_name="penguins_statistics_1", + title="The Palmer Penguins Dataset (Sheet 1)", + subtitles=["This is the first subtitle", "This is another subtitle"], + scope="Penguins", + source="Palmer Station, Antarctica", +) + +penguins_table_2 = gpt.GPTable( + table=penguins_data_2, + table_name="penguins_statistics_2", + title="The Palmer Penguins Dataset (Sheet 2)", + subtitles=[ + "This is the first subtitle for sheet 2", + "Another subtitle for sheet 2", + ], + scope="Penguins", + source="Palmer Station, Antarctica", +) + + +penguins_sheets = {"Penguins 1": penguins_table_1, "Penguins 2": penguins_table_2} + +if __name__ == "__main__": + output_path = parent_dir 
/ "gpt_adding_datasheets.xlsx" + gpt.write_workbook( + filename=output_path, + sheets=penguins_sheets, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + print("Output written at: ", output_path) diff --git a/gptables/examples/tutorial_adding_notes.py b/gptables/examples/tutorial_adding_notes.py new file mode 100644 index 00000000..fc1fb222 --- /dev/null +++ b/gptables/examples/tutorial_adding_notes.py @@ -0,0 +1,54 @@ +from pathlib import Path + +import pandas as pd + +import gptables as gpt + +parent_dir = Path(__file__).parents[1] +penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") + +penguins_table = gpt.GPTable( + table=penguins_data, + table_name="penguins_statistics", + title="The Palmer Penguins Dataset$$note_about_x$$", + subtitles=[ + "This is the first subtitle$$note_about_y$$", + "This is another subtitle", + ], + scope="Penguins", + source="Palmer Station, Antarctica", +) + +penguins_sheets = {"Penguins": penguins_table} + +notes = { + "Note reference": [ + "note_about_x", + "note_about_y", + "note_about_z", + "note_with_no_link", + ], + "Note text": [ + "This is a note about x linking to google.", + "This is a note about y linking to duckduckgo.", + "This is a note about z linking to the ONS website.", + "This is a note with no link.", + ], + "Useful link": [ + "[google](https://www.google.com)", + "[duckduckgo](https://duckduckgo.com/)", + "[ONS](https://www.ons.gov.uk)", + None, + ], +} +penguins_notes_table = pd.DataFrame.from_dict(notes) + +if __name__ == "__main__": + output_path = parent_dir / "gpt_adding_notes.xlsx" + gpt.write_workbook( + filename=output_path, + sheets=penguins_sheets, + notes_table=penguins_notes_table, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + print("Output written at: ", output_path) diff --git a/gptables/examples/tutorial_starting_out.py b/gptables/examples/tutorial_starting_out.py new file mode 100644 index 00000000..08aea132 --- /dev/null +++ 
b/gptables/examples/tutorial_starting_out.py @@ -0,0 +1,28 @@ +from pathlib import Path + +import pandas as pd + +import gptables as gpt + +parent_dir = Path(__file__).parents[1] +penguins_data = pd.read_csv(parent_dir / "test/data/penguins.csv") + +penguins_table = gpt.GPTable( + table=penguins_data, + table_name="penguins_statistics", + title="The Palmer Penguins Dataset", + subtitles=["This is the first subtitle", "This is another subtitle"], + scope="Penguins", + source="Palmer Station, Antarctica", +) + +penguins_sheets = {"Penguins": penguins_table} + +if __name__ == "__main__": + output_path = parent_dir / "gpt_starting_out.xlsx" + gpt.write_workbook( + filename=output_path, + sheets=penguins_sheets, + contentsheet_options={"additional_elements": ["subtitles", "scope"]}, + ) + print("Output written at: ", output_path) diff --git a/gptables/test/conftest.py b/gptables/test/conftest.py new file mode 100644 index 00000000..f1ce0ca4 --- /dev/null +++ b/gptables/test/conftest.py @@ -0,0 +1,21 @@ +import pandas as pd +import pytest + +from gptables.core.gptable import GPTable + + +@pytest.fixture(scope="function") +def create_gptable_with_kwargs(): + + def generate_gptable(format_dict=None): + base_gptable = { + "table": pd.DataFrame(), + "table_name": "table_name", + "title": "", + "index_columns": {}, # Override default, as no columns in table + } + if format_dict is not None: + base_gptable.update(format_dict) + return GPTable(**base_gptable) + + return generate_gptable diff --git a/gptables/test/test_additional_formatting_syntax.py b/gptables/test/test_additional_formatting_syntax.py index 06625175..3082cd9e 100644 --- a/gptables/test/test_additional_formatting_syntax.py +++ b/gptables/test/test_additional_formatting_syntax.py @@ -1,25 +1,38 @@ +from pathlib import Path +import pandas as pd import gptables as gpt -import pandas as pd -import numpy as np -from pathlib import Path -## Read data and arrange +# Read data and arrange parent_dir = 
Path(__file__).parent penguin_data = pd.read_csv(parent_dir / "data/penguins.csv") -## Define table elements +# Define table elements table_name = "penguin_statistics" title = "Penguins" subtitles = [ - [{"font_name": "Chiller"}, "The", {"font_size": 30}, " first", {"font_color": "red"}, " subtitle"], - [{"bold": True}, "The", {"italic": True}, " second", {"underline": True}, " subtitle"], + [ + {"font_name": "Chiller"}, + "The", + {"font_size": 30}, + " first", + {"font_color": "red"}, + " subtitle", + ], + [ + {"bold": True}, + "The", + {"italic": True}, + " second", + {"underline": True}, + " subtitle", + ], [{"font_script": 1}, "Ignore", {"font_script": 2}, " this"], - [{"font_strikeout": True}, "bye", " "] - ] # checking font formatting -units = {key: "mm" for key in range(2,5)} + [{"font_strikeout": True}, "bye", " "], +] # checking font formatting +units = {key: "mm" for key in range(2, 5)} scope = "Penguins" index = {1: 0, 2: 1} @@ -28,38 +41,60 @@ additional_formatting = [ { "column": { - "columns": ["Species", "Island"], - "format": {"align": "vcenter"}, #checking vertical alignment - "include_names": False, + "columns": ["Species", "Island"], + "format": {"align": "vcenter"}, # checking vertical alignment + "include_names": False, } }, - {"column": {"columns": [0], "format": {"indent": 2, "rotation": 90}, "include_names": True}}, #checking alignment formatting + { + "column": { + "columns": [0], + "format": {"indent": 2, "rotation": 90}, + "include_names": True, + } + }, # checking alignment formatting { "row": { - "rows": -1, - "format": {"bottom": 1, "top": 2, "bottom_color": "blue", "top_color": "yellow", - "pattern": 6, "bg_color": "lime", "fg_color": "pink"}, # checking pattern formatting + "rows": -1, + "format": { + "bottom": 1, + "top": 2, + "bottom_color": "blue", + "top_color": "yellow", + "pattern": 6, + "bg_color": "lime", + "fg_color": "pink", + }, # checking pattern formatting } }, { "row": { "rows": 3, - "format": {"num_format": 3, 
"locked": True, "left": 3, "left_color": "navy"}, #checking number & protection formatting + "format": { + "num_format": 3, + "locked": True, + "left": 3, + "left_color": "navy", + }, # checking number & protection formatting } }, { "row": { "rows": 5, - "format": {"num_format": "0.00", "right": 2, "right_color": "silver"}, #checking number formatting + "format": { + "num_format": "0.00", + "right": 2, + "right_color": "silver", + }, # checking number formatting } }, { "cell": { - "cells": (1,1), + "cells": (1, 1), "format": {"text_wrap": True}, } - } - ] + }, +] # or just use kwargs kwargs = { @@ -71,17 +106,15 @@ "source": None, "index_columns": index, "additional_formatting": additional_formatting, - } +} -## Define our GPTable +# Define our GPTable penguin_table = gpt.GPTable(table=penguin_data, **kwargs) -## Use produce workbook to return GPWorkbook +# Use produce workbook to return GPWorkbook if __name__ == "__main__": output_path = parent_dir / "test_additional_formatting_gptable.xlsx" - wb = gpt.produce_workbook( - filename=output_path, sheets={"Penguins": penguin_table} - ) + wb = gpt.produce_workbook(filename=output_path, sheets={"Penguins": penguin_table}) # Carry out additional modifications on the GPWorkbook or GPWorksheets # This supports all `XlsxWriter` package functionality @@ -90,4 +123,4 @@ # Finally use the close method to save the output wb.close() - print("Output written at: ", output_path) \ No newline at end of file + print("Output written at: ", output_path) diff --git a/gptables/test/test_api.py b/gptables/test/test_api.py index 3d8683e6..3e874b23 100644 --- a/gptables/test/test_api.py +++ b/gptables/test/test_api.py @@ -1,8 +1,9 @@ -import pytest -import pandas as pd -import gptables as gpt from pathlib import Path +import pandas as pd +import pytest + +import gptables as gpt from gptables.test.test_utils.excel_comparison_test import ExcelComparisonTest @@ -26,31 +27,32 @@ def generate_gpworkbook(output_path): source="My imagination", 
units={0: "Latin alphabet", "columnB": "real numbers"}, table_notes={1: "$$ref2$$"}, - index_columns={2:0}, - additional_formatting=[{ - "column":{ - "columns": ["columnA"], - "format": {"bold": True} - } - }] + additional_formatting=[ + {"column": {"columns": ["columnA"], "format": {"bold": True}}} + ], ) - notes_table = pd.DataFrame({ - "Note reference": ["ref1", "ref2"], - "Note text": ["Some text", "Some more text"], - "Link": ["[gov.uk](https://www.gov.uk)", "[Wikipedia](https://en.wikipedia.org)"] - }) + notes_table = pd.DataFrame( + { + "Note reference": ["ref1", "ref2"], + "Note text": ["Some text", "Some more text"], + "Link": [ + "[gov.uk](https://www.gov.uk)", + "[Wikipedia](https://en.wikipedia.org)", + ], + } + ) cover = gpt.Cover( title="Cover title", intro=["Introduction"], about=["About"], contact=["Me", "[please.dont@contact.me](mailto:please.dont@contact.me)"], - cover_label="Cover" + cover_label="Cover", ) - gpt.write_workbook( # Use defaults for theme and autowidth - filename=output_path/"actual_workbook.xlsx", + gpt.write_workbook( # Use defaults for theme and autowidth + filename=output_path / "test_end_to_end.obtained.xlsx", sheets={"Label": gptable}, cover=cover, contentsheet_label="Table of contents", @@ -59,27 +61,38 @@ def generate_gpworkbook(output_path): notesheet_label="Notes table", notesheet_options={"title": "Table with notes"}, gridlines="show_all", - cover_gridlines=False + cover_gridlines=False, ) return generate_gpworkbook -def test_end_to_end(create_gpworkbook): +def test_end_to_end(create_gpworkbook, file_regression): """ - Test that runs the API functions with example input to check for errors and - expected output. + Test that runs the API functions with example input and checks the generated Excel + file using a manually managed expected_workbook.xlsx and ExcelComparisonTest. 
""" - output_path = Path(__file__).parent - + output_path = Path(__file__).parent / "test_api" + output_path.mkdir(parents=True, exist_ok=True) create_gpworkbook(output_path) + actual_file = output_path / "test_end_to_end.obtained.xlsx" - ect = ExcelComparisonTest() + # Use file_regression to manage the regression file, but ignore its assertion + try: + file_regression.check(actual_file.read_bytes(), extension=".xlsx", binary=True) + except AssertionError: + pass - ect.exp_filename = output_path / "expected_workbook.xlsx" - ect.got_filename = output_path / "actual_workbook.xlsx" + regression_file = output_path / "test_end_to_end.xlsx" + if not regression_file.exists(): + raise FileNotFoundError( + f"Could not find regression file for comparison: {regression_file}" + ) + + ect = ExcelComparisonTest() + ect.got_filename = str(actual_file) + ect.exp_filename = str(regression_file) ect.ignore_files = [] ect.ignore_elements = {} - ect.assertExcelEqual() ect.tearDown() diff --git a/gptables/test/expected_workbook.xlsx b/gptables/test/test_api/test_end_to_end.xlsx similarity index 57% rename from gptables/test/expected_workbook.xlsx rename to gptables/test/test_api/test_end_to_end.xlsx index 9ea668a5..d3152d7f 100644 Binary files a/gptables/test/expected_workbook.xlsx and b/gptables/test/test_api/test_end_to_end.xlsx differ diff --git a/gptables/test/test_cover.py b/gptables/test/test_cover.py index 5cca9c0d..bc4a8f0b 100644 --- a/gptables/test/test_cover.py +++ b/gptables/test/test_cover.py @@ -1,27 +1,35 @@ import pytest from gptables.core.cover import Cover -from gptables.core.gptable import FormatList + class TestCover: - - @pytest.mark.parametrize("input_data", [ - None, - ["text"], - [[{"bold":True}, "richtext"]], - [[{"bold":True}, "richtext", " "]], - [[{"bold":True}, "richtext", " "], "text"], - "text", - 42, - [15] - ]) - + + @pytest.mark.parametrize( + "input_data", + [ + None, + ["text"], + [[{"bold": True}, "richtext"]], + [[{"bold": True}, "richtext", " "]], 
+ [[{"bold": True}, "richtext", " "], "text"], + "text", + 42, + [15], + ], + ) def test_parse_formatting(self, input_data): - + got = Cover._parse_formatting(input_data) - + if isinstance(input_data, list): - assert all([got_element.list == input_element for input_element, got_element in zip(input_data, got) if isinstance(input_element, list)]) - + assert all( + [ + got_element.list == input_element + for input_element, got_element in zip(input_data, got) + if isinstance(input_element, list) + ] + ) + else: assert got == input_data diff --git a/gptables/test/test_gptable.py b/gptables/test/test_gptable.py index f75183ce..6fdfe7fc 100644 --- a/gptables/test/test_gptable.py +++ b/gptables/test/test_gptable.py @@ -1,12 +1,8 @@ -import numpy as np +from contextlib import contextmanager + import pandas as pd import pytest from pandas.testing import assert_frame_equal -from contextlib import contextmanager - - -from gptables import GPTable - # TODO: These should be stored in GPTable gptable_compulsory_text_attrs = ["title", "instructions"] @@ -20,8 +16,8 @@ {}, {1: "one"}, {1: "one", 2: "two"}, - {1: "one", 2: "two", 3: "three"} - ] + {1: "one", 2: "two", 3: "three"}, +] valid_text_elements_excl_none = [ "This is a string", @@ -31,39 +27,17 @@ valid_text_elements_incl_none = valid_text_elements_excl_none.copy() valid_text_elements_incl_none.append(None) -invalid_text_elements_excl_none = [ - dict(), - set(), - 42, - 3.14, - True -] +invalid_text_elements_excl_none = [dict(), set(), 42, 3.14, True] invalid_text_elements_incl_none = invalid_text_elements_excl_none.copy() invalid_text_elements_incl_none.append(None) + @contextmanager def does_not_raise(): yield -@pytest.fixture(scope="function") -def create_gptable_with_kwargs(): - - def generate_gptable(format_dict=None): - base_gptable = { - "table": pd.DataFrame(), - "table_name": "table_name", - "title": "", - "index_columns": {} # Override default, as no columns in table - } - if format_dict is not None: - 
base_gptable.update(format_dict) - return GPTable(**base_gptable) - - return generate_gptable - - def test_init_defaults(create_gptable_with_kwargs): """ Test that given a minimal input, default attributes are correct types. @@ -74,19 +48,20 @@ def test_init_defaults(create_gptable_with_kwargs): assert empty_gptable.title == "" assert empty_gptable.table_name == "table_name" - assert_frame_equal( - empty_gptable.table, pd.DataFrame().reset_index(drop=True) - ) + assert_frame_equal(empty_gptable.table, pd.DataFrame().reset_index(drop=True)) # Optional args - assert empty_gptable.scope == None - assert empty_gptable.source == None - assert empty_gptable.units == None + assert empty_gptable.scope is None + assert empty_gptable.source is None + assert empty_gptable.units is None assert empty_gptable.index_columns == {} assert empty_gptable.subtitles == [] assert empty_gptable.legend == [] assert empty_gptable.additional_formatting == [] - assert empty_gptable.instructions == "This worksheet contains one table. Some cells may refer to notes, which can be found on the notes worksheet." + assert ( + empty_gptable.instructions + == "This worksheet contains one table. Some cells may refer to notes, which can be found on the notes worksheet." + ) # Other assert empty_gptable.index_levels == 0 @@ -104,55 +79,59 @@ def test_invalid_index_level(self, level, create_gptable_with_kwargs): invalid. 
""" with pytest.raises(ValueError): - create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=["one", "two", "three"]), - # Valid column index, but invalid level - "index_columns": {level: 0} - }) - - - @pytest.mark.parametrize("idx,expectation", [ - (0, does_not_raise()), - (1, does_not_raise()), - (3, pytest.raises(ValueError)), - (-1, pytest.raises(ValueError)) - ]) + create_gptable_with_kwargs( + { + "table": pd.DataFrame(columns=["one", "two", "three"]), + # Valid column index, but invalid level + "index_columns": {level: 0}, + } + ) + + @pytest.mark.parametrize( + "idx,expectation", + [ + (0, does_not_raise()), + (1, does_not_raise()), + (3, pytest.raises(ValueError)), + (-1, pytest.raises(ValueError)), + ], + ) def test_invalid_column_index(self, idx, expectation, create_gptable_with_kwargs): """ Test that GPTable index_columns raises error when a column index number is invalid. """ with expectation: - create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=["one", "two", "three"]), - # Valid index level, but non existent column - "index_columns": {1: idx} - }) - - - @pytest.mark.parametrize("index_cols,col_headings", zip( - valid_index_columns, [ - {0, 1, 2, 3}, - {1, 2, 3}, - {2, 3}, - {3} - ] - )) - def test_set_column_index(self, index_cols, col_headings, create_gptable_with_kwargs): + create_gptable_with_kwargs( + { + "table": pd.DataFrame(columns=["one", "two", "three"]), + # Valid index level, but non existent column + "index_columns": {1: idx}, + } + ) + + @pytest.mark.parametrize( + "index_cols,col_headings", + zip(valid_index_columns, [{0, 1, 2, 3}, {1, 2, 3}, {2, 3}, {3}]), + ) + def test_set_column_index( + self, index_cols, col_headings, create_gptable_with_kwargs + ): """ Test that setting GPTable index_columns with valid column index works as expected. 
""" - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=["one", "two", "three", "four"]), - "index_columns": index_cols - }) + gptable = create_gptable_with_kwargs( + { + "table": pd.DataFrame(columns=["one", "two", "three", "four"]), + "index_columns": index_cols, + } + ) assert gptable.index_columns == index_cols assert gptable.index_levels == len(index_cols) assert gptable._column_headings == col_headings - - @pytest.mark.parametrize("not_a_table", [1, "?", [1,3,5,6]]) + @pytest.mark.parametrize("not_a_table", [1, "?", [1, 3, 5, 6]]) def test_invalid_table_type(self, not_a_table, create_gptable_with_kwargs): """ Test that setting GPTable table to object that is not a @@ -161,7 +140,6 @@ def test_invalid_table_type(self, not_a_table, create_gptable_with_kwargs): with pytest.raises(TypeError): create_gptable_with_kwargs({"table": not_a_table}) - def test_set_table_name(self, create_gptable_with_kwargs): """ Test that setting GPTable table name with a valid string works as expected @@ -169,7 +147,6 @@ def test_set_table_name(self, create_gptable_with_kwargs): gptable = create_gptable_with_kwargs({"table_name": "table_name"}) assert gptable.table_name == "table_name" - @pytest.mark.parametrize("invalid_name", invalid_text_elements_incl_none) def test_invalid_type_table_name(self, invalid_name, create_gptable_with_kwargs): """ @@ -179,7 +156,6 @@ def test_invalid_type_table_name(self, invalid_name, create_gptable_with_kwargs) with pytest.raises(TypeError): create_gptable_with_kwargs({"table_name": invalid_name}) - def test_table_name_not_list(self, create_gptable_with_kwargs): """ Test that setting GPTable table name to a list, eg with rich text, @@ -188,9 +164,10 @@ def test_table_name_not_list(self, create_gptable_with_kwargs): with pytest.raises(TypeError): create_gptable_with_kwargs({"table_name": []}) - @pytest.mark.parametrize("invalid_name", [" ", " "]) - def test_invalid_characters_table_name(self, invalid_name, 
create_gptable_with_kwargs): + def test_invalid_characters_table_name( + self, invalid_name, create_gptable_with_kwargs + ): """ Test that setting GPTable table name to string with whitespace raises an error. @@ -198,10 +175,11 @@ def test_invalid_characters_table_name(self, invalid_name, create_gptable_with_k with pytest.raises(ValueError): create_gptable_with_kwargs({"table_name": invalid_name}) - @pytest.mark.parametrize("attr", gptable_compulsory_text_attrs) @pytest.mark.parametrize("not_text", invalid_text_elements_incl_none) - def test_invalid_text_in_compulsory_str_attrs(self, attr, not_text, create_gptable_with_kwargs): + def test_invalid_text_in_compulsory_str_attrs( + self, attr, not_text, create_gptable_with_kwargs + ): """ Test that setting an invalid GPTable text types raises a TypeError for each attribute that holds a string. @@ -211,18 +189,21 @@ def test_invalid_text_in_compulsory_str_attrs(self, attr, not_text, create_gptab @pytest.mark.parametrize("attr", gptable_optional_text_attrs) @pytest.mark.parametrize("not_text", invalid_text_elements_excl_none) - def test_invalid_text_in_optional_str_attrs(self, attr, not_text, create_gptable_with_kwargs): + def test_invalid_text_in_optional_str_attrs( + self, attr, not_text, create_gptable_with_kwargs + ): """ Test that setting an invalid GPTable text types raises a TypeError for each attribute that holds a string. """ with pytest.raises(TypeError): create_gptable_with_kwargs({attr: not_text}) - @pytest.mark.parametrize("attr", gptable_compulsory_text_attrs) @pytest.mark.parametrize("text", valid_text_elements_excl_none) - def test_valid_text_in_compulsory_str_attrs(self, attr, text, create_gptable_with_kwargs): + def test_valid_text_in_compulsory_str_attrs( + self, attr, text, create_gptable_with_kwargs + ): """ Test that setting valid GPTable text elements works as expected. Test strings and list containing strings and format dicts (rich text). 
@@ -237,7 +218,9 @@ def test_valid_text_in_compulsory_str_attrs(self, attr, text, create_gptable_wit @pytest.mark.parametrize("attr", gptable_optional_text_attrs) @pytest.mark.parametrize("text", valid_text_elements_incl_none) - def test_valid_text_in_compulsory_str_attrs(self, attr, text, create_gptable_with_kwargs): + def test_valid_text_in_optional_str_attrs( + self, attr, text, create_gptable_with_kwargs + ): """ Test that setting valid GPTable text elements works as expected. Test strings and list containing strings and format dicts (rich text). @@ -250,7 +233,6 @@ def test_valid_text_in_compulsory_str_attrs(self, attr, text, create_gptable_wit else: assert getattr(gptable, attr) == text - @pytest.mark.parametrize("attr", gptable_list_text_attrs) @pytest.mark.parametrize("text", invalid_text_elements_incl_none) def test_invalid_text_in_list_attrs(self, attr, text, create_gptable_with_kwargs): @@ -262,7 +244,6 @@ def test_invalid_text_in_list_attrs(self, attr, text, create_gptable_with_kwargs with pytest.raises(TypeError): create_gptable_with_kwargs({attr: text}) - @pytest.mark.parametrize("attr", gptable_list_text_attrs) @pytest.mark.parametrize("text", valid_text_elements_excl_none) def test_valid_text_in_list_attrs(self, attr, text, create_gptable_with_kwargs): @@ -278,18 +259,18 @@ def test_valid_text_in_list_attrs(self, attr, text, create_gptable_with_kwargs): else: assert getattr(gptable, attr) == text_list - - @pytest.mark.parametrize("key", invalid_text_elements_incl_none[2:] + ["invalid_key"]) + @pytest.mark.parametrize( + "key", invalid_text_elements_incl_none[2:] + ["invalid_key"] + ) def test_invalid_additional_format_keys(self, key, create_gptable_with_kwargs): """ Test that adding additional formatting with an invalid key raises an error. 
""" with pytest.raises(ValueError): - create_gptable_with_kwargs({ - "additional_formatting": [{key: {"format": {"bold": True}}}] - }) - + create_gptable_with_kwargs( + {"additional_formatting": [{key: {"format": {"bold": True}}}]} + ) @pytest.mark.parametrize("key", ["cell", "row", "column"]) def test_valid_additional_format_keys(self, key, create_gptable_with_kwargs): @@ -297,58 +278,55 @@ def test_valid_additional_format_keys(self, key, create_gptable_with_kwargs): Test that adding additional formatting with a valid key (column, row or cell) works as expected. """ - gptable = create_gptable_with_kwargs({ - "additional_formatting": [ - {key: {"format": {"bold": True}}}] - }) - assert getattr(gptable, "additional_formatting") == [{key: {"format": {"bold": True}}}] - + gptable = create_gptable_with_kwargs( + {"additional_formatting": [{key: {"format": {"bold": True}}}]} + ) + assert getattr(gptable, "additional_formatting") == [ + {key: {"format": {"bold": True}}} + ] - @pytest.mark.parametrize("format_label", invalid_text_elements_incl_none[2:] + ["not_a_format"]) - def test_invalid_additional_format_labels(self, format_label, create_gptable_with_kwargs): + @pytest.mark.parametrize( + "format_label", invalid_text_elements_incl_none[2:] + ["not_a_format"] + ) + def test_invalid_additional_format_labels( + self, format_label, create_gptable_with_kwargs + ): """ Test that adding additional formatting with a format parameter that is not supported by XlsxWriter raises an error. 
""" with pytest.raises(ValueError): - create_gptable_with_kwargs({ - "additional_formatting": - [ - {"cell": - {"format": {format_label: True}, - "cells": (0, 0) - } - } + create_gptable_with_kwargs( + { + "additional_formatting": [ + {"cell": {"format": {format_label: True}, "cells": (0, 0)}} ] - }) - + } + ) @pytest.mark.parametrize( - "format_dict",[ + "format_dict", + [ {"bold": True}, {"font_size": 17}, {"align": "center"}, {"font_color": "red"}, - {"bottom": 1} - ]) - def test_valid_additional_format_labels(self, format_dict, create_gptable_with_kwargs): + {"bottom": 1}, + ], + ) + def test_valid_additional_format_labels( + self, format_dict, create_gptable_with_kwargs + ): """ Test that adding additional formatting with a format parameter that is supported by XlsxWriter works as expected. """ - additional_formatting = [ - {"cell": - {"format": format_dict, - "cells": (0, 0) - } - } - ] - gptable = create_gptable_with_kwargs({ - "additional_formatting": additional_formatting - }) + additional_formatting = [{"cell": {"format": format_dict, "cells": (0, 0)}}] + gptable = create_gptable_with_kwargs( + {"additional_formatting": additional_formatting} + ) assert getattr(gptable, "additional_formatting") == additional_formatting - @pytest.mark.parametrize("unit_text", valid_text_elements_excl_none) @pytest.mark.parametrize("column_id", ["columnA", 0]) def test_units_placement(self, unit_text, column_id, create_gptable_with_kwargs): @@ -357,187 +335,196 @@ def test_units_placement(self, unit_text, column_id, create_gptable_with_kwargs) """ table_with_units = pd.DataFrame(columns=[f"columnA\n({unit_text})"]) - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=["columnA"]), - }) + gptable = create_gptable_with_kwargs( + { + "table": pd.DataFrame(columns=["columnA"]), + } + ) - gptable.set_units(new_units = {column_id: unit_text}) + gptable.set_units(new_units={column_id: unit_text}) assert gptable.table.columns == table_with_units.columns - 
@pytest.mark.parametrize("column_id", ["columnA", 0]) def test_table_notes_placement(self, column_id, create_gptable_with_kwargs): """ - Test that units are placed correctly under column headers. + Test that notes are placed correctly under column headers. """ - table_with_notes = pd.DataFrame(columns=[f"columnA\n$$note_reference$$"]) + expected_columns = pd.Index(["columnA"]) - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=["columnA"]) - }) - - gptable.set_table_notes( - new_table_notes = {column_id: "$$note_reference$$"} + gptable = create_gptable_with_kwargs( + {"table": pd.DataFrame(columns=["columnA"])} ) - assert gptable.table.columns == table_with_notes.columns + gptable.set_table_notes(new_table_notes={column_id: "$$note_reference$$"}) + + assert gptable.table.columns.equals(expected_columns) + note_key = column_id if isinstance(column_id, int) else "columnA" + assert gptable.table_notes == {note_key: "$$note_reference$$"} + + assert all("$$" not in str(h) for h in gptable.table.columns) @pytest.mark.parametrize("column_id", ["columnA", 0]) - def test_table_units_and_notes_placement(self, column_id, create_gptable_with_kwargs): + def test_table_units_and_notes_placement( + self, column_id, create_gptable_with_kwargs + ): """ Test that units and notes are placed correctly under column headers. 
""" - table_with_units_and_notes = pd.DataFrame( - columns=[f"columnA\n(unit)\n$$note_reference$$"] - ) - - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=["columnA"]) - }) + expected_columns = pd.Index(["columnA\n(unit)"]) - gptable.set_units(new_units = {column_id: "unit"}) - gptable.set_table_notes(new_table_notes = {column_id: "$$note_reference$$"}) + gptable = create_gptable_with_kwargs( + {"table": pd.DataFrame(columns=["columnA"])} + ) - assert gptable.table.columns == table_with_units_and_notes.columns + gptable.set_units(new_units={column_id: "unit"}) + gptable.set_table_notes(new_table_notes={column_id: "$$note_reference$$"}) + assert gptable.table.columns.equals(expected_columns) + note_key = column_id if isinstance(column_id, int) else "columnA" + assert gptable.table_notes == {note_key: "$$note_reference$$"} + assert all("$$" not in str(h) for h in gptable.table.columns) def test_additional_formatting_with_units(self, create_gptable_with_kwargs): """ Test that units are placed correctly under column headers. """ - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=["columnA"]), - "units": {"columnA": "unit"}, - "additional_formatting": [{"column": { - "columns": ["columnA"], - "format": {"bold": True} - }}] - }) - - assert gptable.additional_formatting == [{"column": { - "columns": ["columnA\n(unit)"], - "format": {"bold": True} - }}] + gptable = create_gptable_with_kwargs( + { + "table": pd.DataFrame(columns=["columnA"]), + "units": {"columnA": "unit"}, + "additional_formatting": [ + {"column": {"columns": ["columnA"], "format": {"bold": True}}} + ], + } + ) + assert gptable.additional_formatting == [ + {"column": {"columns": ["columnA\n(unit)"], "format": {"bold": True}}} + ] def test_additional_formatting_with_table_notes(self, create_gptable_with_kwargs): """ Test that units are placed correctly under column headers. 
""" - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=["columnA"]), - "table_notes": {"columnA": "$$ref$$"}, - "additional_formatting": [{"column": { - "columns": ["columnA"], - "format": {"bold": True} - }}] - }) - - assert gptable.additional_formatting == [{"column": { - "columns": ["columnA\n$$ref$$"], - "format": {"bold": True} - }}] + gptable = create_gptable_with_kwargs( + { + "table": pd.DataFrame(columns=["columnA"]), + "table_notes": {"columnA": "$$ref$$"}, + "additional_formatting": [ + {"column": {"columns": ["columnA"], "format": {"bold": True}}} + ], + } + ) + assert gptable.additional_formatting == [ + {"column": {"columns": ["columnA"], "format": {"bold": True}}} + ] + assert gptable.table_notes == {"columnA": "$$ref$$"} + assert all("$$" not in str(h) for h in gptable.table.columns) - def test_additional_formatting_with_units_and_table_notes(self, create_gptable_with_kwargs): + def test_additional_formatting_with_units_and_table_notes( + self, create_gptable_with_kwargs + ): """ Test that units are placed correctly under column headers. 
""" - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=["columnA"]), - "units": {"columnA": "unit"}, - "table_notes": {"columnA": "$$ref$$"}, - "additional_formatting": [{"column": { - "columns": ["columnA"], - "format": {"bold": True} - }}] - }) + gptable = create_gptable_with_kwargs( + { + "table": pd.DataFrame(columns=["columnA"]), + "units": {0: "unit"}, + "table_notes": {0: "$$ref$$"}, + "additional_formatting": [ + {"column": {"columns": ["columnA"], "format": {"bold": True}}} + ], + } + ) - assert gptable.additional_formatting == [{"column": { - "columns": ["columnA\n(unit)\n$$ref$$"], - "format": {"bold": True} - }}] + assert gptable.additional_formatting == [ + { + "column": { + "columns": ["columnA\n(unit)"], + "format": {"bold": True}, + } + } + ] + assert gptable.units == {0: "unit"} + assert gptable.table_notes == {0: "$$ref$$"} - @pytest.mark.parametrize("column_names,expectation", [ - (["columnA", "columnB"], does_not_raise()), - (["columnA", ""], pytest.raises(ValueError)), - ([None, "columnB"], pytest.raises(ValueError)) - ]) - def test__validate_all_column_names_have_text(self, column_names, expectation, create_gptable_with_kwargs): + @pytest.mark.parametrize( + "column_names,expectation", + [ + (["columnA", "columnB"], does_not_raise()), + (["columnA", ""], pytest.raises(ValueError)), + ([None, "columnB"], pytest.raises(ValueError)), + ], + ) + def test__validate_all_column_names_have_text( + self, column_names, expectation, create_gptable_with_kwargs + ): """ Test that GPTable raises error when there are null values or empty strings for column names. 
""" with expectation: - create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=column_names) - }) + create_gptable_with_kwargs({"table": pd.DataFrame(columns=column_names)}) - - @pytest.mark.parametrize("column_names,expectation", [ - (["columnA", "columnB", "columnC"], does_not_raise()), - (["columnA", "columnB", "columnB"], pytest.raises(ValueError)) - ]) - def test__validate_no_duplicate_column_names(self, column_names, expectation, create_gptable_with_kwargs): + @pytest.mark.parametrize( + "column_names,expectation", + [ + (["columnA", "columnB", "columnC"], does_not_raise()), + (["columnA", "columnB", "columnB"], pytest.raises(ValueError)), + ], + ) + def test__validate_no_duplicate_column_names( + self, column_names, expectation, create_gptable_with_kwargs + ): """ Test that GPTable raises error when there are duplicate column names in table data. """ with expectation: - create_gptable_with_kwargs({ - "table": pd.DataFrame(columns=column_names) - }) - + create_gptable_with_kwargs({"table": pd.DataFrame(columns=column_names)}) class TestIndirectAttrs: """ Test that non-formatting create_gptable_with_kwargs attributes are indirectly set correctly. """ + @pytest.mark.parametrize("index_cols", valid_index_columns) def test_index_levels_set(self, index_cols, create_gptable_with_kwargs): """ Test that number of index levels are set, when one, two or three indexes are used. """ - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame( - columns = [ - "one", - "two", - "three", - "four" - ]), - "index_columns": index_cols - }) + gptable = create_gptable_with_kwargs( + { + "table": pd.DataFrame(columns=["one", "two", "three", "four"]), + "index_columns": index_cols, + } + ) assert getattr(gptable, "index_levels") == len(index_cols) - @pytest.mark.parametrize("index_cols", valid_index_columns) def test_column_headings_set(self, index_cols, create_gptable_with_kwargs): """ Test that non-index columns are set as column headings. 
""" - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame( - columns = [ - "one", - "two", - "three", - "four" - ]), - "index_columns": index_cols - }) - + gptable = create_gptable_with_kwargs( + { + "table": pd.DataFrame(columns=["one", "two", "three", "four"]), + "index_columns": index_cols, + } + ) + # Expect all column numbers that have no index level assigned exp = set(range(4)) - set(range(len(index_cols))) - - assert getattr(gptable, "_column_headings") == exp + assert getattr(gptable, "_column_headings") == exp def test__annotations_set(self, create_gptable_with_kwargs): """ @@ -555,7 +542,7 @@ def test__annotations_set(self, create_gptable_with_kwargs): "legend": ["Legend$$6$$"], "units": {0: "Unit$$7$$"}, "table_notes": {0: "Note$$8$$"}, - "table": table + "table": table, } gptable = create_gptable_with_kwargs(kwargs) diff --git a/gptables/test/test_theme.py b/gptables/test/test_theme.py index 060e3cba..af3aed3e 100644 --- a/gptables/test/test_theme.py +++ b/gptables/test/test_theme.py @@ -1,20 +1,18 @@ -import pytest -import os from contextlib import redirect_stdout -from pkg_resources import resource_filename from itertools import chain, combinations -from gptables import Theme -from gptables import gptheme - +import importlib_resources +import pytest +from gptables import Theme, gptheme valid_description_elements = ["legend", "instructions", "scope", "source"] def powerset(iterable): s = list(iterable) - return chain.from_iterable(combinations(s, r) for r in range(len(s)+1)) + return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) + @pytest.fixture() def empty_theme(): @@ -24,19 +22,18 @@ def empty_theme(): class TestCleanInitTheme: """ Test initialisation of the Theme class without config. 
- """ + """ + @pytest.mark.parametrize("attr", Theme()._format_attributes) - def test_default_format_attrs(self, attr, empty_theme): + def test_default_format_attrs(self, attr, empty_theme): """Test Theme attribute default types""" exp = {} got = getattr(empty_theme, attr) assert exp == got - def test_default_other_attrs(self, empty_theme): assert empty_theme.description_order == [] - def test_print_attributes(self, empty_theme): """Test Theme print_attributes()""" from io import StringIO @@ -44,11 +41,10 @@ def test_print_attributes(self, empty_theme): file_handler = StringIO() with redirect_stdout(file_handler): empty_theme.print_attributes() - + got = file_handler.getvalue() - exp = ( -"""cover_title_format : {} + exp = """cover_title_format : {} cover_subtitle_format : {} cover_text_format : {} title_format : {} @@ -64,107 +60,69 @@ def test_print_attributes(self, empty_theme): legend_format : {} description_order : [] """ - ) assert got == exp + class TestConfigInitTheme: """ Test initialisation of the Theme class using a config dictionary. 
""" + def test_dict_init(self): config = { - "global": - { - "font_size": 12, - "font_name": "Arial", - "font_color": "automatic" - }, - - "cover_title": - { - 'font_size': 16, - 'bold': True, - 'text_wrap': True, - }, - - "cover_subtitle": - { - 'font_size': 14, - 'bold': True, - 'text_wrap': True, - }, - - "cover_text": - { - 'text_wrap': True, - }, - - "title": - { - "bold": True, - "font_size": 16 - }, - - "subtitle": - {"font_size": 14}, - + "global": { + "font_size": 12, + "font_name": "Arial", + "font_color": "automatic", + }, + "cover_title": { + "font_size": 16, + "bold": True, + "text_wrap": True, + }, + "cover_subtitle": { + "font_size": 14, + "bold": True, + "text_wrap": True, + }, + "cover_text": { + "text_wrap": True, + }, + "title": {"bold": True, "font_size": 16}, + "subtitle": {"font_size": 14}, "instructions": None, - "scope": None, - - "column_heading": - { + "column_heading": { "bold": True, "bottom": 1, "text_wrap": 1, - "valign": "top" - }, - - "index_1": - { - "bold": True, - "text_wrap": 1 - }, - + "valign": "top", + }, + "index_1": {"bold": True, "text_wrap": 1}, "index_2": {"text_wrap": 1}, - "index_3": {"text_wrap": 1}, - "data": {"text_wrap": 1}, - - "source": - {"font_size": 12}, - - "legend": - {"font_size": 12}, - - "description_order": - [ - "instructions", - "legend", - "source", - "scope" - ], - } + "source": {"font_size": 12}, + "legend": {"font_size": 12}, + "description_order": ["instructions", "legend", "source", "scope"], + } got = Theme(config) - + exp = gptheme - + assert exp == got - + def test_file_init(self): """ Test initialisation of Theme using default theme yaml config file. 
""" - config_file = resource_filename( - "gptables", - "themes/gptheme.yaml" - ) - got = Theme(config_file) - + config_file = importlib_resources.files("gptables") / "themes/gptheme.yaml" + + got = Theme(str(config_file)) + exp = gptheme - + assert exp == got @@ -172,6 +130,7 @@ class TestFormatValidationTheme: """ Test validation of format dictionaries. """ + def test_invalid_attribute_config(self, empty_theme): """ Test that invalid attribute names in config raises a ValueError. @@ -180,7 +139,6 @@ def test_invalid_attribute_config(self, empty_theme): with pytest.raises(ValueError): empty_theme.apply_config(config) - def test_invalid_format_label_config(self, empty_theme): """ Test that invalid format labels in config raises a ValueError. @@ -188,7 +146,6 @@ def test_invalid_format_label_config(self, empty_theme): config = {"notes": {"not_a_format": 5}} with pytest.raises(ValueError): empty_theme.apply_config(config) - @pytest.mark.parametrize("attr", Theme()._format_attributes) def test_invalid_format_label_single_attr(self, attr, empty_theme): @@ -199,7 +156,6 @@ def test_invalid_format_label_single_attr(self, attr, empty_theme): format_dict = {"not_a_format": 5} with pytest.raises(ValueError): getattr(empty_theme, "update_" + attr)(format_dict) - @pytest.mark.parametrize("attr", Theme()._format_attributes) def test_valid_format_label(self, attr, empty_theme): @@ -208,13 +164,12 @@ def test_valid_format_label(self, attr, empty_theme): is used. 
""" getattr(empty_theme, "update_" + attr)({"font_size": 9}) - + exp = {"font_size": 9} got = getattr(empty_theme, attr) - + assert exp == got - def test_valid_format_label_config(self, empty_theme): """ Test that valid format labels in config changes specified format attr @@ -223,14 +178,14 @@ def test_valid_format_label_config(self, empty_theme): """ config = {"source": {"font_size": 5}} empty_theme.apply_config(config) - + exp = {"font_size": 5} got = empty_theme.source_format - + assert exp == got - + got2 = empty_theme.title_format - + assert {} == got2 @@ -238,16 +193,10 @@ class TestOtherValidationTheme: """ Test validation of non-format Theme attributes. """ + @pytest.mark.parametrize( - "format_order",[ - "notes", - {"annotations": 2}, - 1, - 3.14, - False, - None - ] - ) + "format_order", ["notes", {"annotations": 2}, 1, 3.14, False, None] + ) def test_invalid_description_order_type(self, format_order, empty_theme): """ Test that non-list description_order entries raise a TypeError. 
@@ -255,16 +204,7 @@ def test_invalid_description_order_type(self, format_order, empty_theme): with pytest.raises(TypeError): empty_theme.update_description_order(format_order) - - @pytest.mark.parametrize( - "format_order",[ - ["potato"], - [1], - [3.14], - [dict()], - [[]] - ] - ) + @pytest.mark.parametrize("format_order", [["potato"], [1], [3.14], [dict()], [[]]]) def test_invalid_description_order_values(self, format_order, empty_theme): """ Test that list description_order entries containing invalid elements raises @@ -272,7 +212,6 @@ def test_invalid_description_order_values(self, format_order, empty_theme): """ with pytest.raises(ValueError): empty_theme.update_description_order(format_order) - @pytest.mark.parametrize("description_order", powerset(valid_description_elements)) def test_valid_description_order_values(self, description_order, empty_theme): diff --git a/gptables/test/test_utils/excel_comparison_test.py b/gptables/test/test_utils/excel_comparison_test.py index 28c73f77..2896392b 100644 --- a/gptables/test/test_utils/excel_comparison_test.py +++ b/gptables/test/test_utils/excel_comparison_test.py @@ -6,8 +6,9 @@ # Copyright (c), 2013-2022, John McNamara, jmcnamara@cpan.org # -import unittest import os +import unittest + from .helperfunctions import _compare_xlsx_files @@ -18,34 +19,36 @@ class ExcelComparisonTest(unittest.TestCase): """ + maxDiff = None + def set_filename(self, filename): # Set the filename and paths for the test xlsx files. - self.maxDiff = None - self.got_filename = '' - self.exp_filename = '' + self.got_filename = "" + self.exp_filename = "" self.ignore_files = [] self.ignore_elements = {} - self.test_dir = 'xlsxwriter/test/comparison/' - self.vba_dir = self.test_dir + 'xlsx_files/' - self.image_dir = self.test_dir + 'images/' + self.test_dir = "xlsxwriter/test/comparison/" + self.vba_dir = self.test_dir + "xlsx_files/" + self.image_dir = self.test_dir + "images/" # The reference Excel generated file. 
- self.exp_filename = self.test_dir + 'xlsx_files/' + filename + self.exp_filename = self.test_dir + "xlsx_files/" + filename # The generated XlsxWriter file. - self.got_filename = self.test_dir + '_test_' + filename - + self.got_filename = self.test_dir + "_test_" + filename def set_text_file(self, filename): # Set the filename and path for text files used in tests. - self.txt_filename = self.test_dir + 'actual_' + filename + self.txt_filename = self.test_dir + "actual_" + filename def assertExcelEqual(self): # Compare the generate file and the reference Excel file. - got, exp = _compare_xlsx_files(self.got_filename, - self.exp_filename, - self.ignore_files, - self.ignore_elements) + got, exp = _compare_xlsx_files( + self.got_filename, + self.exp_filename, + self.ignore_files, + self.ignore_elements, + ) self.assertEqual(exp, got) diff --git a/gptables/test/test_utils/helperfunctions.py b/gptables/test/test_utils/helperfunctions.py index 1f379b18..46587d7e 100644 --- a/gptables/test/test_utils/helperfunctions.py +++ b/gptables/test/test_utils/helperfunctions.py @@ -6,28 +6,26 @@ # Copyright (c), 2013-2022, John McNamara, jmcnamara@cpan.org # -import re import os.path -from zipfile import ZipFile -from zipfile import BadZipfile -from zipfile import LargeZipFile +import re +from zipfile import BadZipfile, LargeZipFile, ZipFile def _xml_to_list(xml_str): # Convert test generated XML strings into lists for comparison testing. # Split the XML string at tag boundaries. - parser = re.compile(r'>\s*<') + parser = re.compile(r">\s*<") elements = parser.split(xml_str.strip()) elements = [s.replace("\r", "") for s in elements] # Add back the removed brackets. 
for index, element in enumerate(elements): - if not element[0] == '<': - elements[index] = '<' + elements[index] - if not element[-1] == '>': - elements[index] = elements[index] + '>' + if not element[0] == "<": + elements[index] = "<" + elements[index] + if not element[-1] == ">": + elements[index] = elements[index] + ">" return elements @@ -42,7 +40,7 @@ def _vml_to_list(vml_str): vml_str = vml_str.replace("\r", "") vml = vml_str.split("\n") - vml_str = '' + vml_str = "" for line in vml: # Skip blank lines. @@ -60,11 +58,11 @@ def _vml_to_list(vml_str): line += " " # Add newline after element end. - if re.search('>$', line): + if re.search(">$", line): line += "\n" # Split multiple elements. - line = line.replace('><', ">\n<") + line = line.replace("><", ">\n<") # Put all of Anchor on one line. if line == "\n": @@ -97,7 +95,9 @@ def _sort_rel_file_data(xml_elements): return xml_elements -def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements): +def _compare_xlsx_files( + got_file, exp_file, ignore_files, ignore_elements, ignore_auto_colour=True +): # Compare two XLSX files by extracting the XML files from each # zip archive and comparing them. # @@ -106,25 +106,26 @@ def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements): # # In order to compare the XLSX files we convert the data in each # XML file into an list of XML elements. + # try: # Open the XlsxWriter as a zip file for testing. - got_zip = ZipFile(got_file, 'r') + got_zip = ZipFile(got_file, "r") except IOError as e: error = "XlsxWriter file error: " + str(e) - return error, '' + return error, "" except (BadZipfile, LargeZipFile) as e: error = "XlsxWriter zipfile error, '" + exp_file + "': " + str(e) - return error, '' + return error, "" try: # Open the Excel as a zip file for testing. 
- exp_zip = ZipFile(exp_file, 'r') + exp_zip = ZipFile(exp_file, "r") except IOError as e: error = "Excel file error: " + str(e) - return error, '' + return error, "" except (BadZipfile, LargeZipFile) as e: error = "Excel zipfile error, '" + exp_file + "': " + str(e) - return error, '' + return error, "" # Get the filenames from the zip files. got_files = sorted(got_zip.namelist()) @@ -145,50 +146,54 @@ def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements): # Compare binary files with string comparison based on extension. extension = os.path.splitext(filename)[1] - if extension in ('.png', '.jpeg', '.gif', '.bmp', '.wmf', '.emf', '.bin'): + if extension in (".png", ".jpeg", ".gif", ".bmp", ".wmf", ".emf", ".bin"): if got_xml_str != exp_xml_str: - return 'got: %s' % filename, 'exp: %s' % filename + return "got: %s" % filename, "exp: %s" % filename continue - got_xml_str = got_xml_str.decode('utf-8') - exp_xml_str = exp_xml_str.decode('utf-8') + got_xml_str = got_xml_str.decode("utf-8") + exp_xml_str = exp_xml_str.decode("utf-8") # Remove dates and user specific data from the core.xml data. - if filename == 'docProps/core.xml': - exp_xml_str = re.sub(r' ?John', '', exp_xml_str) - exp_xml_str = re.sub(r'\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ', - '', exp_xml_str) - got_xml_str = re.sub(r'\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ', - '', got_xml_str) + if filename == "docProps/core.xml": + exp_xml_str = re.sub(r" ?John", "", exp_xml_str) + exp_xml_str = re.sub( + r"\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ", "", exp_xml_str + ) + got_xml_str = re.sub( + r"\d\d\d\d-\d\d-\d\dT\d\d\:\d\d:\d\dZ", "", got_xml_str + ) # Remove workbookView dimensions which are almost always different # and calcPr which can have different Excel version ids. 
- if filename == 'xl/workbook.xml': - exp_xml_str = re.sub(r']*>', - '', exp_xml_str) - got_xml_str = re.sub(r']*>', - '', got_xml_str) - exp_xml_str = re.sub(r']*>', - '', exp_xml_str) - got_xml_str = re.sub(r']*>', - '', got_xml_str) + if filename == "xl/workbook.xml": + exp_xml_str = re.sub(r"]*>", "", exp_xml_str) + got_xml_str = re.sub(r"]*>", "", got_xml_str) + exp_xml_str = re.sub(r"]*>", "", exp_xml_str) + got_xml_str = re.sub(r"]*>", "", got_xml_str) # Remove printer specific settings from Worksheet pageSetup elements. - if re.match(r'xl/worksheets/sheet\d.xml', filename): - exp_xml_str = re.sub(r'horizontalDpi="200" ', '', exp_xml_str) - exp_xml_str = re.sub(r'verticalDpi="200" ', '', exp_xml_str) - exp_xml_str = re.sub(r'(]*) r:id="rId1"', - r'\1', exp_xml_str) + if re.match(r"xl/worksheets/sheet\d.xml", filename): + exp_xml_str = re.sub(r'horizontalDpi="200" ', "", exp_xml_str) + exp_xml_str = re.sub(r'verticalDpi="200" ', "", exp_xml_str) + exp_xml_str = re.sub(r'(]*) r:id="rId1"', r"\1", exp_xml_str) # Remove Chart pageMargin dimensions which are almost always different. - if re.match(r'xl/charts/chart\d.xml', filename): - exp_xml_str = re.sub(r']*>', - '', exp_xml_str) - got_xml_str = re.sub(r']*>', - '', got_xml_str) + if re.match(r"xl/charts/chart\d.xml", filename): + exp_xml_str = re.sub( + r"]*>", "", exp_xml_str + ) + got_xml_str = re.sub( + r"]*>", "", got_xml_str + ) + + # Remove automatic color tags + if ignore_auto_colour: + exp_xml_str = re.sub(r'', "", exp_xml_str) + got_xml_str = re.sub(r'', "", got_xml_str) # Convert the XML string to lists for comparison. - if re.search('.vml$', filename): + if re.search(".vml$", filename): got_xml = _xml_to_list(got_xml_str) exp_xml = _vml_to_list(exp_xml_str) else: @@ -204,7 +209,7 @@ def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements): got_xml = [tag for tag in got_xml if not re.match(pattern, tag)] # Reorder the XML elements in the XLSX relationship files. 
- if filename == '[Content_Types].xml' or re.search('.rels$', filename): + if filename == "[Content_Types].xml" or re.search(".rels$", filename): got_xml = _sort_rel_file_data(got_xml) exp_xml = _sort_rel_file_data(exp_xml) @@ -215,7 +220,7 @@ def _compare_xlsx_files(got_file, exp_file, ignore_files, ignore_elements): return got_xml, exp_xml # If we got here the files are the same. - return 'Ok', 'Ok' + return "Ok", "Ok" # External wrapper function to allow simplified equality testing of two Excel diff --git a/gptables/test/test_wrappers.py b/gptables/test/test_wrappers.py index cb083604..1fd60b81 100644 --- a/gptables/test/test_wrappers.py +++ b/gptables/test/test_wrappers.py @@ -1,51 +1,40 @@ -import pytest from collections import namedtuple -import pandas as pd -from pandas.testing import assert_frame_equal, assert_series_equal +import pandas as pd +import pytest import xlsxwriter +from pandas.testing import assert_frame_equal, assert_series_equal import gptables -from gptables.core.wrappers import GPWorkbook -from gptables.core.wrappers import GPWorksheet +from gptables import Theme, gptheme from gptables.core.gptable import FormatList -from gptables import Theme -from gptables import gptheme -from gptables.test.test_gptable import create_gptable_with_kwargs, does_not_raise +from gptables.core.wrappers import GPWorkbook, GPWorksheet +from gptables.test.test_gptable import does_not_raise Tb = namedtuple("Testbook", "wb ws") valid_text_elements = [ # Not None "This is a string", FormatList(["More than ", {"italic": True}, "just ", "a string"]), - FormatList([{"bold":True}, "text"]) + FormatList([{"bold": True}, "text"]), ] test_text_list = [ "This has a $$reference$$", "This one doesn't", - "Here's $$another$$one" - ] + "Here's $$another$$one", +] -exp_text_list = [ - "This has a [note 1]", - "This one doesn't", - "Here's one[note 2]" - ] +exp_text_list = ["This has a [note 1]", "This one doesn't", "Here's one[note 2]"] -invalid_text_elements = [ - dict(), - 
set(), - 42, - 3.14, - True -] +invalid_text_elements = [dict(), set(), 42, 3.14, True] + @pytest.fixture() def testbook(): # See https://github.com/jmcnamara/XlsxWriter/issues/746#issuecomment-685869888 - wb = GPWorkbook(options={'in_memory': True}) + wb = GPWorkbook(options={"in_memory": True}) ws = wb.add_worksheet() yield Tb(wb, ws) wb.fileclosed = 1 @@ -55,24 +44,19 @@ class TestGPWorksheetInit: """ Test that default attributes are set when GPWorksheets are created. """ - + def test_subclass(self): """ Test that the GPWorksheet class is a subclass of the XlsxWriter Worksheet class. """ - assert issubclass( - GPWorksheet, - xlsxwriter.worksheet.Worksheet - ) - + assert issubclass(GPWorksheet, xlsxwriter.worksheet.Worksheet) def test_default_theme_set(self, testbook): """ Test that the default theme (gptheme) is used when no theme is set. """ assert testbook.wb.theme == gptheme - def test_default_gridlines(self, testbook): """ @@ -80,24 +64,16 @@ def test_default_gridlines(self, testbook): """ assert testbook.ws.print_gridlines == 0 assert testbook.ws.screen_gridlines == 0 - def test_wb_reference(self, testbook): """ Test that GPWorksheets reference their parent GPWorkbook. 
""" assert testbook.ws._workbook == testbook.wb - - - @pytest.mark.parametrize("not_a_gptable", [ - dict(), - set(), - [], - 1, - 3.14, - "test_string", - pd.DataFrame() - ]) + + @pytest.mark.parametrize( + "not_a_gptable", [dict(), set(), [], 1, 3.14, "test_string", pd.DataFrame()] + ) def test_invalid_write_gptable(self, not_a_gptable, testbook): """ Test that write_gptable() raises a TypeError when argument is not a @@ -121,77 +97,76 @@ def test__smart_write_str(self, testbook): got_string = testbook.ws.str_table.string_table exp_string = {valid_text_elements[0]: 0} assert got_string == exp_string - + # String is referenced using a named tuple (string, Format) # Here we get first element, which references string lookup location got_lookup = testbook.ws.table[0][0][0] exp_lookup = 0 assert got_lookup == exp_lookup - def test__smart_write_formatted_str(self, testbook): testbook.ws._smart_write(1, 2, valid_text_elements[0], {"bold": True}) # Strings are stored in a lookup table for efficiency got_string = testbook.ws.str_table.string_table exp_string = {valid_text_elements[0]: 0} assert got_string == exp_string - + # String is referenced using a named tuple (string, Format) # Here we get first element, which references string lookup location - cell = testbook.ws.table[1][2] + cell = testbook.ws.table[1][2] got_lookup = cell[0] exp_lookup = 0 assert got_lookup == exp_lookup - + format_obj = cell[1] assert format_obj.bold - def test__smart_write_rich_text(self, testbook): testbook.wb.set_theme(Theme({})) - + testbook.ws._smart_write(0, 0, valid_text_elements[1], {}) # Strings are stored in a lookup table for efficiency got_string = testbook.ws.str_table.string_table - exp_string = {'More than just a string': 0} + exp_string = { + 'More than just a string': 0 + } assert got_string == exp_string - + # String is referenced using a named tuple (string, Format) # Here we get first element, which references string lookup location got_lookup = testbook.ws.table[0][0][0] 
exp_lookup = 0 assert got_lookup == exp_lookup - - def test__smart_write_formatted_rich_text(self, testbook): testbook.wb.set_theme(Theme({})) - + testbook.ws._smart_write(1, 2, valid_text_elements[1], {}) # Strings are stored in a lookup table for efficiency got_string = testbook.ws.str_table.string_table - exp_string = {'More than ' - 'just a string': 0} + exp_string = { + 'More than ' + 'just a string': 0 + } assert got_string == exp_string - + # String is referenced using a named tuple (string, Format) # Here we get first element, which references string lookup location - cell = testbook.ws.table[1][2] + cell = testbook.ws.table[1][2] got_lookup = cell[0] exp_lookup = 0 assert got_lookup == exp_lookup - def test__smart_write_link(self, testbook): testbook.wb.set_theme(Theme({})) @@ -204,7 +179,7 @@ def test__smart_write_link(self, testbook): exp_string = {display_text: 0} assert got_string == exp_string - got_hyperlink = testbook.ws.hyperlinks[0][0]["url"] + got_hyperlink = testbook.ws.hyperlinks[0][0]._link exp_hyperlink = url assert got_hyperlink == exp_hyperlink @@ -217,9 +192,8 @@ def test__smart_write_link(self, testbook): assert got_lookup == exp_lookup format_obj = cell[1] - assert format_obj.underline == True - assert format_obj.font_color == "#0000FF" # aka Blue - + assert format_obj.underline is True + assert format_obj.font_color._rgb_hex_value() == "0000FF" # aka Blue def test__smart_write_null_cell(self, testbook): testbook.ws._smart_write(0, 0, None, {}) @@ -233,80 +207,100 @@ def test__smart_write_null_cell(self, testbook): cell = testbook.ws.table[0][0] assert len(cell) == 1 - def test__write_empty_table(self, testbook, create_gptable_with_kwargs): - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame({"col": [None]}) - }) + gptable = create_gptable_with_kwargs({"table": pd.DataFrame({"col": [None]})}) with pytest.raises(ValueError): - testbook.ws._write_table_elements([0,0], gptable, auto_width=True) + 
testbook.ws._write_table_elements([0, 0], gptable, auto_width=True) def test__write_integer_table(self, testbook, create_gptable_with_kwargs): - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame({"a": [0], "b": [1]}) - }) - - #Testing that this function executes with no errors - testbook.ws._write_table_elements([0,0], gptable, auto_width=True) - - - @pytest.mark.parametrize("cell_value1,cell_value2,expectation", [ - (None, "valid text", pytest.warns(UserWarning)), - ("", "valid text", pytest.warns(UserWarning)), - (" ", "valid text", pytest.warns(UserWarning)), - (" ", "valid text", pytest.warns(UserWarning)), - ("_", "valid text", pytest.raises(ValueError)), - (" *", "valid text", pytest.raises(ValueError)), - (" Hello_World! ", "valid text", does_not_raise()), - ]) - def test__write_table_elements_cell_validation(self, testbook, - create_gptable_with_kwargs, cell_value1, cell_value2, expectation): - gptable = create_gptable_with_kwargs({ - "table": pd.DataFrame({ - "colA": [cell_value1, cell_value2], - "colB": ["valid text", "valid text"] - }) - }) + gptable = create_gptable_with_kwargs( + {"table": pd.DataFrame({"a": [0], "b": [1]})} + ) + + # Testing that this function executes with no errors + testbook.ws._write_table_elements([0, 0], gptable, auto_width=True) + + @pytest.mark.parametrize( + "cell_value1,cell_value2,expectation", + [ + (None, "valid text", pytest.warns(UserWarning)), + ("", "valid text", pytest.warns(UserWarning)), + (" ", "valid text", pytest.warns(UserWarning)), + (" ", "valid text", pytest.warns(UserWarning)), + ("_", "valid text", pytest.raises(ValueError)), + (" *", "valid text", pytest.raises(ValueError)), + (" Hello_World! 
", "valid text", does_not_raise()), + ], + ) + def test__write_table_elements_cell_validation( + self, + testbook, + create_gptable_with_kwargs, + cell_value1, + cell_value2, + expectation, + ): + gptable = create_gptable_with_kwargs( + { + "table": pd.DataFrame( + { + "colA": [cell_value1, cell_value2], + "colB": ["valid text", "valid text"], + } + ) + } + ) with expectation: - testbook.ws._write_table_elements([0,0], gptable, auto_width=True) + testbook.ws._write_table_elements([0, 0], gptable, auto_width=True) + def test__apply_column_alignments(self, testbook): + data_table = pd.DataFrame( + { + "index_column": [1, 2], + "integer_column": [1, 2], + "float_column": [1.1, 2.2], + "string_column": ["A", "B"], + "url_column": [{"display_text": "link"}, {"display_text": "link"}], + "integer_with_confidential_shorthand": [1, "[c]"], + "float_with_significant_shorthand": ["1.1[sss]", 2.2], + } + ) + format_table = pd.DataFrame( + { + "index_column": [{}, {}], + "integer_column": [{}, {}], + "float_column": [{}, {}], + "string_column": [{}, {}], + "url_column": [{}, {}], + "integer_with_confidential_shorthand": [{}, {}], + "float_with_significant_shorthand": [{}, {}], + } + ) - def test__apply_column_alignments(self, testbook): - data_table = pd.DataFrame({ - "index_column": [1, 2], - "integer_column": [1, 2], - "float_column": [1.1, 2.2], - "string_column": ["A", "B"], - "url_column": [{"display_text": "link"}, {"display_text": "link"}], - "integer_with_confidential_shorthand": [1, "[c]"], - "float_with_significant_shorthand": ["1.1[sss]", 2.2], - }) - - format_table = pd.DataFrame({ - "index_column": [{}, {}], - "integer_column": [{}, {}], - "float_column": [{}, {}], - "string_column": [{}, {}], - "url_column": [{}, {}], - "integer_with_confidential_shorthand": [{}, {}], - "float_with_significant_shorthand": [{}, {}], - }) - - testbook.ws._apply_column_alignments(data_table, format_table, index_columns=[0]) - - exp_format_table = pd.DataFrame({ - "index_column": 
[{"align": "left"}, {"align": "left"}], - "integer_column": [{"align": "right"}, {"align": "right"}], - "float_column": [{"align": "right"}, {"align": "right"}], - "string_column": [{"align": "left"}, {"align": "left"}], - "url_column": [{"align": "left"}, {"align": "left"}], - "integer_with_confidential_shorthand": [{"align": "right"}, {"align": "right"}], - "float_with_significant_shorthand": [{"align": "right"}, {"align": "right"}], - }) + testbook.ws._apply_column_alignments( + data_table, format_table, index_columns=[0] + ) - assert_frame_equal(format_table, exp_format_table) + exp_format_table = pd.DataFrame( + { + "index_column": [{"align": "left"}, {"align": "left"}], + "integer_column": [{"align": "right"}, {"align": "right"}], + "float_column": [{"align": "right"}, {"align": "right"}], + "string_column": [{"align": "left"}, {"align": "left"}], + "url_column": [{"align": "left"}, {"align": "left"}], + "integer_with_confidential_shorthand": [ + {"align": "right"}, + {"align": "right"}, + ], + "float_with_significant_shorthand": [ + {"align": "right"}, + {"align": "right"}, + ], + } + ) + assert_frame_equal(format_table, exp_format_table) class TestGPWorksheetReferences: @@ -314,6 +308,7 @@ class TestGPWorksheetReferences: Test that GPTable note references are modified correctly by GPWorksheet during write_gptable(). 
""" + @pytest.mark.parametrize("text", test_text_list) def test__replace_reference(self, text, testbook): """ @@ -323,60 +318,60 @@ def test__replace_reference(self, text, testbook): got_output = [] reference_order = ["reference", "another"] - got_output = [testbook.ws._replace_reference(text, reference_order) for text in test_text_list] - + got_output = [ + testbook.ws._replace_reference(text, reference_order) + for text in test_text_list + ] + exp_refs = ["reference", "another"] assert reference_order == exp_refs assert got_output == exp_text_list - - @pytest.mark.parametrize("text,refs,output", - zip(test_text_list, - [["reference"], [], ["another"]], - ["This has a [note 1]", "This one doesn't", "Here's one[note 2]"] - )) + @pytest.mark.parametrize( + "text,refs,output", + zip( + test_text_list, + [["reference"], [], ["another"]], + ["This has a [note 1]", "This one doesn't", "Here's one[note 2]"], + ), + ) def test__replace_reference_in_attr_str(self, text, refs, output, testbook): """ Test that references are replaced in a single string. """ reference_order = ["reference", "another"] - got_text = testbook.ws._replace_reference_in_attr( - text, - reference_order - ) + got_text = testbook.ws._replace_reference_in_attr(text, reference_order) assert got_text == output - def test__replace_reference_in_attr_dict(self, testbook): """ Test that references are replaced in dictionary values, but not keys. 
""" reference_order = ["reference", "another"] test_text_dict = { - "$$key$$": "This is a value with a $$reference$$", - "second_key": "Second value", - "another_key": "$$another$$reference" - } + "$$key$$": "This is a value with a $$reference$$", + "second_key": "Second value", + "another_key": "$$another$$reference", + } got_text = testbook.ws._replace_reference_in_attr( - test_text_dict, - reference_order - ) - + test_text_dict, reference_order + ) + exp_text_dict = { - "$$key$$": "This is a value with a [note 1]", - "second_key": "Second value", - "another_key": "reference[note 2]" - } - - assert got_text == exp_text_dict + "$$key$$": "This is a value with a [note 1]", + "second_key": "Second value", + "another_key": "reference[note 2]", + } + assert got_text == exp_text_dict class TestGPWorksheetFormatUpdate: """ Test that GPWorksheet format updating methods work as expected. """ + def test__apply_format_dict(self, testbook): test = dict() format_dict = {"bold": True} @@ -384,7 +379,6 @@ def test__apply_format_dict(self, testbook): exp = {"bold": True} assert test == exp - def test__apply_format_series(self, testbook): test = pd.Series([{} for n in range(3)]) format_dict = {"bold": True} @@ -392,37 +386,38 @@ def test__apply_format_series(self, testbook): exp = pd.Series([{"bold": True} for n in range(3)]) assert_series_equal(test, exp) - def test__apply_format_dataframe(self, testbook): - test = pd.DataFrame(columns=[0, 1, 2], index = [0, 1]) + test = pd.DataFrame(columns=[0, 1, 2], index=[0, 1]) test.iloc[0] = [{} for n in range(3)] test.iloc[1] = [{} for n in range(3)] - + format_dict = {"bold": True} testbook.ws._apply_format(test, format_dict) - exp = pd.DataFrame(columns=[0, 1, 2], index = [0, 1]) + exp = pd.DataFrame(columns=[0, 1, 2], index=[0, 1]) exp.iloc[0] = [{"bold": True} for n in range(3)] exp.iloc[1] = [{"bold": True} for n in range(3)] assert_frame_equal(test, exp) - class TestGPWorksheetTable: """ Test that the table property inherited from 
`xlsxwriter.Worksheet` is set correctly. """ - def test__mark_data_as_worksheet_table( - self, testbook, create_gptable_with_kwargs - ): + + def test__mark_data_as_worksheet_table(self, testbook, create_gptable_with_kwargs): df = pd.DataFrame({"col1": ["x", "y"], "col2": [0, 1]}) - gptable = create_gptable_with_kwargs({ - "table": df, - }) + gptable = create_gptable_with_kwargs( + { + "table": df, + } + ) gptable._set_data_range() table_format = pd.DataFrame({"col1": [{}, {}], "col2": [{}, {}]}) - testbook.ws._write_array([0, 2], df, table_format) # First two rows reserved for title and instructions + testbook.ws._write_array( + [0, 2], df, table_format + ) # First two rows reserved for title and instructions testbook.ws._mark_data_as_worksheet_table(gptable, table_format) @@ -450,79 +445,140 @@ def test__mark_data_as_worksheet_table( exp_heading_format = testbook.wb.add_format(table_format.iloc[0, n]) assert got_heading_format.__dict__ == exp_heading_format.__dict__ - - @pytest.mark.parametrize("cell_val,exp_length",[ - ("string", 6), - (42, 2), - (3.14, 4), - ({"gov.uk": "https://www.gov.uk"}, 6), - (FormatList(["Partially ", {"bold": True}, "bold", " string"]), 21), - (["string", "another string"], 14), - ("string\nwith\nnewlines", 8), - (FormatList(["string\r\n", {"bold": True}, "bold string"]), 11), - (set(), 0) - ]) - def test__longest_line_length(self, testbook, cell_val, exp_length): - got_length = testbook.ws._longest_line_length(cell_val) - - assert got_length == exp_length - - - @pytest.mark.parametrize("data", [ - ["string", "longer string"], - ["longer string", "longer string"], - ["string\nstring\nstring", "longer string"]]) - @pytest.mark.parametrize("format", [ - [{"font_size": 12}, {"font_size": 12}], - [{"font_size": 10}, {"font_size": 12}]]) - def test__calculate_column_widths(self, testbook, data, format): - table = pd.DataFrame({"col": data}) - table_format = pd.DataFrame({"col": format}) + @pytest.mark.parametrize( + "data,format,exp_width", + 
[ + # Single column, normal case + (["string", "longer string"], [{"font_size": 12}, {"font_size": 12}], [93]), + # Multiple columns + ( + pd.DataFrame({"col1": ["a", "bb"], "col2": ["ccc", "dddd"]}), + pd.DataFrame( + { + "col1": [{"font_size": 11}, {"font_size": 12}], + "col2": [{"font_size": 10}, {"font_size": 14}], + } + ), + [26, 50], + ), + # Bold formatting + ( + ["bold", "bolder"], + [{"font_size": 11, "bold": True}, {"font_size": 12, "bold": True}], + [58], + ), + # Multi-line cell + ( + ["short\nlongest\nmid", "tiny"], + [{"font_size": 11}, {"font_size": 11}], + [53], + ), + # Empty string + (["", ""], [{"font_size": 11}, {"font_size": 11}], [0]), + # Number cell + ([123, 4567], [{"font_size": 11}, {"font_size": 11}], [35]), + ], + ) + def test__calculate_column_widths(self, testbook, data, format, exp_width): + if isinstance(data, pd.DataFrame): + table = data + table_format = format + else: + table = pd.DataFrame({"col": data}) + table_format = pd.DataFrame({"col": format}) got_width = testbook.ws._calculate_column_widths(table, table_format) - exp_width = [testbook.ws._excel_string_width(string_len=13, font_size=12)] - assert got_width == exp_width - + assert all(isinstance(w, int) for w in got_width) + + @pytest.mark.parametrize( + "format_dict,longest_line,expected", + [ + ({"font_size": 11, "bold": False}, "abc", 1.0), + ({"font_size": 12, "bold": False}, "abc", 12 / 11), + ({"font_size": 11, "bold": True}, "abc", 1.1), + ({"font_size": 12, "bold": True}, "abc", (12 / 11) * 1.1), + ({}, "abc", 1.0), + ({"font_size": 11, "bold": False}, "ABC", 1.0 * (1 + 0.15 * 1)), + ({"font_size": 11, "bold": False}, "AbC", 1.0 * (1 + 0.15 * (2 / 3))), + ({"font_size": 11, "bold": True}, "ALLCAPS", 1.1 * (1 + 0.15 * 1)), + ( + {"font_size": 12, "bold": True}, + "MiXeD", + (12 / 11) * 1.1 * (1 + 0.15 * (3 / 5)), + ), + ({"font_size": 11, "bold": False}, "lower", 1.0), + ], + ) + def test__get_scaling_factor(self, testbook, format_dict, longest_line, expected): + got 
= testbook.ws._get_scaling_factor(format_dict, longest_line) + assert got == expected + + @pytest.mark.parametrize( + "cell_val,expected", + [ + ("short\nlongest\nmid", "longest"), + ("one line", "one line"), + (["a", "bb", "ccc"], "ccc"), + ("a\nbb\nccc", "ccc"), + ], + ) + def test__get_longest_line(self, testbook, cell_val, expected): + got = testbook.ws._get_longest_line(cell_val) + assert got == expected + + @pytest.mark.parametrize( + "cell_val,expected", + [ + ("abc", "abc"), + (123, "123"), + (["a", "b", "c"], "a\nb\nc"), + ({"x": 1, "y": 2}, "x\ny"), + (pd.Timestamp("2023-09-30 12:34:56"), "2023-09-30 12:34:56"), + ], + ) + def test__get_cell_string(self, testbook, cell_val, expected): + # Patch FormatList handling if needed + got = testbook.ws._get_cell_string(cell_val) + assert got == expected class TestGPWorkbookStatic: """ Test that the GPWorkbook static methods work as expected. """ - @pytest.mark.parametrize("input, expected", [ - ("no references", "no references"), - ("ref at end$$1$$", "ref at end"), - ("$$1$$ref at start", "ref at start"), - ("two$$1$$ refs$$2$$", "two refs"), - ("three$$1$$ refs$$2$$, wow$$3$$", "three refs, wow") - ]) + + @pytest.mark.parametrize( + "input, expected", + [ + ("no references", "no references"), + ("ref at end$$1$$", "ref at end"), + ("$$1$$ref at start", "ref at start"), + ("two$$1$$ refs$$2$$", "two refs"), + ("three$$1$$ refs$$2$$, wow$$3$$", "three refs, wow"), + ], + ) def test__strip_annotation_references(self, input, expected): assert GPWorkbook._strip_annotation_references(input) == expected - class TestGPWorkbook: """ Test that GPWorkbook initialisation and methods work as expected. - """ + """ + def test_subclass(self): """ Test that the GPWorkbook class is a subclass of the XlsxWriter Workbook class. 
""" - assert issubclass( - GPWorkbook, - xlsxwriter.Workbook - ) - + assert issubclass(GPWorkbook, xlsxwriter.Workbook) def test_default_theme_set(self, testbook): """ Test that the workbook theme is set to gptheme by default. - """ + """ assert testbook.wb.theme == gptheme - def test_valid_set_theme(self, testbook): """ @@ -532,19 +588,12 @@ def test_valid_set_theme(self, testbook): theme_config = {"title": {"bold": True}} theme = gptables.Theme(theme_config) testbook.wb.set_theme(theme) - - assert testbook.wb.theme == gptables.Theme(theme_config) + assert testbook.wb.theme == gptables.Theme(theme_config) - @pytest.mark.parametrize("not_a_theme", [ - dict(), - set(), - [], - 1, - 3.14, - "test_string", - pd.DataFrame() - ]) + @pytest.mark.parametrize( + "not_a_theme", [dict(), set(), [], 1, 3.14, "test_string", pd.DataFrame()] + ) def test_invalid_set_theme(self, not_a_theme, testbook): """ Test that setting theme with an object that is not a gptables.Theme @@ -553,7 +602,6 @@ def test_invalid_set_theme(self, not_a_theme, testbook): with pytest.raises(TypeError): testbook.wb.set_theme(not_a_theme) - def test__update_annotations(self, testbook, create_gptable_with_kwargs): """ Test that _update_annotations produces a correctly ordered list of @@ -566,7 +614,7 @@ def test__update_annotations(self, testbook, create_gptable_with_kwargs): "subtitles": ["Subtitle$$2$$"], "units": {0: "Unit$$3$$"}, "table_notes": {0: "Note$$4$$"}, - "table": table + "table": table, } kwargs2 = { @@ -574,7 +622,7 @@ def test__update_annotations(self, testbook, create_gptable_with_kwargs): "subtitles": ["Subtitle$$3$$"], "units": {0: "Unit$$5$$"}, "table_notes": {0: "Note$4$$"}, - "table": table + "table": table, } gptable1 = create_gptable_with_kwargs(kwargs1) @@ -586,17 +634,20 @@ def test__update_annotations(self, testbook, create_gptable_with_kwargs): assert gpworkbook._annotations == ["1", "2", "3", "4", "5"] - - @pytest.mark.parametrize("additional_elements,values", [ - (None, None), 
- (["scope"], ["scope"]), - ( - ["subtitles", "instructions", "scope", "source"], - [["subtitles"], "instructions", "scope", "source"] - ) - ]) - def test_make_table_of_contents(self, testbook, create_gptable_with_kwargs, - additional_elements, values): + @pytest.mark.parametrize( + "additional_elements,values", + [ + (None, None), + (["scope"], ["scope"]), + ( + ["subtitles", "instructions", "scope", "source"], + [["subtitles"], "instructions", "scope", "source"], + ), + ], + ) + def test_make_table_of_contents( + self, testbook, create_gptable_with_kwargs, additional_elements, values + ): """ Test that attributes are set as expected when contentsheet is created. """ @@ -604,24 +655,27 @@ def test_make_table_of_contents(self, testbook, create_gptable_with_kwargs, if additional_elements: kwargs.update(dict(zip(additional_elements, values))) - exp_toc = pd.DataFrame({ - "Sheet name": [{"sheet": "internal:'sheet'!A1"}], - "Table description": [["Sheet title", *kwargs.keys()]] - }) - exp_contentsheet = create_gptable_with_kwargs({ - "table_name": "contents_table", - "title": "Table of contents", - "instructions": "This worksheet contains one table.", - "table": exp_toc, - "index_columns": {2: 0} - }) + exp_toc = pd.DataFrame( + { + "Sheet name": [{"sheet": "internal:'sheet'!A1"}], + "Table description": [["Sheet title", *kwargs.keys()]], + } + ) + exp_contentsheet = create_gptable_with_kwargs( + { + "table_name": "contents_table", + "title": "Table of contents", + "instructions": "This worksheet contains one table.", + "table": exp_toc, + "index_columns": {2: 0}, + } + ) got_contentsheet = testbook.wb.make_table_of_contents( sheets={ - "sheet": create_gptable_with_kwargs({ - "title": "Sheet title", **kwargs - }) - }, additional_elements=list(kwargs.keys()) + "sheet": create_gptable_with_kwargs({"title": "Sheet title", **kwargs}) + }, + additional_elements=list(kwargs.keys()), ) assert_frame_equal(got_contentsheet.table, exp_contentsheet.table) @@ -631,7 +685,6 @@ def 
test_make_table_of_contents(self, testbook, create_gptable_with_kwargs, assert got_contentsheet.__dict__ == exp_contentsheet.__dict__ - def test_make_notesheet(self, testbook, create_gptable_with_kwargs): """ Test that creating a notes table sheet using `make_notesheet` generates @@ -639,7 +692,9 @@ def test_make_notesheet(self, testbook, create_gptable_with_kwargs): """ gpworkbook = testbook.wb gpworkbook._annotations = [1, 2] - dummy_table = pd.DataFrame(data={"Note number":[1, 2], "Note text":["text", "more text"]}) + dummy_table = pd.DataFrame( + data={"Note number": [1, 2], "Note text": ["text", "more text"]} + ) notes_name = "Just_a_notesheet" notes_title = "Are these the notes you're looking for?" @@ -649,14 +704,16 @@ def test_make_notesheet(self, testbook, create_gptable_with_kwargs): notes_table=dummy_table, table_name=notes_name, title=notes_title, - instructions=notes_instructions + instructions=notes_instructions, + ) + exp_notesheet = create_gptable_with_kwargs( + { + "table": dummy_table, + "table_name": notes_name, + "title": notes_title, + "instructions": notes_instructions, + } ) - exp_notesheet = create_gptable_with_kwargs({ - "table": dummy_table, - "table_name": notes_name, - "title": notes_title, - "instructions": notes_instructions - }) assert_frame_equal(got_notesheet.table, exp_notesheet.table) @@ -665,7 +722,6 @@ def test_make_notesheet(self, testbook, create_gptable_with_kwargs): assert got_notesheet.__dict__ == exp_notesheet.__dict__ - def test_notesheet_defaults(self, testbook, create_gptable_with_kwargs): """ Test that creating a notes table sheet with arguments set to defaults generates @@ -673,19 +729,23 @@ def test_notesheet_defaults(self, testbook, create_gptable_with_kwargs): """ gpworkbook = testbook.wb gpworkbook._annotations = [1, 2] - dummy_table = pd.DataFrame(data={"Note number":[1, 2], "Note text":["text", "more text"]}) + dummy_table = pd.DataFrame( + data={"Note number": [1, 2], "Note text": ["text", "more text"]} + ) 
notes_name = "notes_table" notes_title = "Notes" notes_instructions = "This worksheet contains one table." got_notesheet = gpworkbook.make_notesheet(notes_table=dummy_table) - exp_notesheet = create_gptable_with_kwargs({ - "table": dummy_table, - "table_name": notes_name, - "title": notes_title, - "instructions": notes_instructions - }) + exp_notesheet = create_gptable_with_kwargs( + { + "table": dummy_table, + "table_name": notes_name, + "title": notes_title, + "instructions": notes_instructions, + } + ) assert_frame_equal(got_notesheet.table, exp_notesheet.table) diff --git a/gptables/themes/example_theme_basic.yaml b/gptables/themes/example_theme_basic.yaml new file mode 100644 index 00000000..474a7a3b --- /dev/null +++ b/gptables/themes/example_theme_basic.yaml @@ -0,0 +1,19 @@ +global: + font_size: 13 + font_name: Arial + font_color: '#AD0000' + +title: + font_size: 20 + +subtitle: + font_size: 16 + +data: + text_wrap: 1 + +description_order: + - instructions + - source + - legend + - scope diff --git a/gptables/themes/example_theme_cover.yaml b/gptables/themes/example_theme_cover.yaml new file mode 100644 index 00000000..b02822a1 --- /dev/null +++ b/gptables/themes/example_theme_cover.yaml @@ -0,0 +1,19 @@ +global: + font_size: 13 + font_name: Arial + font_color: '#AD0000' + +cover_title: + font_size: 20 + +cover_subtitle: + font_size: 18 + +data: + text_wrap: 1 + +description_order: + - instructions + - source + - legend + - scope diff --git a/gptables/themes/gptheme.yaml b/gptables/themes/gptheme.yaml index f5f2e3ba..0da11def 100644 --- a/gptables/themes/gptheme.yaml +++ b/gptables/themes/gptheme.yaml @@ -19,7 +19,7 @@ cover_text: title: bold: True font_size: 16 - + subtitle: font_size: 14 @@ -56,4 +56,4 @@ description_order: - instructions - legend - source - - scope \ No newline at end of file + - scope diff --git a/gptables/themes/penguins_test_theme.yaml b/gptables/themes/penguins_test_theme.yaml deleted file mode 100644 index f89fb3af..00000000 --- 
a/gptables/themes/penguins_test_theme.yaml +++ /dev/null @@ -1,50 +0,0 @@ -global: - font_size: 12 - font_name: Arial - font_color: 'automatic' - -cover_title: - font_size: 17 - -cover_subtitle: - font_size: 15 - -cover_text: - -title: - font_size: 16 - -subtitle: - font_size: 15 - -instructions: - -scope: - -column_heading: - bottom: 1 - text_wrap: 1 - -index_1: - text_wrap: 1 - -index_2: - text_wrap: 1 - -index_3: - text_wrap: 1 - -data: - text_wrap: 1 - -source: - font_size: 12 - -legend: - font_size: 12 - -description_order: - - instructions - - source - - legend - - scope \ No newline at end of file diff --git a/gptables/utils/pickle_themes.py b/gptables/utils/pickle_themes.py index 607894e0..9aa2d0bf 100644 --- a/gptables/utils/pickle_themes.py +++ b/gptables/utils/pickle_themes.py @@ -1,19 +1,16 @@ -import gptables.core.theme -import pickle import glob -from os.path import abspath, dirname, join, basename, splitext +import pickle # nosec B403 +from os.path import abspath, basename, dirname, join, splitext + +import gptables.core.theme + def pickle_themes(): """ Utility function for updating theme pickles. 
""" package_dir = dirname(dirname(dirname(abspath(__file__)))) - theme_configs = glob.glob(join( - package_dir, - "gptables", - "themes", - "*.yaml") - ) + theme_configs = glob.glob(join(package_dir, "gptables", "themes", "*.yaml")) pickled_output_dir = join(package_dir, "gptables", "theme_pickles") @@ -23,10 +20,8 @@ def pickle_themes(): theme = gptables.core.theme.Theme(cfg) - pickle.dump( - theme, - open(out_file, "wb") - ) + pickle.dump(theme, open(out_file, "wb")) + if __name__ == "__main__": pickle_themes() diff --git a/gptables/utils/unpickle_themes.py b/gptables/utils/unpickle_themes.py index e399d5a9..416e6d4f 100644 --- a/gptables/utils/unpickle_themes.py +++ b/gptables/utils/unpickle_themes.py @@ -1,20 +1,23 @@ -import pickle -from pkg_resources import resource_filename +import pickle # nosec B403 + +import importlib_resources + class ThemeUnpickler(pickle.Unpickler): """ Points the unpickler to the Theme class. Allows unpickling for package init. """ - def find_class(self, module, name): - if name =="Theme": + + def find_class(self, module: str, name: str) -> type: + if name == "Theme": from gptables.core.theme import Theme + return Theme return super().find_class(module, name) -gptheme = ThemeUnpickler( - open( - resource_filename("gptables", "theme_pickles/gptheme.pickle"), - "rb" - ) - ).load() + +file = importlib_resources.files("gptables") / "theme_pickles/gptheme.pickle" + +with importlib_resources.as_file(file) as path: + gptheme = ThemeUnpickler(open(path, "rb")).load() diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 00000000..9ef86e08 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,63 @@ +site_name: gptables +repo_url: https://github.com/ONSdigital/gptables + +theme: + name: material + features: + - toc.integrate + - content.code.copy + +nav: + - Home: index.md + - Getting started: + - Tutorial: getting_started/tutorial.md + - Table elements: getting_started/elements.md + - How to: + - Add additional formatting: 
how_to/additional_formatting.md + - Add a custom theme: how_to/custom_theme.md + - API: + - API reference: api/api_reference.md + - Functions: + - produce_workbook(): api/functions/produce_workbook.md + - write_workbook(): api/functions/write_workbook.md + - Classes: + - Cover: api/classes/cover.md + - GPTable: api/classes/gptable.md + - GPWorkbook: api/classes/gpworkbook.md + - GPWorksheet: api/classes/gpworksheet.md + - Theme: api/classes/theme.md + - Reference: + - Accessibility checklist: reference/checklist.md + - Examples: reference/examples.md + - Glossary: reference/glossary.md + - About GPTables: + - Contributing guidance: info/contributing.md + - Changelog: info/changelog.md + - Contributors: info/contributors.md + +plugins: +- mkdocstrings: + handlers: + python: + paths: ["."] + options: + docstring_style: numpy + show_source: false + show_signature: true + show_signature_annotations: true + separate_signature: true + # separate_signature: false (duplicate key removed; conflicts with the value above) + extra: + show_type_annotations: true + + filters: + - "!^__" # Hide methods like __init__() + - "!^_" # Hide non-public facing methods + +markdown_extensions: +- toc: + toc_depth: 1-2 +- admonition +- pymdownx.details +- pymdownx.superfences +# admonition (duplicate entry removed; already listed above) diff --git a/pyproject.toml b/pyproject.toml index 77e832cc..beef946b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,8 +13,8 @@ authors = [ maintainers = [ {name = "Analysis Standards and Pipelines", email = "ASAP@ons.gov.uk"} ] -version = "1.2.0" -requires-python = '>=3.7' +version = "2.0.0" +requires-python = '>=3.9' description = 'Simplifying good practice in statistical tables.'
readme = "README.rst" license = {text = "MIT License"} @@ -24,23 +24,29 @@ classifiers = [ "Operating System :: OS Independent" ] dependencies = [ - "pandas>=0.25.3", - "xlrd>=1.2.0", - "XlsxWriter>=1.2.6", - "pyyaml>=3.12" + "pandas>=2.3.2", + "xlrd>=2.0.2", + "XlsxWriter>=3.2.9", + "pyyaml>=6.0.2" ] [project.optional-dependencies] docs = [ - "sphinx>=2", - "sphinx_rtd_theme" + "mkdocs", + "mkdocs-material", + "mkdocs-git-revision-date-localized-plugin", + "mkdocs-print-site-plugin", + "mkdocstrings-python", + # "mkdocs-material" (duplicate entry removed; already listed above) + "pymdown-extensions" ] -testing = [ - "sphinx>=2", - "sphinx_rtd_theme", +dev = [ "coverage", - "pytest>=6.2.5", - "pytest-cov" + "pytest>=8.4.2", + "pytest-cov", + "pre-commit", + "importlib_resources", + "pytest-regressions" ] [version] @@ -48,4 +54,7 @@ file = "VERSION" [project.urls] Homepage = "https://github.com/best-practice-and-impact/gptables" -Documentation = "https://gptables.readthedocs.io/en/latest/" \ No newline at end of file +Documentation = "https://gptables.readthedocs.io/en/latest/" + +[tool.bandit.assert_used] +skips = ['*_test.py', '*/test_*.py'] diff --git a/readthedocs.yml b/readthedocs.yml deleted file mode 100644 index d7aea811..00000000 --- a/readthedocs.yml +++ /dev/null @@ -1,15 +0,0 @@ -# Read the Docs configuration file -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details - -# Required -version: 2 - -# Build documentation in the docs/ directory with Sphinx -sphinx: - configuration: docs/source/conf.py - -# Optionally set the version of Python and requirements required to build your docs -python: - version: 3.7 - install: - - requirements: docs/requirements.txt