diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 000000000..fe839678d
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,54 @@
+name: docs_pages_workflow
+
+on: [pull_request]
+
+permissions:
+  pull-requests: write
+
+jobs:
+  build_docs_job:
+    permissions: write-all
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: 3.8
+
+      - name: Get pip cache dir
+        id: pip-cache
+        run: |
+          python -m pip install --upgrade pip
+          echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
+
+      - name: pip cache
+        uses: actions/cache@v3
+        with:
+          path: ${{ steps.pip-cache.outputs.dir }}
+          key: ${{ runner.os }}-pip-${{ hashFiles('**/setup.py', '**/requirements.txt', '**/docs/requirements.txt') }}
+          restore-keys: |
+            ${{ runner.os }}-pip-
+
+      - name: Install docs requirements
+        run: |
+          python -m pip install -r docs/requirements.txt
+
+      - name: make the sphinx docs
+        run: |
+          make -C docs clean
+          make -C docs html
+
+      - uses: readthedocs/actions/preview@v1
+        with:
+          project-slug: "trlx"
+          project-language: "en"
+          # see: https://github.com/readthedocs/actions/tree/main/preview
+          # message-template (optional): Text message to be injected by the action in the Pull Request description. It supports the following placeholders to be replaced:
+          #   {docs-pr-index-url}: URL to the root of the documentation for the Pull Request preview.
+          # platform (optional): Read the Docs Community (community) or Read the Docs for Business (business). (default: community)
+          # single-version (optional): Set this to 'true' if your project is single version, so we can link to the correct URL. (default: 'false')
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d94d14f2a..cbf9fa775 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 # See https://pre-commit.com for more information
 # See https://pre-commit.com/hooks.html for more hooks
 repos:
-- repo: https://github.com/pre-commit/pre-commit-hooks
+  - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.4.0
     hooks:
       - id: check-case-conflict
@@ -18,17 +18,24 @@ repos:
         args: [--fix=lf]
       - id: requirements-txt-fixer
      - id: trailing-whitespace
-- repo: https://github.com/psf/black
+  - repo: https://github.com/psf/black
     rev: 23.1.0
     hooks:
-  - id: black
+      - id: black
         files: ^(trlx|examples|tests|setup.py)/
-- repo: https://github.com/pycqa/isort
+  - repo: https://github.com/pycqa/isort
     rev: 5.12.0
     hooks:
-  - id: isort
+      - id: isort
         name: isort (python)
-- repo: https://github.com/pycqa/flake8
+  - repo: https://github.com/pycqa/flake8
     rev: 6.0.0
     hooks:
-  - id: flake8
+      - id: flake8
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.2
+    hooks:
+      - id: codespell
+        args: [--ignore-words, dictionary.txt]
+        additional_dependencies:
+          - tomli
diff --git a/.readthedocs.yml b/.readthedocs.yml
index c8f03ab0a..d5f60f2e8 100644
--- a/.readthedocs.yml
+++ b/.readthedocs.yml
@@ -1,9 +1,25 @@
+# .readthedocs.yml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+build:
+  os: "ubuntu-20.04"
+  tools:
+    python: "3.8"
+
+# Build documentation in the docs/ directory with Sphinx
 sphinx:
-  configuration: docs/source/conf.py
+  configuration: docs/conf.py
+  fail_on_warning: false
+
+# Optionally build your docs in additional formats such as PDF and ePub
+formats:
+  - htmlzip
+# Optionally set the version of Python and requirements required to build your docs
 python:
-  version: 3.9
   install:
-  - requirements: docs/requirements.txt
+    - requirements: docs/requirements.txt
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..4c09f1f5f
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,11 @@
+gendoc:
+	docker build -t trlxgendocs -f docker/docs/Dockerfile .
+run:
+	docker run --rm -it \
+		-p 8000:8000 \
+		--entrypoint python trlxgendocs -m http.server 8000 --directory build/docs/build/html
+
+sh:
+	docker run --rm -it \
+		-p 8000:8000 \
+		--entrypoint /bin/bash trlxgendocs
diff --git a/dictionary.txt b/dictionary.txt
new file mode 100644
index 000000000..212e125ae
--- /dev/null
+++ b/dictionary.txt
@@ -0,0 +1,2 @@
+rouge
+sart
diff --git a/docker/docs/Dockerfile b/docker/docs/Dockerfile
new file mode 100644
index 000000000..9247f61db
--- /dev/null
+++ b/docker/docs/Dockerfile
@@ -0,0 +1,18 @@
+FROM python:3.8-slim
+
+# pip install -r docs/requirements.txt
+# sphinx-build -b html docs docs/build/html -j auto
+# sphinx-build -b html -D nb_execution_mode=off docs docs/build/html -j auto
+
+RUN python -m pip install --upgrade --no-cache-dir pip
+ADD docs/requirements.txt /tmp/requirements.txt
+RUN python -m pip install --exists-action=w --no-cache-dir -r /tmp/requirements.txt
+RUN apt-get update && apt-get install make imagemagick -y --no-install-recommends \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+RUN mkdir /build
+WORKDIR /build/
+ADD . .
+RUN python -m pip install -e .
+RUN cd docs && make html
+ENTRYPOINT [ "python", "-m", "http.server", "8000" ]
diff --git a/docs/Makefile b/docs/Makefile
index d0c3cbf10..8d57a73b3 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -5,7 +5,7 @@
 # from the environment for the first two.
 SPHINXOPTS    ?=
 SPHINXBUILD   ?= sphinx-build
-SOURCEDIR     = source
+SOURCEDIR     = .
 BUILDDIR      = build
 
 # Put it first so that "make" without argument is like "make help".
@@ -14,7 +14,20 @@ help:
 
 .PHONY: help Makefile
 
+logo:
+	convert trlx_logo_red.png -define icon:auto-resize=64,48,32,16 _static/favicon.ico
+	convert trlx_logo_red.png -resize 16x16 _static/favicon-16x16.png
+	convert trlx_logo_red.png -resize 57x57 _static/apple-touch-icon.png
+	convert trlx_logo_red.png -resize 57x57 _static/apple-touch-icon-57x57.png
+	convert trlx_logo_red.png -resize 72x72 _static/apple-touch-icon-72x72.png
+	convert trlx_logo_red.png -resize 76x76 _static/apple-touch-icon-76x76.png
+	convert trlx_logo_red.png -resize 114x114 _static/apple-touch-icon-114x114.png
+	convert trlx_logo_red.png -resize 120x120 _static/apple-touch-icon-120x120.png
+	convert trlx_logo_red.png -resize 144x144 _static/apple-touch-icon-144x144.png
+	convert trlx_logo_red.png -resize 152x152 _static/apple-touch-icon-152x152.png
+	convert trlx_logo_red.png -resize 180x180 _static/apple-touch-icon-180x180.png
+
 # Catch-all target: route all unknown targets to Sphinx using the new
 # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
+%: Makefile logo
 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 000000000..68e1d13e9
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,5 @@
+# How to build the documentation
+
+```bash
+make html
+```
diff --git a/docs/_static/apple-touch-icon-114x114.png b/docs/_static/apple-touch-icon-114x114.png
new file mode 100644
index 000000000..8701f2095
Binary files /dev/null and b/docs/_static/apple-touch-icon-114x114.png differ
diff --git a/docs/_static/apple-touch-icon-120x120.png b/docs/_static/apple-touch-icon-120x120.png
new file mode 100644
index 000000000..15796b587
Binary files /dev/null and b/docs/_static/apple-touch-icon-120x120.png differ
diff --git a/docs/_static/apple-touch-icon-144x144.png b/docs/_static/apple-touch-icon-144x144.png
new file mode 100644
index 000000000..fb7e4b842
Binary files /dev/null and b/docs/_static/apple-touch-icon-144x144.png differ
diff --git a/docs/_static/apple-touch-icon-152x152.png b/docs/_static/apple-touch-icon-152x152.png
new file mode 100644
index 000000000..36b770945
Binary files /dev/null and b/docs/_static/apple-touch-icon-152x152.png differ
diff --git a/docs/_static/apple-touch-icon-180x180.png b/docs/_static/apple-touch-icon-180x180.png
new file mode 100644
index 000000000..a05c2e035
Binary files /dev/null and b/docs/_static/apple-touch-icon-180x180.png differ
diff --git a/docs/_static/apple-touch-icon-57x57.png b/docs/_static/apple-touch-icon-57x57.png
new file mode 100644
index 000000000..062cb08bd
Binary files /dev/null and b/docs/_static/apple-touch-icon-57x57.png differ
diff --git a/docs/_static/apple-touch-icon-72x72.png b/docs/_static/apple-touch-icon-72x72.png
new file mode 100644
index 000000000..ee52ff6b8
Binary files /dev/null and b/docs/_static/apple-touch-icon-72x72.png differ
diff --git a/docs/_static/apple-touch-icon-76x76.png b/docs/_static/apple-touch-icon-76x76.png
new file mode 100644
index 000000000..1c7e77688
Binary files /dev/null and b/docs/_static/apple-touch-icon-76x76.png differ
diff --git a/docs/_static/apple-touch-icon.png b/docs/_static/apple-touch-icon.png
new file mode 100644
index 000000000..062cb08bd
Binary files /dev/null and b/docs/_static/apple-touch-icon.png differ
diff --git a/docs/_static/favicon-16x16.png b/docs/_static/favicon-16x16.png
new file mode 100644
index 000000000..714c2e036
Binary files /dev/null and b/docs/_static/favicon-16x16.png differ
diff --git a/docs/_static/favicon.ico b/docs/_static/favicon.ico
new file mode 100644
index 000000000..bc86cfb2f
Binary files /dev/null and b/docs/_static/favicon.ico differ
diff --git a/docs/_static/style.css b/docs/_static/style.css
new file mode 100644
index 000000000..27efb234a
--- /dev/null
+++ b/docs/_static/style.css
@@ -0,0 +1,23 @@
+:root {
+  --block-bg-opacity: 0.5;
+}
+
+.wy-side-nav-search {
+  background-color: #fff;
+}
+
+.getting-started {
+  background-color: rgba(78, 150, 253, var(--block-bg-opacity));
+}
+
+.user-guides {
+  background-color: rgba(0, 169, 154, var(--block-bg-opacity));
+}
+
+.developer-docs {
+  background-color: rgba(171, 0, 182, var(--block-bg-opacity));
+}
+
+.key-ideas {
+  border: 0px;
+}
diff --git a/docs/_templates/layout.html b/docs/_templates/layout.html
new file mode 100644
index 000000000..4c57ba830
--- /dev/null
+++ b/docs/_templates/layout.html
@@ -0,0 +1,2 @@
+{% extends "!layout.html" %}
+{% set css_files = css_files + ["_static/style.css"] %}
diff --git a/docs/build.sh b/docs/build.sh
new file mode 100755
index 000000000..147ebab99
--- /dev/null
+++ b/docs/build.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+`which sphinx-build` -T -E -b html -d _build/doctrees-readthedocs -D language=en . _build/html
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 000000000..32a8c2df3
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,187 @@
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# -- Path setup --------------------------------------------------------------
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+
+sys.path.insert(0, os.path.abspath(".."))
+
+
+# -- Project information -----------------------------------------------------
+
+project = "trlX"
+copyright = "2023, CarperAI"
+author = "CarperAI"
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.mathjax",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.viewcode",
+    "matplotlib.sphinxext.plot_directive",
+    "sphinx_autodoc_typehints",
+    "myst_nb",
+    # "myst_parser",
+    "sphinx_remove_toctrees",
+    "sphinx_copybutton",
+    "sphinx_design",
+]
+
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3/", None),
+    "numpy": ("https://docs.scipy.org/doc/numpy/", None),
+    "scipy": ("https://docs.scipy.org/doc/scipy/reference/", None),
+    "pytorch": ("https://pytorch.readthedocs.io/", None),
+}
+
+autodoc_preserve_defaults = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffixes as a list of strings:
+#
+source_suffix = [".rst", ".md"]
+
+# The master toctree document.
+master_doc = "index"
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = [
+    # Sometimes sphinx reads its own outputs as inputs!
+    "build/html",
+]
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = None
+
+autosummary_generate = True
+napoleon_use_rtype = False
+
+# -- Options for nbsphinx -----------------------------------------------------
+
+# Execute notebooks before conversion: 'always', 'never', 'auto' (default)
+# We execute all notebooks, exclude the slow ones using 'exclude_patterns'
+nbsphinx_execute = "always"
+
+# Use this kernel instead of the one stored in the notebook metadata:
+# nbsphinx_kernel_name = 'python3'
+
+# List of arguments to be passed to the kernel that executes the notebooks:
+# nbsphinx_execute_arguments = []
+
+# If True, the build process is continued even if an exception occurs:
+# nbsphinx_allow_errors = True
+
+
+# Controls when a cell will time out (defaults to 30; use -1 for no timeout):
+nbsphinx_timeout = 180
+
+# Default Pygments lexer for syntax highlighting in code cells:
+# nbsphinx_codecell_lexer = 'ipython3'
+
+# Width of input/output prompts used in CSS:
+# nbsphinx_prompt_width = '8ex'
+
+# If window is narrower than this, input/output prompts are on separate lines:
+# nbsphinx_responsive_width = '700px'
+
+# This is processed by Jinja2 and inserted before each notebook
+nbsphinx_prolog = r"""  # noqa: E501
+{% set docname = 'docs/' + env.doc2path(env.docname, base=None) %}
+.. only:: html
+    .. role:: raw-html(raw)
+        :format: html
+    .. nbinfo::
+        Interactive online version:
+        :raw-html:`Open In Colab`
+        __ https://github.com/CarperAI/trlx/blob/
+            {{ env.config.release }}/{{ docname }}
+"""
+
+# This is processed by Jinja2 and inserted after each notebook
+# nbsphinx_epilog = r"""
+# """
+
+# Input prompt for code cells. "%s" is replaced by the execution count.
+# nbsphinx_input_prompt = 'In [%s]:'
+
+# Output prompt for code cells. "%s" is replaced by the execution count.
+# nbsphinx_output_prompt = 'Out[%s]:'
+
+# Specify conversion functions for custom notebook formats:
+# import jupytext
+# nbsphinx_custom_formats = {
+#     '.Rmd': lambda s: jupytext.reads(s, '.Rmd'),
+# }
+
+# Link or path to require.js, set to empty string to disable
+# nbsphinx_requirejs_path = ''
+
+# Options for loading require.js
+# nbsphinx_requirejs_options = {'async': 'async'}
+
+# mathjax_config = {
+#     'TeX': {'equationNumbers': {'autoNumber': 'AMS', 'useLabelIds': True}},
+# }
+
+# Additional files needed for generating LaTeX/PDF output:
+# latex_additional_files = ['references.bib']
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+#
+html_theme = "sphinx_book_theme"
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = "TRLXdoc"
+
+# -- Extension configuration -------------------------------------------------
+
+# Tell sphinx-autodoc-typehints to generate stub parameter annotations including
+# types, even if the parameters aren't explicitly documented.
+always_document_param_types = True
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+html_theme_options = {
+    # "logo_only": True,
+    "show_toc_level": 2,
+    "repository_url": "https://github.com/CarperAI/trlx",
+    "use_repository_button": True,  # add a "link to repository" button
+}
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+html_logo = "_static/apple-touch-icon-144x144.png"
+
+html_favicon = "_static/favicon-16x16.png"
diff --git a/docs/source/configs.rst b/docs/configs.rst
similarity index 86%
rename from docs/source/configs.rst
rename to docs/configs.rst
index da5e1f2e6..0e2abd369 100644
--- a/docs/source/configs.rst
+++ b/docs/configs.rst
@@ -25,10 +25,10 @@ the specific method being used (i.e. ILQL or PPO)
 
 **PPO**
 
-.. autoclass:: trlx.data.method_configs.PPOConfig
+.. autoclass:: trlx.trainer.nn.ppo_models.PPOConfig
     :members:
 
 **ILQL**
 
-.. autoclass:: trlx.data.method_configs.ILQLConfig
+.. autoclass:: trlx.trainer.nn.ilql_models.ILQLConfig
     :members:
diff --git a/docs/source/data.rst b/docs/data.rst
similarity index 100%
rename from docs/source/data.rst
rename to docs/data.rst
diff --git a/docs/examples.md b/docs/examples.md
new file mode 100644
index 000000000..0a2ec49f0
--- /dev/null
+++ b/docs/examples.md
@@ -0,0 +1,26 @@
+# Examples
+
+In the `examples` folder you can find several example training tasks.
+
+Check the `configs` folder for the associated config files.
+
+## randomwalks
+
+Performs offline reinforcement learning on a set of graph random walks, learning
+to stitch together shortest paths to a target destination.
+
+## simulacra
+
+Optimizes prompts using the [prompts-ratings dataset](https://github.com/JD-P/simulacra-aesthetic-captions).
+
+## architext
+
+Optimizes architectural designs represented as text, by minimizing the number of
+rooms (the pre-trained model is under a license on Hugging Face).
+
+## ilql_sentiments and ppo_sentiments
+
+Train a model to generate movie reviews with a positive sentiment: in the offline
+setting, by fitting to sentiment scores of the IMDB dataset; in the online setting,
+by sampling from a model fine-tuned on IMDB and rating the samples with a learned
+sentiment reward model. You can tweak these scripts and tune their hyperparameters
+if you wish to use trlX for a custom task.
diff --git a/docs/faq.md b/docs/faq.md
new file mode 100644
index 000000000..e663db2d2
--- /dev/null
+++ b/docs/faq.md
@@ -0,0 +1,8 @@
+# Frequently Asked Questions
+
+```{admonition} How to add a new page to the documentation?
+RST primer for Sphinx: https://thomas-cokelaer.info/tutorials/sphinx/rest_syntax.html
+```
+
+We are collecting answers to frequently asked questions here.
+Contributions welcome!
diff --git a/docs/favicon.ico b/docs/favicon.ico
new file mode 100644
index 000000000..bc86cfb2f
Binary files /dev/null and b/docs/favicon.ico differ
diff --git a/docs/glossary.md b/docs/glossary.md
new file mode 100644
index 000000000..aef6f47bb
--- /dev/null
+++ b/docs/glossary.md
@@ -0,0 +1,81 @@
+# Glossary of Terms
+
+```{glossary}
+[Agent]()
+  An agent in reinforcement learning is the entity that interacts with the {term}`Environment` to learn how to maximize its {term}`Reward`.
+
+[Action]()
+  An action in reinforcement learning is the signal that the {term}`Agent` provides to the {term}`Environment` to indicate what it wants to do.
+
+  In other words, an action is the decision the agent communicates to the environment at each step. The agent's goal is to maximize the total reward it receives over a sequence of {term}`Steps`.
+
+[CPU](https://en.wikipedia.org/wiki/Central_processing_unit)
+  Short for *Central Processing Unit*, CPUs are the standard computational architecture
+  available in most computers. trlX can run computations on CPUs, but can often achieve
+  much better performance on a {term}`GPU`.
+
+
+[Device](https://en.wikipedia.org/wiki/Device_computing)
+  The generic name used to refer to the {term}`CPU` or {term}`GPU` used
+  by trlX to perform computations.
+
+[Environment]()
+  An environment in reinforcement learning is the system that the agent interacts with. It is the source of {term}`State`, {term}`Action`, and {term}`Reward`.
+
+  In other words, an environment is a system that defines the agent's observation space, action space, and reward function. It is the source of the agent's experience, and the goal of the agent is to maximize the total reward it receives over a sequence of {term}`Steps`.
+
+[GPU](https://en.wikipedia.org/wiki/Graphics_processing_unit)
+  Short for *Graphical Processing Unit*, GPUs were originally specialized for operations
+  related to rendering of images on screen, but are now much more general-purpose. trlX is
+  able to target GPUs for fast operations on arrays (see also {term}`CPU`).
+
+[Policy]()
+  A policy in reinforcement learning is a function that maps {term}`State` to {term}`Action`.
+
+  In other words, a policy is a function that maps the agent's current state to the action it should take. The agent's goal is to maximize the total reward it receives over a sequence of {term}`Steps`.
+
+[PPO](https://arxiv.org/abs/1707.06347)
+  Short for *Proximal Policy Optimization*, PPO is a {term}`Policy Gradient` algorithm
+  that is able to learn policies in high-dimensional, continuous action spaces.
+
+[Policy Gradient](https://spinningup.openai.com/en/latest/spinningup/rl_intro3.html#policy-gradients)
+  Policy gradient methods are a class of reinforcement learning algorithms that are able to
+  learn policies in high-dimensional, continuous action spaces.
+
+[Reinforcement Learning](https://en.wikipedia.org/wiki/Reinforcement_learning)
+  Reinforcement learning (RL) is a machine learning paradigm that trains an agent to maximize its
+  {term}`Reward` by interacting with an {term}`Environment`.
+
+[Reward]()
+  A reward in reinforcement learning is the signal that the {term}`Environment` provides to the {term}`Agent` to indicate how well it is performing.
+
+  In other words, a reward is a scalar value that the environment provides to the agent to indicate how well it is performing. The agent's goal is to maximize the total reward it receives over a sequence of {term}`Steps`.
+
+[Rollout]()
+  A rollout in reinforcement learning is the process of executing a {term}`Policy`, starting from a specific state in the {term}`Environment`, and following it to the end to obtain a complete trajectory of {term}`State`, {term}`Action`, and {term}`Reward`.
+
+  In other words, a rollout is a simulation of a policy's behavior in the environment over a fixed number of {term}`Steps` or until a terminal state is reached. It provides a means of evaluating the {term}`Policy`'s performance, as the total reward collected over the trajectory can be used as a measure of its effectiveness.
+
+[State]()
+  A state in reinforcement learning is the observation that the {term}`Environment` provides to the {term}`Agent`.
+
+[Steps]()
+  A step in reinforcement learning is the process of taking a single {term}`Action` in the {term}`Environment`, and observing the resulting {term}`State` and {term}`Reward`.
+
+  In other words, a step is a single iteration of the environment's dynamics, where the agent takes an action and receives a reward and a new state. The agent's goal is to maximize the total reward it receives over a sequence of steps.
+
+[Trajectory]()
+  In a {term}`PPO` (Proximal Policy Optimization) setup, a fixed-length trajectory
+  segment refers to a fixed number of time steps in an episode of an
+  environment. At each time step, the agent takes an action based on the current
+  state and receives a reward from the environment. By using fixed-length
+  trajectory segments, the agent's behavior is divided into chunks of a fixed
+  length, and each chunk is used for a single PPO update. This allows for more
+  efficient use of the {term}`Agent`'s experience by breaking it into smaller pieces, and
+  it also helps to stabilize the learning process by making the training updates
+  less sensitive to the length of the episode. Fixed-length trajectory segments
+  are often used in Reinforcement Learning (RL) algorithms, including {term}`PPO`, to
+  update the policy network.
+
+```
diff --git a/docs/source/index.rst b/docs/index.rst
similarity index 70%
rename from docs/source/index.rst
rename to docs/index.rst
index 1b2947593..0a91d3e50 100644
--- a/docs/source/index.rst
+++ b/docs/index.rst
@@ -8,16 +8,39 @@ Welcome to trlX's documentation!
 trlX is a library made for training large language models using reinforcement
 learning. It currently supports training using PPO or ILQL for models up to 20B
 using Accelerate.
 
+Installation
+------------
+.. code-block:: bash
+
+   pip install "trlx"
+
+
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
    :caption: Contents:
 
+   README
    data
-   models
    configs
   pipeline
+   trainer
+
+.. toctree::
+   :hidden:
+   :maxdepth: 1
+   :caption: Examples
+
    examples
 
+.. toctree::
+   :hidden:
+   :maxdepth: 1
+   :caption: Resources
+
+   faq
+   glossary
+
+
 Indices and tables
 ==================
diff --git a/docs/pipeline.rst b/docs/pipeline.rst
new file mode 100644
index 000000000..3d7c45192
--- /dev/null
+++ b/docs/pipeline.rst
@@ -0,0 +1,33 @@
+.. _pipeline:
+
+Pipelines and Rollout Stores
+****************************
+
+*Pipelines*
+
+Pipelines in trlX provide a way to read from a dataset. They fetch data from the dataset and feed it to the models for training or inference, ensuring the models have efficient access to the data they need.
+
+.. autoclass:: trlx.pipeline.BasePipeline
+    :members:
+
+.. autoclass:: trlx.pipeline.BaseRolloutStore
+    :members:
+
+
+*Rollout Stores*
+
+Rollout stores in trlX hold the experiences that the orchestrator creates for the models. These experiences are the models' training data: a model learns and improves its behavior by training on the experiences accumulated in its rollout store.
+
+
+**PPO**
+
+.. autoclass:: trlx.pipeline.ppo_pipeline.PPORolloutStorage
+    :members:
+
+**ILQL**
+
+.. autoclass:: trlx.pipeline.offline_pipeline.PromptPipeline
+    :members:
+
+.. autoclass:: trlx.pipeline.offline_pipeline.ILQLRolloutStorage
+    :members:
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 7a33f300e..3052a2f0c 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,11 +1,20 @@
-accelerate==0.12.0
-datasets==2.4.0
-deepspeed==0.7.3
-einops==0.4.1
-numpy==1.23.2
-sphinx==4.0.0
-sphinx_rtd_theme
+accelerate
+commonmark
+datasets
+deepspeed
+docutils
+jupyter-sphinx
+matplotlib
+myst-nb
+nbsphinx
+Pygments
+ray
+readthedocs-sphinx-ext
+rich
+sphinx-autodoc-typehints
+sphinx-book-theme
+sphinx-copybutton
+sphinx-design
+sphinx-remove-toctrees
 torchtyping
-tqdm==4.64.0
-transformers==4.21.2
-wandb==0.13.2
+transformers
diff --git a/docs/source/conf.py b/docs/source/conf.py
deleted file mode 100644
index 0a9a11c86..000000000
--- a/docs/source/conf.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# Configuration file for the Sphinx documentation builder.
-#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# https://www.sphinx-doc.org/en/master/usage/configuration.html
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
-import sys
-
-import sphinx_rtd_theme
-
-sys.path.insert(0, os.path.abspath('../..'))
-
-
-# -- Project information -----------------------------------------------------
-
-project = 'trlX'
-copyright = '2022, CarperAI'
-author = 'CarperAI'
-
-# -- General configuration ---------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-
-extensions = ['sphinx_rtd_theme', 'sphinx.ext.todo', 'sphinx.ext.viewcode', 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.autosectionlabel']
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns = []
-
-
-# -- Options for HTML output -------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages. See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'sphinx_rtd_theme'
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
diff --git a/docs/source/examples.rst b/docs/source/examples.rst
deleted file mode 100644
index 6f5db49d1..000000000
--- a/docs/source/examples.rst
+++ /dev/null
@@ -1,18 +0,0 @@
-.. _examples:
-
-Examples
-************************
-
-In the ``examples`` folder you can find several example training tasks. Check
-the configs folder for the associated configs files. ``examples.randomwalks``
-does offline reinforcement on a set of graph random walks to stitch shortest
-paths to some destination. ``examples.simulacra`` optimizes prompts by using
-prompts-ratings dataset (https://github.com/JD-P/simulacra-aesthetic-captions).
-``examples.architext`` tries to optimize designs represented textually by
-minimazing number of rooms (pretrained model is under a license on hf).
-``examples.ilql_sentiments`` and ``examples.ppo_sentiments`` train to generate
-movie reviews with a positive sentiment, in offline setting – by fitting to IMDB
-dataset sentiment scores, and in online setting – by sampling finetuned on IMDB
-model and rating samples with learned sentiment reward model, You can tweak
-these scripts to your liking and tune hyperparameters to your problem if you
-wish to use trlx for some custom task.
diff --git a/docs/source/pipeline.rst b/docs/source/pipeline.rst
deleted file mode 100644
index 68279d889..000000000
--- a/docs/source/pipeline.rst
+++ /dev/null
@@ -1,28 +0,0 @@
-.. _pipeline:
-
-Pipelines
-************************
-
-Pipelines are how you read from a dataset with trlX. Rollout stores are how models store experiences created
-for them. It is these experiences in their rollout store that they are trained on.
-
-**General**
-
-.. autoclass:: trlx.pipeline.BasePipeline
-   :members:
-
-.. autoclass:: trlx.pipeline.BaseRolloutStore
-   :members:
-
-**PPO**
-
-.. autoclass:: trlx.pipeline.ppo_pipeline.PPORolloutStorage
-   :members:
-
-**ILQL**
-
-.. autoclass:: trlx.pipeline.offline_pipeline.PromptPipeline
-   :members:
-
-.. autoclass:: trlx.pipeline.offline_pipeline.ILQLRolloutStorage
-   :members:
diff --git a/docs/source/trainer.rst b/docs/trainer.rst
similarity index 100%
rename from docs/source/trainer.rst
rename to docs/trainer.rst
diff --git a/docs/trlx_logo_bw.png b/docs/trlx_logo_bw.png
new file mode 100644
index 000000000..cb0c86f6c
Binary files /dev/null and b/docs/trlx_logo_bw.png differ
diff --git a/docs/trlx_logo_red.png b/docs/trlx_logo_red.png
new file mode 100644
index 000000000..91bfc378d
Binary files /dev/null and b/docs/trlx_logo_red.png differ
diff --git a/examples/experiments/grounded_program_synthesis/lang.py b/examples/experiments/grounded_program_synthesis/lang.py
index d2436c3f6..9c3f076c0 100644
--- a/examples/experiments/grounded_program_synthesis/lang.py
+++ b/examples/experiments/grounded_program_synthesis/lang.py
@@ -109,7 +109,7 @@ def __call__(self, statement_string: str):
 # This is used to store the input, output and the function template.
 # Input : List given as an input to the function.
 # function_template : The atomic function in a given DSL Grammar
-# Output : Transformed outut by applying function on the input.
+# Output : Transformed output by applying function on the input.
 generation_template = {"function_template": "NONE", "output": "NONE", "input": []}
diff --git a/examples/experiments/grounded_program_synthesis/train_trlx.py b/examples/experiments/grounded_program_synthesis/train_trlx.py
index 8071fc210..6cfe793a0 100644
--- a/examples/experiments/grounded_program_synthesis/train_trlx.py
+++ b/examples/experiments/grounded_program_synthesis/train_trlx.py
@@ -17,7 +17,7 @@ def __init__(self):
             self.train_data = json.load(f)
         with open("dataset/test.json", "r") as f:
             self.test_data = json.load(f)
-        logger.info("Sucessfully loaded the dataset")
+        logger.info("Successfully loaded the dataset")
 
     def load_datapoints(self, split="train"):
         if split == "train":
@@ -74,7 +74,7 @@ def main(hparams={}):
 
 
 if __name__ == "__main__":
-    # TEST REWARD FUNTION
+    # TEST REWARD FUNCTION
     assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -4]),1)"])) == [1]
     assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -a]),1)"])) == [-1]
     assert (reward_fn(["Input: 1 Output: [-4,-5,-2] Function: div_n(reverse([-2, -5, -3]),1)"])) == [-0.5]
diff --git a/trlx/models/modeling_ilql.py b/trlx/models/modeling_ilql.py
index d9e614a7f..7cf682eff 100644
--- a/trlx/models/modeling_ilql.py
+++ b/trlx/models/modeling_ilql.py
@@ -194,7 +194,7 @@ def sync_target_q_heads(self):
 
 
 class AutoModelForCausalLMWithILQLHeads(PreTrainedModelWrapper):
-    """An `AutoModel` class wrapper for `transformers` causal models wtih a language
+    """An `AutoModel` class wrapper for `transformers` causal models with a language
     modeling head and ILQL heads.
 
     References:
diff --git a/trlx/models/modeling_nemo_ilql.py b/trlx/models/modeling_nemo_ilql.py
index 31ac49a8a..17cfef358 100644
--- a/trlx/models/modeling_nemo_ilql.py
+++ b/trlx/models/modeling_nemo_ilql.py
@@ -666,7 +666,7 @@ def fwd_output_and_loss_func(batch: List[torch.Tensor], model, checkpoint_activa
             )
         else:
             # In-between stages are given data via the pipeline engine
-            # Still need to specify thes arguments to avoid errors
+            # Still need to specify these arguments to avoid errors
             model_output = model(input_ids=None, position_ids=None, attention_mask=None)
 
         def gather_ntc(t: torch.Tensor):
diff --git a/trlx/trainer/__init__.py b/trlx/trainer/__init__.py
index 8e0d239df..ba2cce5e5 100644
--- a/trlx/trainer/__init__.py
+++ b/trlx/trainer/__init__.py
@@ -64,7 +64,7 @@ def sample(self, prompts: Iterable[str], length: int, n_samples: int) -> Iterabl
 
         :param prompts: List of prompts to tokenize and use as context
 
-        :param length: How many new tokens to genrate for each prompt
+        :param length: How many new tokens to generate for each prompt
         :type length: int
 
         :param n_samples: Default behavior is to take number of prompts as this
diff --git a/trlx/trainer/accelerate_ppo_trainer.py b/trlx/trainer/accelerate_ppo_trainer.py
index 16791f62a..84b02a7b6 100644
--- a/trlx/trainer/accelerate_ppo_trainer.py
+++ b/trlx/trainer/accelerate_ppo_trainer.py
@@ -278,7 +278,7 @@ def make_experience(self, num_rollouts: int = 1024, iter_count: int = 0):  # noq
 
         while len(ppo_rl_elements) < num_rollouts:
             # Get next batch in prompt dataset and refresh if exhausted
-            # TOOD (jon-tow): Make `prompt_dataloader` a cyclic/infinite DataLoader to not require manually
+            # TODO (jon-tow): Make `prompt_dataloader` a cyclic/infinite DataLoader to not require manually
             # "refreshing" the contents of the `prompt_iterator`
             try:
                 batch: PromptBatch = next(self.prompt_iterator)
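The `TODO` in the last hunk asks for a cyclic/infinite `prompt_dataloader`, which would let the `try`/`except` refresh around `next(self.prompt_iterator)` go away. A minimal sketch of such a wrapper, assuming a standard PyTorch-style iterable; the helper name and the usage shown are hypothetical, not part of this diff:

```python
from typing import Iterable, Iterator


def infinite_dataloader(dataloader: Iterable) -> Iterator:
    """Yield batches forever, restarting the dataloader when it is exhausted.

    Restarting the underlying iterator (rather than using itertools.cycle,
    which replays cached batches from the first pass) re-invokes the sampler
    on every epoch, so a shuffled loader yields a fresh order each time.
    """
    while True:
        got_batch = False
        for batch in dataloader:
            got_batch = True
            yield batch
        if not got_batch:  # avoid spinning forever on an empty loader
            return


# Hypothetical usage inside the trainer:
#   self.prompt_iterator = infinite_dataloader(self.prompt_dataloader)
#   batch = next(self.prompt_iterator)  # no StopIteration handling needed
```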