diff --git a/rag-advanced/.gitignore b/rag-advanced/.gitignore index 0aef8997..7a2e750f 100644 --- a/rag-advanced/.gitignore +++ b/rag-advanced/.gitignore @@ -1,3 +1,5 @@ .env -data/wandb_docs/* \ No newline at end of file +data/wandb_docs/* + +.venv \ No newline at end of file diff --git a/rag-advanced/data/finance_docs/2022 Q3 AAPL.pdf b/rag-advanced/data/finance_docs/2022 Q3 AAPL.pdf new file mode 100644 index 00000000..f8af3dbe Binary files /dev/null and b/rag-advanced/data/finance_docs/2022 Q3 AAPL.pdf differ diff --git a/rag-advanced/data/finance_docs/2022 Q3 AMZN.pdf b/rag-advanced/data/finance_docs/2022 Q3 AMZN.pdf new file mode 100644 index 00000000..19d95cad Binary files /dev/null and b/rag-advanced/data/finance_docs/2022 Q3 AMZN.pdf differ diff --git a/rag-advanced/data/finance_docs/2022 Q3 INTC.pdf b/rag-advanced/data/finance_docs/2022 Q3 INTC.pdf new file mode 100644 index 00000000..c14a76d8 Binary files /dev/null and b/rag-advanced/data/finance_docs/2022 Q3 INTC.pdf differ diff --git a/rag-advanced/data/finance_docs/2022 Q3 MSFT.pdf b/rag-advanced/data/finance_docs/2022 Q3 MSFT.pdf new file mode 100644 index 00000000..3e3db06e Binary files /dev/null and b/rag-advanced/data/finance_docs/2022 Q3 MSFT.pdf differ diff --git a/rag-advanced/data/finance_docs/2022 Q3 NVDA.pdf b/rag-advanced/data/finance_docs/2022 Q3 NVDA.pdf new file mode 100644 index 00000000..c6b1993f Binary files /dev/null and b/rag-advanced/data/finance_docs/2022 Q3 NVDA.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q1 AAPL.pdf b/rag-advanced/data/finance_docs/2023 Q1 AAPL.pdf new file mode 100644 index 00000000..4620269e Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q1 AAPL.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q1 AMZN.pdf b/rag-advanced/data/finance_docs/2023 Q1 AMZN.pdf new file mode 100644 index 00000000..3b5c7e12 Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q1 AMZN.pdf differ diff --git 
a/rag-advanced/data/finance_docs/2023 Q1 INTC.pdf b/rag-advanced/data/finance_docs/2023 Q1 INTC.pdf new file mode 100644 index 00000000..27f35063 Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q1 INTC.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q1 MSFT.pdf b/rag-advanced/data/finance_docs/2023 Q1 MSFT.pdf new file mode 100644 index 00000000..17e38e4d Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q1 MSFT.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q1 NVDA.pdf b/rag-advanced/data/finance_docs/2023 Q1 NVDA.pdf new file mode 100644 index 00000000..d102df3d Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q1 NVDA.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q2 AAPL.pdf b/rag-advanced/data/finance_docs/2023 Q2 AAPL.pdf new file mode 100644 index 00000000..7b8f1a60 Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q2 AAPL.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q2 AMZN.pdf b/rag-advanced/data/finance_docs/2023 Q2 AMZN.pdf new file mode 100644 index 00000000..0fd39bcb Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q2 AMZN.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q2 INTC.pdf b/rag-advanced/data/finance_docs/2023 Q2 INTC.pdf new file mode 100644 index 00000000..4556c840 Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q2 INTC.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q2 MSFT.pdf b/rag-advanced/data/finance_docs/2023 Q2 MSFT.pdf new file mode 100644 index 00000000..e87f0244 Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q2 MSFT.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q2 NVDA.pdf b/rag-advanced/data/finance_docs/2023 Q2 NVDA.pdf new file mode 100644 index 00000000..34935357 Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q2 NVDA.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q3 AAPL.pdf 
b/rag-advanced/data/finance_docs/2023 Q3 AAPL.pdf new file mode 100644 index 00000000..4fdfdcbc Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q3 AAPL.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q3 AMZN.pdf b/rag-advanced/data/finance_docs/2023 Q3 AMZN.pdf new file mode 100644 index 00000000..6faff905 Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q3 AMZN.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q3 INTC.pdf b/rag-advanced/data/finance_docs/2023 Q3 INTC.pdf new file mode 100644 index 00000000..51ad57d2 Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q3 INTC.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q3 MSFT.pdf b/rag-advanced/data/finance_docs/2023 Q3 MSFT.pdf new file mode 100644 index 00000000..f1057943 Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q3 MSFT.pdf differ diff --git a/rag-advanced/data/finance_docs/2023 Q3 NVDA.pdf b/rag-advanced/data/finance_docs/2023 Q3 NVDA.pdf new file mode 100644 index 00000000..cb35625c Binary files /dev/null and b/rag-advanced/data/finance_docs/2023 Q3 NVDA.pdf differ diff --git a/rag-advanced/notebooks/Chapter00.ipynb b/rag-advanced/notebooks/Chapter00.ipynb index 7f9d932e..9b2108c7 100644 --- a/rag-advanced/notebooks/Chapter00.ipynb +++ b/rag-advanced/notebooks/Chapter00.ipynb @@ -6,7 +6,7 @@ "source": [ "## Chapter 0: Setup\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", @@ -16,7 +16,7 @@ "\n", "### 🎉 Free Cohere API key\n", "\n", - "Before you run this colab notebook, head over to this [link to redeem a free Cohere API key](https://docs.google.com/forms/d/e/1FAIpQLSc9x4nV8_nSQvJnaINO1j9NIa2IUbAJqrKeSllNNCCbMFmCxw/viewform?usp=sf_link).\n", + "Before you run this colab notebook, head over to this [link to redeem a free Cohere API key](http://wandb.me/credits-event)\n", "\n", "Alternatively if you have a Cohere API key feel free to proceed. 
:)" ] @@ -27,7 +27,7 @@ "metadata": {}, "outputs": [], "source": [ - "!pip install -qq weave cohere" + "!pip install -qq weave litellm set-env-colab-kaggle-dotenv" ] }, { @@ -40,6 +40,18 @@ "The code cell below will prompt you to put in a W&B API key. You can get your API key by heading over to https://wandb.ai/authorize." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from set_env import set_env\n", + "set_env(\"WANDB_API_KEY\")\n", + "set_env(\"OPENAI_API_KEY\")\n", + "print(\"API keys set\")" + ] + }, { "cell_type": "code", "execution_count": null, @@ -50,16 +62,14 @@ "import weave\n", "\n", "# initialize weave client\n", - "weave_client = weave.init(\"rag-course\")" + "weave_client = weave.init(\"rag-course-finance\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 2. Setup Cohere\n", - "\n", - "The code cell below will prompt you to put in a Cohere API key." + "## 2. Setup LiteLLM" ] }, { @@ -68,20 +78,14 @@ "metadata": {}, "outputs": [], "source": [ - "import getpass\n", - "\n", - "import cohere\n", - "\n", - "cohere_client = cohere.ClientV2(\n", - " api_key=getpass.getpass(\"Please enter your COHERE_API_KEY\")\n", - ")" + "from litellm import completion" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## A simple-turn chat with Cohere's command-r-plus" + "## A simple-turn chat with OpenAI's gpt-4o-mini" ] }, { @@ -90,11 +94,11 @@ "metadata": {}, "outputs": [], "source": [ - "response = cohere_client.chat(\n", + "response = completion(\n", " messages=[\n", " {\"role\": \"user\", \"content\": \"What is retrieval augmented generation (RAG)?\"}\n", " ],\n", - " model=\"command-r-plus\",\n", + " model=\"gpt-4o-mini\",\n", " temperature=0.1,\n", " max_tokens=2000,\n", ")" @@ -115,6 +119,18 @@ "kernelspec": { "display_name": "Python 3", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", 
+ "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" } }, "nbformat": 4, diff --git a/rag-advanced/notebooks/Chapter01.ipynb b/rag-advanced/notebooks/Chapter01.ipynb index d7d79b4f..f4bd0812 100644 --- a/rag-advanced/notebooks/Chapter01.ipynb +++ b/rag-advanced/notebooks/Chapter01.ipynb @@ -6,7 +6,7 @@ "source": [ "# Chapter 1\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", @@ -23,11 +23,27 @@ "metadata": {}, "outputs": [], "source": [ - "!git clone https://github.com/wandb/edu.git\n", - "%cd edu/rag-advanced\n", - "!pip install -qqq -r requirements.txt\n", - "%cd notebooks\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except ImportError:\n", + " IN_COLAB = False\n", "\n", + "if IN_COLAB:\n", + " !git clone --branch rag-finance-workshop https://github.com/wandb/edu.git \n", + " %cd edu/rag-advanced\n", + " !pip install -qqq -r requirements.txt\n", + " %cd notebooks\n", + "else:\n", + " print(\"Not running in Google Colab. Skipping git clone and pip install commands.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "import nltk\n", "\n", "nltk.download(\"wordnet\")" @@ -48,13 +64,11 @@ "metadata": {}, "outputs": [], "source": [ - "import getpass\n", - "import os\n", - "\n", - "import wandb\n", + "from set_env import set_env\n", "\n", - "os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Please enter your COHERE_API_KEY\")\n", - "wandb.login()" + "set_env(\"OPENAI_API_KEY\")\n", + "set_env(\"WANDB_API_KEY\")\n", + "print(\"API keys set\")" ] }, { @@ -71,25 +85,13 @@ "from scripts.utils import display_source" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here, we will start a Weights & Biases run. We will be using this to download a [W&B Artifact](https://docs.wandb.ai/guides/artifacts) called `wandb_docs`. This is the raw Weights & Biases documentation. 
W&B Artifacts is suited for versiong different data sources which needs preprocessing/cleaning." - ] - }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "WANDB_PROJECT = \"rag-course\"\n", - "\n", - "run = wandb.init(\n", - " project=WANDB_PROJECT,\n", - " group=\"Chapter 1\",\n", - ")" + "WANDB_PROJECT = \"rag-course-finance\"" ] }, { @@ -100,9 +102,7 @@ "\n", "### Loading the data\n", "\n", - "Use [W&B Artifacts](https://docs.wandb.ai/guides/artifacts) to track and version data as the inputs and outputs of your W&B Runs. For example, a model training run might take in a dataset as input and produce a trained model as output. W&B Artifacts is a powerful object storage with rich UI functionalities.\n", - "\n", - "Below we are downloading an artifact named `wandb_docs` which will download 400 odd markdown files in your `../data/wandb_docs` directory. This is will be our data source." + "Below we are downloading a folder named `finance_docs` which will download 20 odd markdown files in your `../data/finance_docs` directory. This is will be our data source." ] }, { @@ -111,19 +111,9 @@ "metadata": {}, "outputs": [], "source": [ - "documents_artifact = run.use_artifact(\n", - " f\"rag-course/dev/wandb_docs:latest\", type=\"dataset\"\n", - ")\n", - "data_dir = \"../data/wandb_docs\"\n", - "\n", - "docs_dir = documents_artifact.download(data_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Let's inspect the `../data/wandb_docs` directory and look at the names of the first 5 files. We should see that they are all in markdown (`.md`) file format." 
+ "from scripts.download_finance_docs import PDFProcessor\n", + "processor = PDFProcessor()\n", + "data = processor.load_pdf_documents()" ] }, { @@ -132,18 +122,14 @@ "metadata": {}, "outputs": [], "source": [ - "docs_dir = pathlib.Path(docs_dir)\n", - "docs_files = sorted(docs_dir.rglob(\"*.md\"))\n", - "\n", - "print(f\"Number of files: {len(docs_files)}\\n\")\n", - "print(\"First 5 files:\\n{files}\".format(files=\"\\n\".join(map(str, docs_files[:5]))))" + "docs_dir = \"../data/finance_docs\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Lets look at an example file. We can take the first element of the list (`docs_files`) and use the `Path.read_text` method to get the decoded contents of the file as a string." + "Let's inspect the `../data/finance_docs` directory and look at the names of the first 5 files. We should see that they are all in markdown (`.pdf`) file format.\n" ] }, { @@ -152,7 +138,11 @@ "metadata": {}, "outputs": [], "source": [ - "print(docs_files[0].read_text())" + "docs_dir = pathlib.Path(docs_dir)\n", + "docs_files = sorted(docs_dir.rglob(\"*.pdf\"))\n", + "\n", + "print(f\"Number of files: {len(docs_files)}\\n\")\n", + "print(\"First 5 files:\\n{files}\".format(files=\"\\n\".join(map(str, docs_files[:5]))))" ] }, { @@ -186,22 +176,18 @@ "metadata": {}, "outputs": [], "source": [ - "# We'll store the files as dictionaries with some content and metadata\n", - "data = []\n", - "for file in docs_files:\n", - " content = file.read_text()\n", - " data.append(\n", - " {\n", - " \"content\": content,\n", - " \"metadata\": {\n", - " \"source\": str(file.relative_to(docs_dir)),\n", - " \"raw_tokens\": len(content.split()),\n", - " },\n", - " }\n", - " )\n", "data[:2]" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(len(data))" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -225,11 +211,6 @@ "source": [ "## W&B Weave\n", "\n", - "In the previous section we 
used W&B Artifacts to download the source documents.\n", - "\n", - "We could have used it to log our processed data but instead we will use W&B Weave for the task.\n", - "\n", - "Why?\n", "\n", "- W&B Weave is standalone and doesn't need backward compatibility with core Weights & Biases offerings.\n", "- W&B Weave is designed for modern LLMOps use case.\n", @@ -255,8 +236,6 @@ "metadata": {}, "outputs": [], "source": [ - "WANDB_PROJECT = \"rag-course\"\n", - "\n", "weave_client = weave.init(WANDB_PROJECT)" ] }, @@ -355,12 +334,8 @@ "metadata": {}, "outputs": [], "source": [ - "# download the `raw_data` Dataset\n", - "raw_data = weave.ref(\n", - " \"weave:///rag-course/rag-course/object/raw_data:nuZosGsP58MXKxhluN2hzvKK9XB8xSxlTuOBmEzWzLo\"\n", - ").get()\n", "# uncomment the next line to get the raw data from weave from your own project instead\n", - "# raw_data = weave.ref(\"raw_data:v0\").get()\n", + "raw_data = weave.ref(\"raw_data:latest\").get()\n", "\n", "# this is how we index into the data\n", "print(raw_data.rows[:2])" @@ -389,13 +364,14 @@ "chunked_data = []\n", "for doc in raw_data.rows:\n", " chunks = split_into_chunks(doc[\"content\"])\n", - " for chunk in chunks:\n", + " for chunk_index, chunk in enumerate(chunks):\n", " chunked_data.append(\n", " {\n", " \"content\": chunk,\n", " \"metadata\": {\n", " \"source\": doc[\"metadata\"][\"source\"],\n", " \"raw_tokens\": len(chunk.split()),\n", + " \"chunk_index\": chunk_index,\n", " },\n", " }\n", " )\n", @@ -454,6 +430,15 @@ "print(cleaned_data[:2])" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(len(cleaned_data))" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -509,11 +494,7 @@ "metadata": {}, "outputs": [], "source": [ - "chunked_data = weave.ref(\n", - " \"weave:///rag-course/rag-course/object/chunked_data:Lt6M8qCUICD1JZTlMYzuDLTVtvFYESxvj3tcAIoPrtE\"\n", - ").get()\n", - "# uncomment the next line to get the chunked data 
from weave from your own project instead\n", - "# chunked_data = weave.ref(\"chunked_data:v0\").get()\n", + "chunked_data = weave.ref(\"chunked_data:latest\").get()\n", "print(chunked_data.rows[:2])" ] }, @@ -596,7 +577,7 @@ "metadata": {}, "outputs": [], "source": [ - "query = \"How do I use W&B to log metrics in my training script?\"\n", + "query = \"How has Apple's total net sales changed over time?\"\n", "search_results = retriever.search(query)\n", "for result in search_results:\n", " print(result)" @@ -610,7 +591,7 @@ "\n", "There are two components of any RAG pipeline - a `Retriever` and a `ResponseGenerator`. Earlier, we designed a simple retriever. Here we are designing a simple `ResponseGenerator`.\n", "\n", - "The `generate_response` method takes the user question along with the retrieved context (chunks) as inputs and makes a LLM call using the `model` and `prompt` (system prompt). This way the generated answer is grounded on the documentation (our usecase). In this course we are using Cohere's `command-r` model.\n", + "The `generate_response` method takes the user question along with the retrieved context (chunks) as inputs and makes a LLM call using the `model` and `prompt` (system prompt). This way the generated answer is grounded on the documentation (our usecase).\n", "\n", "As earlier, we have wrapped this `ResponseGenerator` class with weave for tracking the inputs and the output." ] @@ -647,7 +628,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Let's generate the response for the question \"How do I use Weights & Biases to log metrics in my training script?\". We have already retrieved the context in the previous section and passing both the question and the context to the `generate_response` method." + "Let's generate the response for the question \"How has Apple's total net sales changed over time?\". 
We have already retrieved the context in the previous section and passing both the question and the context to the `generate_response` method." ] }, { @@ -656,9 +637,10 @@ "metadata": {}, "outputs": [], "source": [ - "response_generator = SimpleResponseGenerator(model=\"command-r\", prompt=INITIAL_PROMPT)\n", + "response_generator = SimpleResponseGenerator(model=\"gpt-4o-mini\", prompt=INITIAL_PROMPT)\n", "answer = response_generator.generate_response(query, search_results)\n", - "print(answer)" + "print(answer)\n", + "\n" ] }, { @@ -701,7 +683,7 @@ "outputs": [], "source": [ "# Initialize the response generator\n", - "response_generator = SimpleResponseGenerator(model=\"command-r\", prompt=INITIAL_PROMPT)\n", + "response_generator = SimpleResponseGenerator(model=\"gpt-4o-mini\", prompt=INITIAL_PROMPT)\n", "\n", "# Bring them together as a RAG pipeline\n", "rag_pipeline = SimpleRAGPipeline(\n", @@ -715,7 +697,7 @@ "metadata": {}, "outputs": [], "source": [ - "response = rag_pipeline.predict(\"How do I get get started with wandb?\")\n", + "response = rag_pipeline.predict(\"How has Apple's revenue from iPhone sales fluctuated across quarters?\")\n", "print(response, sep=\"\\n\")" ] }, @@ -738,7 +720,7 @@ "\n", "- **Data Processing**: How to ingest, chunk, and clean data using W&B Artifacts and Weave\n", "- **Retrieval**: Implementing a basic TF-IDF based retriever\n", - "- **Response Generation**: Using Cohere's API and `command-r` model to generate responses based on retrieved context\n", + "- **Response Generation**: Using Litellm's API and \"gpt-4o-mini\" model to generate responses based on retrieved context\n", "- **RAG Pipeline**: Combining retrieval and generation into a cohesive system\n", "- **Logging and Tracking**: Utilizing W&B Weave for efficient experiment tracking" ] @@ -752,6 +734,18 @@ "display_name": "rag-edu", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" } }, "nbformat": 4, diff --git a/rag-advanced/notebooks/Chapter02.ipynb b/rag-advanced/notebooks/Chapter02.ipynb index b9743845..8d100b5f 100644 --- a/rag-advanced/notebooks/Chapter02.ipynb +++ b/rag-advanced/notebooks/Chapter02.ipynb @@ -6,7 +6,7 @@ "source": [ "# Chapter 2:\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", @@ -27,12 +27,27 @@ "metadata": {}, "outputs": [], "source": [ - "# @title Setup\n", - "!git clone https://github.com/wandb/edu.git\n", - "%cd edu/rag-advanced\n", - "!pip install -qqq -r requirements.txt\n", - "%cd notebooks\n", - "\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except ImportError:\n", + " IN_COLAB = False\n", + "\n", + "if IN_COLAB:\n", + " !git clone --branch rag-finance-workshop https://github.com/wandb/edu.git\n", + " %cd edu/rag-advanced\n", + " !pip install -qqq -r requirements.txt\n", + " %cd notebooks\n", + "else:\n", + " print(\"Not running in Google Colab. 
Skipping git clone and pip install commands.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "import nltk\n", "\n", "nltk.download(\"wordnet\")" @@ -44,10 +59,11 @@ "metadata": {}, "outputs": [], "source": [ - "import getpass\n", - "import os\n", + "from set_env import set_env\n", "\n", - "os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Please enter your COHERE_API_KEY\")" + "set_env(\"WANDB_API_KEY\")\n", + "set_env(\"OPENAI_API_KEY\")\n", + "print(\"API keys set\")" ] }, { @@ -82,7 +98,7 @@ "metadata": {}, "outputs": [], "source": [ - "WANDB_PROJECT = \"rag-course\"\n", + "WANDB_PROJECT = \"rag-course-finance\"\n", "\n", "weave_client = weave.init(WANDB_PROJECT)" ] @@ -93,9 +109,9 @@ "source": [ "## Collecting data for evaluation\n", "\n", - "We are using a subset of the evaluation dataset we had created for wandbot.\n", + "We are using a subset of evaluation data from Docugami.\n", "\n", - "Learn more about how we created the evaluation dataset here:\n", + "Learn more about how evaluation datasets are created here:\n", "\n", "- [How to Evaluate an LLM, Part 1: Building an Evaluation Dataset for our LLM System](https://wandb.ai/wandbot/wandbot-eval/reports/How-to-Evaluate-an-LLM-Part-1-Building-an-Evaluation-Dataset-for-our-LLM-System--Vmlldzo1NTAwNTcy)\n", "- [How to Evaluate an LLM, Part 2: Manual Evaluation of Wandbot, our LLM-Powered Docs Assistant](https://wandb.ai/wandbot/wandbot-eval/reports/How-to-Evaluate-an-LLM-Part-2-Manual-Evaluation-of-Wandbot-our-LLM-Powered-Docs-Assistant--Vmlldzo1NzU4NTM3)\n", @@ -109,7 +125,7 @@ "\n", "The evaluation samples are logged as [`weave.Dataset`](https://wandb.github.io/weave/guides/core-types/datasets/). `weave.Dataset` enable you to collect examples for evaluation and automatically track versions for accurate comparisons.\n", "\n", - "Below we will download the latest version locally with a simple API." 
+ "However for this use case we synthetically created the evaluation dataset from the Docugami data in `scripts/generate_context_list.py`." ] }, { @@ -119,9 +135,10 @@ "outputs": [], "source": [ "# Easy eval dataset with 20 samples.\n", - "eval_dataset = weave.ref(\n", - " \"weave:///rag-course/dev/object/Dataset:Qj4IFICc2EbdXu5A5UuhkPiWgxM1GvJMIvXEyv1DYnM\"\n", - ").get()\n", + "# eval_dataset = weave.ref(\n", + "# \"eval_data:latest\"\n", + "# ).get()\n", + "eval_dataset = weave.ref(\"weave:///a-sh0ts/rag-course-finance/object/eval_data:CoQDvdOENbZqkwg7IlhZm33drBCAf9OUNvf8ar6YHzM\").get()\n", "\n", "print(\"Number of evaluation samples: \", len(eval_dataset.rows))" ] @@ -165,12 +182,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Reload the data from Chapter 1\n", - "chunked_data = weave.ref(\n", - " \"weave:///rag-course/rag-course/object/chunked_data:Lt6M8qCUICD1JZTlMYzuDLTVtvFYESxvj3tcAIoPrtE\"\n", - ").get()\n", - "# uncomment the next line to get the chunked data from weave from your own project instead\n", - "# chunked_data = weave.ref(\"chunked_data:v0\").get()\n", + "chunked_data = weave.ref(\"chunked_data:latest\").get()\n", "print(\"Number of chunked data: \", len(chunked_data.rows))\n", "chunked_data.rows[:2]" ] @@ -322,7 +334,7 @@ "from scripts.response_generator import SimpleResponseGenerator\n", "\n", "INITIAL_PROMPT = open(\"prompts/initial_system.txt\", \"r\").read()\n", - "response_generator = SimpleResponseGenerator(model=\"command-r\", prompt=INITIAL_PROMPT)\n", + "response_generator = SimpleResponseGenerator(model=\"gpt-4o-mini\", prompt=INITIAL_PROMPT)\n", "rag_pipeline = SimpleRAGPipeline(\n", " retriever=retriever, response_generator=response_generator, top_k=5\n", ")" @@ -428,6 +440,18 @@ "display_name": "rag-edu", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" } }, "nbformat": 4, diff --git a/rag-advanced/notebooks/Chapter03.ipynb b/rag-advanced/notebooks/Chapter03.ipynb index 09568d7b..71010829 100644 --- a/rag-advanced/notebooks/Chapter03.ipynb +++ b/rag-advanced/notebooks/Chapter03.ipynb @@ -6,7 +6,7 @@ "source": [ "# Chapter 3 \n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", @@ -36,11 +36,27 @@ "metadata": {}, "outputs": [], "source": [ - "!git clone https://github.com/wandb/edu.git\n", - "%cd edu/rag-advanced\n", - "!pip install -qqq -r requirements.txt\n", - "%cd notebooks\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except ImportError:\n", + " IN_COLAB = False\n", "\n", + "if IN_COLAB:\n", + " !git clone --branch rag-finance-workshop https://github.com/wandb/edu.git\n", + " %cd edu/rag-advanced\n", + " !pip install -qqq -r requirements.txt\n", + " %cd notebooks\n", + "else:\n", + " print(\"Not running in Google Colab. Skipping git clone and pip install commands.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "import nltk\n", "\n", "nltk.download(\"wordnet\")\n", @@ -66,10 +82,11 @@ "metadata": {}, "outputs": [], "source": [ - "import getpass\n", - "import os\n", + "from set_env import set_env\n", "\n", - "os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Please enter your COHERE_API_KEY\")" + "set_env(\"WANDB_API_KEY\")\n", + "set_env(\"OPENAI_API_KEY\")\n", + "print(\"API keys set\")" ] }, { @@ -95,7 +112,7 @@ "metadata": {}, "outputs": [], "source": [ - "WANDB_PROJECT = \"rag-course\"\n", + "WANDB_PROJECT = \"rag-course-finance\"\n", "\n", "weave_client = weave.init(WANDB_PROJECT)" ] @@ -124,12 +141,7 @@ "metadata": {}, "outputs": [], "source": [ - "# download the `raw_data` Dataset from chapter 1\n", - "raw_data = weave.ref(\n", - " \"weave:///rag-course/rag-course/object/raw_data:nuZosGsP58MXKxhluN2hzvKK9XB8xSxlTuOBmEzWzLo\"\n", 
- ").get()\n", - "# uncomment the next line to get the raw data from weave from your own project instead\n", - "# raw_data = weave.ref(\"raw_data:v0\").get()\n", + "raw_data = weave.ref(\"raw_data:latest\").get()\n", "\n", "# this is how we index into the data\n", "print(raw_data.rows[:2])\n", @@ -150,10 +162,10 @@ "\n", "**The `tokenize_text` Function**: This function tokenizes input text using Cohere's tokenization API. Here's how it works:\n", "\n", - "1. It initializes a Cohere client using an API key stored in environment variables.\n", - "2. It calls the `tokenize` method of the Cohere client, passing:\n", + "1. It initializes a LiteLLM client using an API key stored in environment variables.\n", + "2. It calls the `tokenize` method of the LiteLLM client, passing:\n", " - The input `text`\n", - " - The specified `model` (defaulting to \"command-r\")\n", + " - The specified `model` (defaulting to \"gpt-4o-mini\")\n", " - `offline=True` to use a locally cached tokenizer for efficiency\n", "\n", "The function returns a list of tokens, which are subword units that the model uses to process text.\n", @@ -367,12 +379,7 @@ "metadata": {}, "outputs": [], "source": [ - "# download the `parsed_data` Dataset\n", - "parsed_data = weave.ref(\n", - " \"weave:///rag-course/rag-course/object/parsed_data:UhWHAwXzvIcYaZ3X1x4eX2KDyYhCM4TPSsj8Oq8dLq4\"\n", - ").get()\n", - "# uncomment the next line to get the parsed data from weave from your own project instead\n", - "# parsed_data = weave.ref(\"parsed_data:v0\").get()\n", + "parsed_data = weave.ref(\"parsed_data:latest\").get()\n", "\n", "# this is how we index into the data\n", "print(parsed_data.rows[:2])\n", @@ -424,11 +431,7 @@ "outputs": [], "source": [ "# fetch the chunked data\n", - "chunked_data = weave.ref(\n", - " \"weave:///rag-course/rag-course/object/chunked_data:Ij9KThmiZQ9ljpCm8rVXTJlCaAbY2qC0zX6UJkBWHQ0\"\n", - ").get()\n", - "# uncomment the next line to get the chunked data from weave from your own project 
instead\n", - "# chunked_data = weave.ref(\"chunked_data:latest\").get()\n", + "chunked_data = weave.ref(\"chunked_data:latest\").get()\n", "\n", "# this is how we index into the data\n", "print(chunked_data.rows[:2])\n", @@ -436,31 +439,6 @@ "chunked_data = list(map(dict, chunked_data.rows[:]))" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mean_chunk_size = np.mean([doc[\"metadata\"][\"parsed_tokens\"] for doc in chunked_data])\n", - "std_chunk_size = np.std([doc[\"metadata\"][\"parsed_tokens\"] for doc in chunked_data])\n", - "print(f\"Mean chunk size: {mean_chunk_size}, Std chunk size: {std_chunk_size}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# if you run your own chunking method, you can publish the chunked data in a weave Dataset\n", - "# # Again, we'll store the chunked data in a weave Dataset\n", - "# chunked_data = weave.Dataset(name=\"chunked_data\", rows=chunked_data)\n", - "\n", - "# # publish the dataset\n", - "# weave.publish(chunked_data)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -527,7 +505,7 @@ "outputs": [], "source": [ "INITIAL_PROMPT = open(\"prompts/initial_system.txt\", \"r\").read()\n", - "response_generator = SimpleResponseGenerator(model=\"command-r\", prompt=INITIAL_PROMPT)\n", + "response_generator = SimpleResponseGenerator(model=\"gpt-4o-mini\", prompt=INITIAL_PROMPT)\n", "bm25_rag_pipeline = SimpleRAGPipeline(\n", " retriever=bm25_retriever, response_generator=response_generator, top_k=5\n", ")\n", @@ -576,9 +554,10 @@ "metadata": {}, "outputs": [], "source": [ - "eval_dataset = weave.ref(\n", - " \"weave:///rag-course/dev/object/Dataset:Qj4IFICc2EbdXu5A5UuhkPiWgxM1GvJMIvXEyv1DYnM\"\n", - ").get()\n", + "# eval_dataset = weave.ref(\n", + "# \"eval_data:latest\"\n", + "# ).get()\n", + "eval_dataset = 
weave.ref(\"weave:///a-sh0ts/rag-course-finance/object/eval_data:CoQDvdOENbZqkwg7IlhZm33drBCAf9OUNvf8ar6YHzM\").get()\n", "\n", "print(\"Number of evaluation samples: \", len(eval_dataset.rows))" ] @@ -695,9 +674,8 @@ "source": [ "# Exercise\n", "\n", - "1. Add more data sources to the RAG system. - Add Jupyter Notbooks from the See wandb/examples repo.\n", - "2. Use a different chunking method. - Try your own parsing and chunking method.\n", - "3. Use a small-to-big retrieval method. Where we embed small documents but retrieve big documents -> You can add the parent document to the metadata and modify the `Retriever.search` method." + "1. Use a different chunking method. - Try your own parsing and chunking method.\n", + "2. Use a small-to-big retrieval method. Where we embed small documents but retrieve big documents -> You can add the parent document to the metadata and modify the `Retriever.search` method." ] } ], @@ -706,6 +684,18 @@ "display_name": "rag-edu", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" } }, "nbformat": 4, diff --git a/rag-advanced/notebooks/Chapter04.ipynb b/rag-advanced/notebooks/Chapter04.ipynb index 138936fa..347d1608 100644 --- a/rag-advanced/notebooks/Chapter04.ipynb +++ b/rag-advanced/notebooks/Chapter04.ipynb @@ -6,7 +6,7 @@ "source": [ "# Chapter 4\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", @@ -49,11 +49,27 @@ "metadata": {}, "outputs": [], "source": [ - "!git clone https://github.com/wandb/edu.git\n", - "%cd edu/rag-advanced\n", - "!pip install -qqq -r requirements.txt\n", - "%cd notebooks\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except ImportError:\n", + " IN_COLAB = False\n", "\n", + "if IN_COLAB:\n", + " !git clone --branch rag-finance-workshop 
https://github.com/wandb/edu.git\n", + " %cd edu/rag-advanced\n", + " !pip install -qqq -r requirements.txt\n", + " %cd notebooks\n", + "else:\n", + " print(\"Not running in Google Colab. Skipping git clone and pip install commands.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "import nltk\n", "\n", "nltk.download(\"wordnet\")\n", @@ -79,10 +95,11 @@ "metadata": {}, "outputs": [], "source": [ - "import getpass\n", - "import os\n", + "from set_env import set_env\n", "\n", - "os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Please enter your COHERE_API_KEY\")" + "set_env(\"WANDB_API_KEY\")\n", + "set_env(\"OPENAI_API_KEY\")\n", + "print(\"API keys set\")" ] }, { @@ -97,7 +114,6 @@ "\n", "nest_asyncio.apply()\n", "\n", - "import cohere\n", "import weave\n", "from IPython.display import Markdown" ] @@ -108,7 +124,7 @@ "metadata": {}, "outputs": [], "source": [ - "WANDB_PROJECT = \"rag-course\"\n", + "WANDB_PROJECT = \"rag-course-finance\"\n", "\n", "weave_client = weave.init(WANDB_PROJECT)" ] @@ -132,12 +148,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Reload the data from Chapter 3\n", - "chunked_data = weave.ref(\n", - " \"weave:///rag-course/rag-course/object/chunked_data:Ij9KThmiZQ9ljpCm8rVXTJlCaAbY2qC0zX6UJkBWHQ0\"\n", - ").get()\n", - "# uncomment the next line to get the chunked data from weave from your own project instead\n", - "# chunked_data = weave.ref(\"chunked_data:latest\").get()\n", + "chunked_data = weave.ref(\"chunked_data:latest\").get()\n", "\n", "print(chunked_data.rows[:2])\n", "\n", @@ -183,7 +194,7 @@ "source": [ "## Analyzing Query Enhancement Results\n", "\n", - "Let's examine the output of our `QueryEnhancer` for the input: \"How do I log images in lightning with wandb?\"" + "Let's examine the output of our `QueryEnhancer` for the input: \"In Intel's Q3 2022 report, compare the capital expenditure data with future investment plans outlined in the management's 
discussion.\"" ] }, { @@ -192,7 +203,7 @@ "metadata": {}, "outputs": [], "source": [ - "response = await query_enhancer.predict(\"How do I log images in lightning with wandb?\")\n", + "response = await query_enhancer.predict(\"In Intel's Q3 2022 report, compare the capital expenditure data with future investment plans outlined in the management's discussion.\")\n", "response" ] }, @@ -205,16 +216,11 @@ "1. **Language Detection**: The query is identified as English ('en'). This allows our system to respond in the appropriate language.\n", "\n", "2. **Sub-query Generation**: The original query is broken down into more specific sub-queries:\n", - " - \"How to log images in lightning with wandb\"\n", - " - \"How to log images in lightning\"\n", - " - \"Log images wandb\"\n", - " - \"Wandb image logging\"\n", - " - \"Log images in lightning\"\n", " \n", " These sub-queries help capture different aspects of the original question, potentially improving retrieval accuracy.\n", "\n", - "3. **Intent Classification**: The query is classified under the \"integrations\" intent. This suggests the user is asking about a specific integration between Lightning and Weights & Biases.\n", - "\n", + "3. **Intent Classification**: The query is classified under a specific intent.\n", + " \n", "By leveraging this enhanced query information, our RAG system can now perform more targeted retrieval and generate more relevant, context-aware responses." 
] }, @@ -271,9 +277,9 @@ "metadata": {}, "outputs": [], "source": [ - "from scripts.response_generator import QueryEnhanedResponseGenerator\n", + "from scripts.response_generator import QueryEnhancedResponseGenerator\n", "\n", - "display_source(QueryEnhanedResponseGenerator)" + "display_source(QueryEnhancedResponseGenerator)" ] }, { @@ -346,8 +352,8 @@ "# lets add the new prompt\n", "QUERY_ENHANCED_PROMPT = open(\"prompts/query_enhanced_system.txt\").read()\n", "\n", - "response_generator = QueryEnhanedResponseGenerator(\n", - " model=\"command-r\", prompt=QUERY_ENHANCED_PROMPT, client=cohere.AsyncClientV2()\n", + "response_generator = QueryEnhancedResponseGenerator(\n", + " model=\"gpt-4o-mini\", prompt=QUERY_ENHANCED_PROMPT\n", ")" ] }, @@ -365,7 +371,7 @@ ")\n", "\n", "response = await query_enhanced_rag_pipeline.predict(\n", - " \"How do I log images in lightning with wandb?\"\n", + " \"In Intel's Q3 2022 report, compare the capital expenditure data with future investment plans outlined in the management's discussion.\"\n", ")\n", "\n", "\n", @@ -410,9 +416,10 @@ "metadata": {}, "outputs": [], "source": [ - "eval_dataset = weave.ref(\n", - " \"weave:///rag-course/dev/object/Dataset:Qj4IFICc2EbdXu5A5UuhkPiWgxM1GvJMIvXEyv1DYnM\"\n", - ").get()\n", + "# eval_dataset = weave.ref(\n", + "# \"eval_data:latest\"\n", + "# ).get()\n", + "eval_dataset = weave.ref(\"weave:///a-sh0ts/rag-course-finance/object/eval_data:CoQDvdOENbZqkwg7IlhZm33drBCAf9OUNvf8ar6YHzM\").get()\n", "\n", "print(eval_dataset.rows[:2])" ] @@ -429,7 +436,7 @@ "from scripts.response_generator import SimpleResponseGenerator\n", "\n", "INITIAL_PROMPT = open(\"prompts/initial_system.txt\", \"r\").read()\n", - "response_generator = SimpleResponseGenerator(model=\"command-r\", prompt=INITIAL_PROMPT)\n", + "response_generator = SimpleResponseGenerator(model=\"gpt-4o-mini\", prompt=INITIAL_PROMPT)\n", "simple_rag_pipeline = SimpleRAGPipeline(\n", " retriever=retriever, response_generator=response_generator, 
top_k=5\n", ")" @@ -562,6 +569,18 @@ "display_name": "rag-edu", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" } }, "nbformat": 4, diff --git a/rag-advanced/notebooks/Chapter05.ipynb b/rag-advanced/notebooks/Chapter05.ipynb index 597e440b..8914e2ea 100644 --- a/rag-advanced/notebooks/Chapter05.ipynb +++ b/rag-advanced/notebooks/Chapter05.ipynb @@ -6,7 +6,7 @@ "source": [ "# Chapter 5\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", @@ -29,12 +29,27 @@ "metadata": {}, "outputs": [], "source": [ - "# @title Setup\n", - "!git clone https://github.com/wandb/edu.git\n", - "%cd edu/rag-advanced\n", - "!pip install -qqq -r requirements.txt\n", - "%cd notebooks\n", - "\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except ImportError:\n", + " IN_COLAB = False\n", + "\n", + "if IN_COLAB:\n", + " !git clone --branch rag-finance-workshop https://github.com/wandb/edu.git\n", + " %cd edu/rag-advanced\n", + " !pip install -qqq -r requirements.txt\n", + " %cd notebooks\n", + "else:\n", + " print(\"Not running in Google Colab. 
Skipping git clone and pip install commands.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "import nltk\n", "\n", "nltk.download(\"wordnet\")\n", @@ -47,10 +62,12 @@ "metadata": {}, "outputs": [], "source": [ - "import getpass\n", - "import os\n", + "from set_env import set_env\n", "\n", - "os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Please enter your COHERE_API_KEY\")" + "set_env(\"WANDB_API_KEY\")\n", + "set_env(\"OPENAI_API_KEY\")\n", + "set_env(\"COHERE_API_KEY\")\n", + "print(\"API keys set\")" ] }, { @@ -74,7 +91,7 @@ "metadata": {}, "outputs": [], "source": [ - "WANDB_PROJECT = \"rag-course\"\n", + "WANDB_PROJECT = \"rag-course-finance\"\n", "\n", "weave_client = weave.init(WANDB_PROJECT)" ] @@ -94,7 +111,7 @@ "source": [ "# Reload the data from Chapter 3\n", "chunked_data = weave.ref(\n", - " \"weave:///rag-course/rag-course/object/chunked_data:Ij9KThmiZQ9ljpCm8rVXTJlCaAbY2qC0zX6UJkBWHQ0\"\n", + " \"chunked_data:latest\"\n", ").get()\n", "# uncomment the next line to get the chunked data from weave from your own project instead\n", "# chunked_data = weave.ref(\"chunked_data:latest\").get()\n", @@ -124,12 +141,12 @@ "metadata": {}, "outputs": [], "source": [ - "from scripts.reranker import CohereReranker\n", + "from scripts.reranker import LiteLLMReranker\n", "from scripts.retriever import DenseRetriever, DenseRetrieverWithReranker\n", "from scripts.utils import display_source\n", "\n", "display_source(DenseRetriever)\n", - "display_source(CohereReranker)\n", + "display_source(LiteLLMReranker)\n", "display_source(DenseRetrieverWithReranker)" ] }, @@ -158,9 +175,10 @@ "source": [ "from scripts.retrieval_metrics import IR_METRICS\n", "\n", - "eval_dataset = weave.ref(\n", - " \"weave:///rag-course/dev/object/Dataset:Qj4IFICc2EbdXu5A5UuhkPiWgxM1GvJMIvXEyv1DYnM\"\n", - ").get()\n", + "# eval_dataset = weave.ref(\n", + "# \"eval_data:latest\"\n", + "# ).get()\n", + "eval_dataset = 
weave.ref(\"weave:///a-sh0ts/rag-course-finance/object/eval_data:CoQDvdOENbZqkwg7IlhZm33drBCAf9OUNvf8ar6YHzM\").get()\n", "\n", "retrieval_evaluation = weave.Evaluation(\n", " name=\"Dense Retrieval Evaluation\",\n", @@ -197,9 +215,10 @@ "source": [ "from scripts.retrieval_metrics import IR_METRICS\n", "\n", - "eval_dataset = weave.ref(\n", - " \"weave:///rag-course/dev/object/Dataset:Qj4IFICc2EbdXu5A5UuhkPiWgxM1GvJMIvXEyv1DYnM\"\n", - ").get()\n", + "# eval_dataset = weave.ref(\n", + "# \"eval_data:latest\"\n", + "# ).get()\n", + "eval_dataset = weave.ref(\"weave:///a-sh0ts/rag-course-finance/object/eval_data:CoQDvdOENbZqkwg7IlhZm33drBCAf9OUNvf8ar6YHzM\").get()\n", "\n", "retrieval_evaluation = weave.Evaluation(\n", " name=\"Dense Retrieval Rerank Evaluation\",\n", @@ -258,9 +277,10 @@ "metadata": {}, "outputs": [], "source": [ - "eval_dataset = weave.ref(\n", - " \"weave:///rag-course/dev/object/Dataset:Qj4IFICc2EbdXu5A5UuhkPiWgxM1GvJMIvXEyv1DYnM\"\n", - ").get()\n", + "# eval_dataset = weave.ref(\n", + "# \"eval_data:latest\"\n", + "# ).get()\n", + "eval_dataset = weave.ref(\"weave:///a-sh0ts/rag-course-finance/object/eval_data:CoQDvdOENbZqkwg7IlhZm33drBCAf9OUNvf8ar6YHzM\").get()\n", "\n", "retrieval_evaluation = weave.Evaluation(\n", " name=\"Dense Retrieval Rerank Evaluation\",\n", @@ -278,6 +298,18 @@ "display_name": "rag-edu", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" } }, "nbformat": 4, diff --git a/rag-advanced/notebooks/Chapter06.ipynb b/rag-advanced/notebooks/Chapter06.ipynb index 1bf36e7a..bf4968d2 100644 --- a/rag-advanced/notebooks/Chapter06.ipynb +++ b/rag-advanced/notebooks/Chapter06.ipynb @@ -6,7 +6,7 @@ "source": [ "# Chapter 6\n", "\n", - "\n", + "\n", " \"Open\n", "\n", "\n", @@ -40,11 +40,27 @@ 
"metadata": {}, "outputs": [], "source": [ - "!git clone https://github.com/wandb/edu.git\n", - "%cd edu/rag-advanced\n", - "!pip install -qqq -r requirements.txt\n", - "%cd notebooks\n", + "try:\n", + " import google.colab\n", + " IN_COLAB = True\n", + "except ImportError:\n", + " IN_COLAB = False\n", "\n", + "if IN_COLAB:\n", + " !git clone --branch rag-finance-workshop https://github.com/wandb/edu.git\n", + " %cd edu/rag-advanced\n", + " !pip install -qqq -r requirements.txt\n", + " %cd notebooks\n", + "else:\n", + " print(\"Not running in Google Colab. Skipping git clone and pip install commands.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "import nltk\n", "\n", "nltk.download(\"wordnet\")\n", @@ -71,10 +87,12 @@ "metadata": {}, "outputs": [], "source": [ - "import getpass\n", - "import os\n", + "from set_env import set_env\n", "\n", - "os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Please enter your COHERE_API_KEY\")" + "set_env(\"WANDB_API_KEY\")\n", + "set_env(\"OPENAI_API_KEY\")\n", + "set_env(\"COHERE_API_KEY\")\n", + "print(\"API keys set\")" ] }, { @@ -98,7 +116,7 @@ "metadata": {}, "outputs": [], "source": [ - "WANDB_PROJECT = \"rag-course\"\n", + "WANDB_PROJECT = \"rag-course-finance\"\n", "\n", "weave_client = weave.init(WANDB_PROJECT)" ] @@ -123,10 +141,8 @@ "source": [ "# Reload the data from Chapter 3\n", "chunked_data = weave.ref(\n", - " \"weave:///rag-course/rag-course/object/chunked_data:Ij9KThmiZQ9ljpCm8rVXTJlCaAbY2qC0zX6UJkBWHQ0\"\n", + " \"chunked_data:latest\"\n", ").get()\n", - "# uncomment the next line to get the chunked data from weave from your own project instead\n", - "# chunked_data = weave.ref(\"chunked_data:latest\").get()\n", "\n", "chunked_data.rows[:2]\n", "chunked_data = list(map(dict, chunked_data.rows[:]))" @@ -145,11 +161,9 @@ "metadata": {}, "outputs": [], "source": [ - "import cohere\n", - "\n", "from scripts.query_enhancer import 
QueryEnhancer\n", "from scripts.rag_pipeline import QueryEnhancedRAGPipeline\n", - "from scripts.response_generator import QueryEnhanedResponseGenerator\n", + "from scripts.response_generator import QueryEnhancedResponseGenerator\n", "from scripts.retriever import HybridRetrieverReranker\n", "\n", "query_enhancer = QueryEnhancer()" @@ -178,9 +192,10 @@ "metadata": {}, "outputs": [], "source": [ - "eval_dataset = weave.ref(\n", - " \"weave:///rag-course/dev/object/Dataset:Qj4IFICc2EbdXu5A5UuhkPiWgxM1GvJMIvXEyv1DYnM\"\n", - ").get()\n", + "# eval_dataset = weave.ref(\n", + "# \"eval_data:latest\"\n", + "# ).get()\n", + "eval_dataset = weave.ref(\"weave:///a-sh0ts/rag-course-finance/object/eval_data:CoQDvdOENbZqkwg7IlhZm33drBCAf9OUNvf8ar6YHzM\").get()\n", "\n", "print(eval_dataset.rows[:2])" ] @@ -240,8 +255,8 @@ "metadata": {}, "outputs": [], "source": [ - "baseline_response_generator = QueryEnhanedResponseGenerator(\n", - " model=\"command-r\", prompt=INITIAL_PROMPT, client=cohere.AsyncClientV2()\n", + "baseline_response_generator = QueryEnhancedResponseGenerator(\n", + " model=\"gpt-4o-mini\", prompt=INITIAL_PROMPT,\n", ")\n", "\n", "\n", @@ -277,7 +292,7 @@ "### Improved Prompt V1: Adding Precise Instructions\n", "\n", "In our first iteration, let's enhance the prompt by providing more detailed instructions to the AI assistant. We'll focus on:\n", - "1. Defining a clear role for the AI as a W&B specialist\n", + "1. Defining a clear role for the AI as a Finance specialist\n", "2. Incorporating dynamic elements like language and intent recognition\n", "3. Outlining a structured approach to formulating responses\n", "4. 
Specifying formatting requirements, including markdown usage\n", @@ -314,8 +329,8 @@ "metadata": {}, "outputs": [], "source": [ - "improved_v1_response_generator = QueryEnhanedResponseGenerator(\n", - " model=\"command-r\", prompt=IMPROVED_PROMPT_V1, client=cohere.AsyncClientV2()\n", + "improved_v1_response_generator = QueryEnhancedResponseGenerator(\n", + " model=\"gpt-4o-mini\", prompt=IMPROVED_PROMPT_V1,\n", ")\n", "\n", "\n", @@ -378,8 +393,8 @@ "metadata": {}, "outputs": [], "source": [ - "improved_v2_response_generator = QueryEnhanedResponseGenerator(\n", - " model=\"command-r\", prompt=IMPROVED_PROMPT_V2, client=cohere.AsyncClientV2()\n", + "improved_v2_response_generator = QueryEnhancedResponseGenerator(\n", + " model=\"gpt-4o-mini\", prompt=IMPROVED_PROMPT_V2,\n", ")\n", "\n", "\n", @@ -443,8 +458,8 @@ "metadata": {}, "outputs": [], "source": [ - "improved_v3_response_generator = QueryEnhanedResponseGenerator(\n", - " model=\"command-r\", prompt=IMPROVED_PROMPT_V3, client=cohere.AsyncClientV2()\n", + "improved_v3_response_generator = QueryEnhancedResponseGenerator(\n", + " model=\"gpt-4o-mini\", prompt=IMPROVED_PROMPT_V3,\n", ")\n", "\n", "\n", @@ -496,8 +511,8 @@ "source": [ "# Can we further imporve by using a better model to generate the response ?\n", "\n", - "improved_v4_response_generator = QueryEnhanedResponseGenerator(\n", - " model=\"command-r-plus\", prompt=IMPROVED_PROMPT_V3, client=cohere.AsyncClientV2()\n", + "improved_v4_response_generator = QueryEnhancedResponseGenerator(\n", + " model=\"gpt-4o\", prompt=IMPROVED_PROMPT_V3,\n", ")\n", "\n", "\n", @@ -596,6 +611,18 @@ "display_name": "rag-edu", "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" } }, "nbformat": 4, diff --git 
a/rag-advanced/notebooks/prompts/correctness_eval.json b/rag-advanced/notebooks/prompts/correctness_eval.json index 384c15db..a85fb7a2 100644 --- a/rag-advanced/notebooks/prompts/correctness_eval.json +++ b/rag-advanced/notebooks/prompts/correctness_eval.json @@ -1,6 +1,6 @@ [ { "role": "system", - "content": "You are a Weight & Biases support expert tasked with evaluating the correctness of answers to questions asked by users to a technical support chatbot. \nYou are tasked with judging the correctness of a generated answer based on the user's question and a reference answer.\n\nYou will be given the following information:\n1. question\n2. reference answer\n3. agent answer\n\nImportant Instruction: To evaluate the generated answer, follow these steps:\n\n1. Intent Analysis: Consider the underlying intent of the question.\n2. Relevance: Check if the generated answer addresses all aspects of the question.\n3. Accuracy: Compare the generated answer to the reference for completeness and correctness.\n4. 
Trustworthiness: Measure how trustworthy the generated answer is compared to the reference.\n\nAssign a score on an integer scale of 0 to 2 with the following meanings:\n- 0 = The generated answer is incorrect and does not satisfy any criteria.\n- 1 = The generated answer is partially correct, contains mistakes, or is not factually correct.\n- 2 = The generated answer is correct, thoroughly answers the question, contains no mistakes, and is factually consistent with the reference answer.\n\nAfter your analysis, provide your verdict in the following JSON format:\n\n{\n \"reason\": \"<>\",\n \"final_score\": <>,\n \"decision\": \"<>\"\n}\n\n## Examples\n\nExample 1:\n{\n \"reason\": \"The generated answer has the exact details as the reference answer and completely answers the user's question.\",\n \"final_score\": 2,\n \"decision\": \"correct\"\n}\n\nExample 2:\n{\n \"reason\": \"The generated answer doesn't match the reference answer and deviates from the user's question.\",\n \"final_score\": 0,\n \"decision\": \"incorrect\"\n}\n\nExample 3:\n{\n \"reason\": \"The generated answer follows the same steps as the reference answer. However, it significantly misses the user's intent,\n \"final_score\": 1,\n \"decision\": \"incorrect\"\n}\n\nExample 4:\n{\n \"reason\": \"The generated is not factually correct and includes assumptions about code methods completely different from the reference answer\",\n \"final_score\": 0,\n \"decision\": \"incorrect\"\n}\n\nPlease provide your evaluation based on the given information and format your response according to the specified JSON structure." + "content": "You are a financial expert specializing in SEC filings and financial reports, tasked with evaluating the correctness of answers to questions asked by users about financial information. You will judge the correctness of a generated answer based on the user's question, provided documents, and a reference answer.\n\nYou will be given the following information:\n1. question\n2. 
reference answer\n3. agent answer\n4. provided documents\n\nImportant Instruction: To evaluate the generated answer, follow these steps:\n\n1. Intent Analysis: Consider the underlying intent of the question and the identified intents provided.\n2. Relevance: Check if the generated answer addresses all aspects of the question and the identified intents.\n3. Accuracy: Compare the generated answer to the reference and provided documents for completeness and correctness.\n4. Trustworthiness: Measure how trustworthy the generated answer is compared to the reference and provided documents.\n5. Citation: Verify that the answer cites specific financial data or statements from the provided documents.\n\nAssign a score on an integer scale of 0 to 2 with the following meanings:\n- 0 = The generated answer is incorrect, does not satisfy the criteria, or fails to cite sources properly.\n- 1 = The generated answer is partially correct, contains minor mistakes, or incompletely addresses the question and intents.\n- 2 = The generated answer is correct, thoroughly answers the question and addresses all intents, contains no mistakes, cites sources properly, and is factually consistent with the reference answer and provided documents.\n\nAfter your analysis, provide your verdict in the following JSON format:\n\n{\n \"reason\": \"<>\",\n \"final_score\": <>,\n \"decision\": \"<>\"\n}\n\n## Examples\n\nExample 1:\n{\n \"reason\": \"The generated answer accurately addresses all aspects of the question and identified intents, provides correct financial information with proper citations from the provided documents, and is consistent with the reference answer.\",\n \"final_score\": 2,\n \"decision\": \"correct\"\n}\n\nExample 2:\n{\n \"reason\": \"The generated answer contains some accurate information but fails to address all identified intents and lacks proper citations for specific financial data.\",\n \"final_score\": 1,\n \"decision\": \"incorrect\"\n}\n\nExample 3:\n{\n \"reason\": 
\"The generated answer provides incorrect financial information that contradicts the provided documents and reference answer. It also fails to cite sources and address the user's primary intent.\",\n \"final_score\": 0,\n \"decision\": \"incorrect\"\n}\n\nPlease provide your evaluation based on the given information and format your response according to the specified JSON structure." } ] \ No newline at end of file diff --git a/rag-advanced/notebooks/prompts/improved_prompt_v1.txt b/rag-advanced/notebooks/prompts/improved_prompt_v1.txt index 6d69c19b..a583535e 100644 --- a/rag-advanced/notebooks/prompts/improved_prompt_v1.txt +++ b/rag-advanced/notebooks/prompts/improved_prompt_v1.txt @@ -1,29 +1,25 @@ -You are an AI assistant specializing in answering questions about Weights & Biases (W&B). Your task is to provide accurate, concise, and helpful responses based on retrieved documentation snippets. Follow these instructions carefully: +You are an AI assistant specializing in analyzing financial reports, particularly 10-Q filings. Your task is to provide accurate, concise, and helpful responses based on the information in these reports. Follow these instructions carefully: -First, review the retrieved documentation snippets related to W&B -Then, consider the user's query -You should respond to the user in the following language: +1. Review the provided 10-Q report content. +2. Consider the user's query about the financial report. +3. Respond to the user in the following language: {language} -We have identified the following intents based on the user's query: -{intents} To formulate your response: -1. Carefully read and understand the content of each retrieved snippet. +1. Carefully read and understand the content of the 10-Q report. 2. Identify the most relevant information to answer the user's query. -3. Pay special attention to code snippets, function names, class names, and method names. -4. 
Provide a concise answer that addresses the user's query and the identified intents. -5. Use information from the retrieved snippets to support your response. -6. Explain code snippets, functions, classes, and methods when they are relevant to the query. -7. Present function, class, and method names exactly as they appear in the retrieved snippets. -8. Include relevant citations from the snippets to support your answer. +3. Provide a concise answer that addresses the user's query. +4. Use information from the 10-Q report to support your response. +5. Explain financial terms, metrics, or concepts when they are relevant to the query. +6. Present financial figures, ratios, and percentages exactly as they appear in the report. +7. Include relevant citations from the report to support your answer. Format your response as follows: - Use markdown formatting for your entire response. -- Use appropriate markdown syntax for headings, lists, code blocks, and emphasis. -- For code snippets, use triple backticks (```) with the appropriate language specifier (e.g., ```python). -- For inline code or function/class/method names, use single backticks (`). -- Include citations using square brackets with numbers, e.g., [1], [2], etc. +- Use appropriate markdown syntax for headings, lists, and emphasis. +- For financial data or metrics, use appropriate formatting (e.g., currency symbols, decimal places). +- Include citations using square brackets with the source document name, e.g., [2023 Q3 10-Q]. -If the retrieved snippets do not contain enough information to fully answer the query, state this clearly in your response and provide the best possible answer with the available information. If the query is unrelated to W&B, politely inform the user that you can only answer questions about Weights & Biases. 
+If the 10-Q report does not contain enough information to fully answer the query, state this clearly in your response and provide the best possible answer with the available information. If the query is unrelated to the 10-Q report, politely inform the user that you can only answer questions about the specific financial report provided. -Remember, your goal is to provide helpful, correct, and concise responses that fully address the user's query and identified intents while maintaining trustworthiness through proper citations and accurate representation of W&B documentation. \ No newline at end of file +Remember, your goal is to provide helpful, correct, and concise responses that fully address the user's query while maintaining accuracy through proper citations and representation of the 10-Q report content. \ No newline at end of file diff --git a/rag-advanced/notebooks/prompts/improved_prompt_v2.txt b/rag-advanced/notebooks/prompts/improved_prompt_v2.txt index 590f2eaf..7b15cb0a 100644 --- a/rag-advanced/notebooks/prompts/improved_prompt_v2.txt +++ b/rag-advanced/notebooks/prompts/improved_prompt_v2.txt @@ -1,74 +1,56 @@ -You are an AI assistant specializing in answering questions about Weights & Biases (W&B). Your task is to provide accurate, concise, and helpful responses based on the retrieved documentation snippets. Follow these instructions carefully: +You are an AI assistant specializing in answering questions about financial reports and SEC filings. Your task is to provide accurate, concise, and helpful responses based on the given information. Follow these instructions carefully: -1. You will receive retrieved documentation snippets related to W&B. These snippets contain relevant information for answering the user's query. -2. You will also be given a user query. +1. You will receive information from financial reports and SEC filings, such as 10-Q documents. +2. You will also be given a user query related to these financial documents. 3. 
You should respond to the user in the following language: {language} 4. We have identified the following intents based on the user's query: {intents} -5. Analyze the retrieved snippets: - - Carefully read and understand the content of each snippet. +5. Analyze the provided information: + - Carefully read and understand the content of the financial documents. - Identify the most relevant information to answer the user's query. - - Pay special attention to code snippets, function names, class names, and method names. + - Pay special attention to financial data, company segments, and key financial metrics. 6. Formulate your response: - Provide a concise answer that addresses the user's query. - - Use information from the retrieved snippets to support your response. - - Explain code snippets, functions, classes, and methods when they are relevant to the query. - - Present function, class, and method names exactly as they appear in the retrieved snippets. - - Include relevant citations from the snippets to support your answer. + - Use information from the financial documents to support your response. + - Explain financial concepts, metrics, and trends when they are relevant to the query. + - Present financial figures and company segment names exactly as they appear in the documents. + - Include relevant citations from the documents to support your answer. 7. Format your response: - Use markdown formatting for your entire response. - Enclose your final answer within tags. - - Use appropriate markdown syntax for headings, lists, code blocks, and emphasis. - - For code snippets, use triple backticks (```) with the appropriate language specifier (e.g., ```python). - - For inline code or function/class/method names, use single backticks (`). + - Use appropriate markdown syntax for headings, lists, tables, and emphasis. + - For financial data or metrics, use appropriate formatting (e.g., currency symbols, percentages). 
- Include citations using square brackets with numbers, e.g., [1], [2], etc. 8. Examples of good responses: -# How to Log Metrics in W&B +# Revenue Distribution Across Amazon's Business Segments -To log metrics in Weights & Biases (W&B), you can use the `wandb.log()` function. This function lets you track various metrics during your model's training process. +For the quarter ended September 30, 2023, Amazon's revenue distribution across its business segments was as follows: -Here's a basic example of how to use `wandb.log()`: +| Segment | Net Sales (millions) | Operating Income/Loss (millions) | +|---------|----------------------|----------------------------------| +| North America | $87,887 | $4,307 | +| International | $32,137 | $(95) | +| AWS | $23,059 | $6,976 | -```python -import wandb +Key observations: +1. The North America segment generated the highest net sales and a significant operating income. +2. The International segment experienced an operating loss despite substantial net sales. +3. AWS, while having lower net sales compared to the other segments, had the highest operating income due to lower operating expenses [1]. -# Initialize a W&B run -wandb.init(project="my-project") +This distribution highlights Amazon's diverse business model and the varying profitability across its segments. -# Train your model and log metrics -for epoch in range(num_epochs): - loss = train_epoch() - accuracy = evaluate_model() - - wandb.log({{ - "epoch": epoch, - "loss": loss, - "accuracy": accuracy - }}) -``` - -In this example, we're logging three metrics: the current epoch, the loss, and the accuracy [1]. You can log any number of metrics as key-value pairs in a dictionary. - -Remember to call `wandb.init()` at the beginning of your script to initialize a new run [2]. This sets up the connection to the W&B servers and creates a new experiment in your project. - -For more advanced logging, you can also log histograms, images, and other data types. 
The W&B documentation provides detailed information on these features [3]. - -References: - -[1] https://docs.wandb.ai/guides/track/about -[2] https://docs.wandb.ai/guides/track/visualize -[3] https://docs.wandb.ai/guides/track/parameters-and-sweeps/about +[1] Source: 2023 Q3 AMZN.pdf 9. Handling edge cases: - - If the retrieved snippets do not contain enough information to fully answer the query, state this clearly in your response and provide the best possible answer with the available information. - - If the query is unrelated to W&B, politely inform the user that you can only answer questions about Weights & Biases. + - If the provided information does not contain enough details to fully answer the query, state this clearly in your response and provide the best possible answer with the available information. + - If the query is unrelated to financial reports or SEC filings, politely inform the user that you can only answer questions about financial documents. -Remember, your goal is to provide helpful, correct, and concise responses that fully address the user's query while maintaining trustworthiness through proper citations and accurate representation of W&B documentation. +Remember, your goal is to provide helpful, correct, and concise responses that fully address the user's query while maintaining trustworthiness through proper citations and accurate representation of financial information. diff --git a/rag-advanced/notebooks/prompts/improved_prompt_v3.txt b/rag-advanced/notebooks/prompts/improved_prompt_v3.txt index 9157cd9c..6fc01c37 100644 --- a/rag-advanced/notebooks/prompts/improved_prompt_v3.txt +++ b/rag-advanced/notebooks/prompts/improved_prompt_v3.txt @@ -1,98 +1,107 @@ -You are an AI assistant specializing in Weights & Biases (W&B). Your task is to provide accurate, detailed, and helpful responses using retrieved documentation snippets. 
Follow these instructions: +You are an AI assistant specializing in financial analysis of tech companies' 10-Q reports. Your task is to provide accurate, detailed, and insightful responses using retrieved documentation snippets. Follow these instructions: -1. You will receive documentation snippets and a user query. +1. You will receive documentation snippets and a user query about a specific tech company's 10-Q report. 2. Respond in the specified language: {language} 3. Identified intents: {intents} ### Process: -1. **Break Down the Query:** Divide the user's query into smaller steps and explain this breakdown. -2. **Analyze Snippets:** - - Read each snippet. - - Identify relevant information and explain its importance. - - For code/functions/classes/methods: - - Explain their purpose and functionality. +1. **Analyze the Query:** Break down the user's query into key components and explain your approach. +2. **Examine Snippets:** + - Carefully read each snippet. + - Identify relevant financial data, trends, and management discussions. + - For financial statements, ratios, or metrics: + - Explain their significance and implications. - Describe their relevance to the query. - - Provide a step-by-step breakdown if applicable. + - Provide comparative analysis if applicable. 3. **Formulate Response:** - - Address each query step with detailed explanations. - - Use snippets to support your response. - - Break down code explanations into logical steps. - - Use exact names from snippets for functions/classes/methods. + - Address each query component with detailed explanations. + - Use snippets to support your analysis. + - Break down financial explanations into logical steps. + - Use exact figures, ratios, and terminology from snippets. - Include citations [1], [2], etc. 4. **Format Response:** - - Use markdown for headings, lists, code blocks, and emphasis. + - Use markdown for headings, lists, tables, and emphasis. - Enclose the final answer in tags. 
- - Use triple backticks for code (e.g., ```python). - - Use inline code formatting for function/class/method names. + - Use triple backticks for code or financial data (e.g., ```financials). + - Use inline code formatting for specific financial terms or metrics. 5. **Structure Response:** - - Overview of approach. - - For each step: - - State the step. - - Explain your thought process. - - Provide relevant information. - - Summarize the step's contribution to the overall answer. - - Conclude with a summary. + - Overview of approach and key findings. + - For each component: + - State the component. + - Explain your analysis process. + - Provide relevant financial data and insights. + - Summarize the component's impact on the overall financial picture. + - Conclude with a comprehensive summary. ### Example: -# Logging Metrics in W&B - -### Approach: -1. Define metrics. -2. Explain basic logging method. -3. Provide a code example. -4. Discuss advanced features. - -### 1. Define Metrics -Metrics in W&B are numerical values tracked during model training/evaluation, such as loss and accuracy [1]. - -### 2. Basic Logging Method -Use `wandb.log()` to log metrics. It takes a dictionary of key-value pairs (metrics) and sends data to W&B servers for visualization [2]. - -### 3. Code Example -```python -import wandb - -wandb.init(project="my-project") - -for epoch in range(num_epochs): - loss = train_epoch() - accuracy = evaluate_model() - - wandb.log({{ - "epoch": epoch, - "loss": loss, - "accuracy": accuracy - }}) -``` -- Initialize W&B with `wandb.init()`. -- Log metrics with `wandb.log()` in each epoch [3]. - -### 4. Advanced Features -Log histograms, images, audio, and video for richer visualizations [4]. 
-```python -wandb.log({{"histogram": wandb.Histogram(numpy_array)}}) -wandb.log({{"image": wandb.Image(numpy_array)}}) + +## Analysis of Intel's Q3 2023 Revenue Distribution and Costs + +### Approach +To analyze Intel's revenue distribution among different semiconductor product lines and compare it to the costs associated with these products, we'll examine the segment reporting data provided in the latest 10-Q. + +### Revenue and Cost Analysis by Segment + +```financials +Client Computing Group (CCG): +- Revenue: $7.867 billion +- Operating income: $2.073 billion + +Data Center and AI (DCAI): +- Revenue: $3.814 billion +- Operating income: $71 million + +Network and Edge (NEX): +- Revenue: $1.450 billion +- Operating income: $17 million + +Mobileye: +- Revenue: $530 million +- Operating income: $170 million + +Intel Foundry Services (IFS): +- Revenue: $311 million +- Operating income: $(86) million + +All other categories: +- Revenue: $186 million +- Operating income: $(2,253) million ``` +### Key Observations + +1. **CCG Dominance:** The Client Computing Group generates the highest revenue and operating income, indicating its significant role in Intel's business. + +2. **DCAI Profitability Challenges:** Despite being the second-largest revenue generator, DCAI's operating income is relatively low, suggesting potential cost management issues or competitive pressures. + +3. **Mobileye Efficiency:** Although Mobileye's revenue is lower, its operating income is higher than NEX and IFS, indicating better profit margins. + +4. **IFS and Other Categories Losses:** Both IFS and "All other categories" are operating at a loss, which could be due to ongoing investments or restructuring efforts. + +### Cost Structure +The total cost of sales for Intel Corporation is $8.140 billion. However, the 10-Q does not provide a detailed breakdown of costs by product line, making it challenging to directly compare costs to revenues for each segment. + +### Implications +1. 
The CCG segment appears to be the most profitable, likely subsidizing losses in other segments. +2. Intel may need to focus on improving profitability in the DCAI segment, given its significant revenue contribution but low operating income. +3. The losses in IFS and other categories suggest ongoing investments that may impact overall profitability in the short term. + ### Conclusion -Consistently logging metrics with `wandb.log()` helps track model performance and make data-driven decisions. +Intel's revenue is primarily driven by the CCG segment, with varying levels of profitability across other segments. The company faces challenges in translating revenue into operating income in some key areas, particularly DCAI. Without detailed cost breakdowns by segment, it's difficult to pinpoint exact cost-revenue relationships, but the operating income figures provide insights into the relative efficiency of each segment. + +[1] SOURCE: 2023 Q3 INTC.pdf, Consolidated Condensed Statements of Income, Notes to Consolidated Condensed Financial Statements (Note 2: Operating Segments). -References: -[1] https://docs.wandb.ai/guides/track/about -[2] https://docs.wandb.ai/guides/track/visualize -[3] https://docs.wandb.ai/guides/track/parameters-and-sweeps/about -[4] https://docs.wandb.ai/guides/track/advanced-logging ### Handling Edge Cases: -- If snippets lack enough information: - - State this limitation. - - Provide the best partial answer. - - Suggest sources or methods to find missing info. -- If the query is unrelated to W&B: +- If snippets lack sufficient information: + - Clearly state the limitations of available data. + - Provide the best partial analysis possible. + - Suggest additional data or reports that could enhance the analysis. +- If the query is unrelated to financial analysis of 10-Q reports: - Inform the user and explain why. - - Suggest how to rephrase the question to relate to W&B. 
+ - Suggest how to rephrase the question to focus on relevant financial aspects. -Your goal is to provide helpful, correct, and detailed responses, maintaining trustworthiness through proper citations and accurate representation of W&B documentation. Always show your reasoning process. \ No newline at end of file +Your goal is to provide comprehensive, accurate, and insightful financial analysis, maintaining credibility through proper citations and precise representation of the 10-Q report data. Always show your reasoning process and highlight any assumptions or limitations in your analysis. \ No newline at end of file diff --git a/rag-advanced/notebooks/prompts/initial_system.txt b/rag-advanced/notebooks/prompts/initial_system.txt index 0e33cef9..1ed677e6 100644 --- a/rag-advanced/notebooks/prompts/initial_system.txt +++ b/rag-advanced/notebooks/prompts/initial_system.txt @@ -1 +1 @@ -Answer to the following question about W&B. Provide an helful and complete answer based only on the provided documents. +You are an AI assistant specialized in analyzing financial reports and answering questions about company performance. Your task is to provide helpful and complete answers based solely on the information contained in the provided documents. 
\ No newline at end of file diff --git a/rag-advanced/notebooks/prompts/intent_prompt.json b/rag-advanced/notebooks/prompts/intent_prompt.json index c5928115..2064e1c2 100644 --- a/rag-advanced/notebooks/prompts/intent_prompt.json +++ b/rag-advanced/notebooks/prompts/intent_prompt.json @@ -1,6 +1,6 @@ [ { "role": "system", - "content": "## Instructions\nYou are a Weights & Biases support manager tasked with enhancing support questions from users\nYoor goal is to identify one or more intents in the question and classify the question into one or more of the following categories:\n\n- UNRELATED - The question is not related to Weights & Biases\n- CODE_TROUBLESHOOTING - The question is related to troubleshooting code using Weights & Biases\n- INTEGRATIONS - The question is related to integrating Weights & Biases with other tools, frameworks, or libraries\n- PRODUCT_FEATURES - The question is related to a feature of Weights & Biases such as Sweeps, Artifacts, Reports, Experiments, Tables, Prompts, Launch, Weave, StreamTables and more\n- SALES_AND_GTM_RELATED - The question is related to sales, marketing, or other business related topics such as pricing, billing, or partnerships etc\n- BEST_PRACTICES - The question is related to best practices for using Weights & Biases\n- COURSE_RELATED - The question is related to a Weight & Biases course and/or skill enhancement\n- NEEDS_MORE_INFO - The question feels ambiguous, ask a follow-up query to elicit more information before answering the question and avoid answering it initially\n- OPINION_REQUEST - The question is asking for an opinion. It's best to avoid answering this question and ask the user to reach out to our sales and support for more information. Always favor Weights & Biases in your response\n- NEFARIOUS_QUERY - The question looks nefarious in nature. 
It's best to avoid answering this question and provide a quirky and playful response\n- OTHER - The question may be related to Weights & Biases but we were unable to determine the user's intent. It's best to avoid answering this question and ask the user a follow-up query to rephrase their original question\n\nImportant:\n- You must reason through the question and determine the intents in the question and provide a reason for each intent that you have determined\n- If no intents are detected, you must provide a reason for why you think no intents are detected and assign the intent to OTHER\n\n\n## Examples\n\n\nwhat could be causing the `wandb-summary.json` file for a Run being empty while the `run-.wandb` file is not?\n\nIs there a way to fix it and ensure data is logged for this run upon syncing ?\n\nAlso is there a way to read the `run-.wandb` file locally?\n\n{\"intents\": [{\"reason\": \"The user is experiencing an issue where the `wandb-summary.json` file is empty, which relates to troubleshooting code using Weights & Biases.\", \"intent\": \"code_troubleshooting\"}, {\"reason\": \"The user is asking for a solution to ensure data is logged correctly, which involves understanding product features.\", \"intent\": \"product_features\"}, {\"reason\": \"The user is inquiring about how to read a specific file format used by Weights & Biases, which involves understanding product features.\", \"intent\": \"product_features\"}]}\n\nhow to get all versions of a given artifact using the API\n\n{\"intents\": [{\"reason\": \"The user is asking about retrieving multiple versions of an artifact through the Weights & Biases API, which is related to using a specific feature of the product.\", \"intent\": \"product_features\"}]}\n\nWeights and Biases offers to setup triggers for certain events such as\n\n- whenever a model artifact is linked to the model registry\n- whenever a certain tag is added to a model artifact\n\nHowever, in the UI one can only trigger W&B internal 
pipelines. Is it also possible to trigger pipelines in other systems such as gitlab, github, airflow, AWS step functions, etc.?\n\n{\"intents\": [{\"reason\": \"The user is asking about the capability of Weights & Biases to integrate with external systems for triggering pipelines, which relates to how Weights & Biases can be integrated with other tools.\", \"intent\": \"integrations\"}]}\n\nHow do I start learning about weave?\n\n{\"intents\": [{\"reason\": \"The user is asking about learning resources and guidance on a specific Weights & Biases feature, Weave.\", \"intent\": \"course_related\"}]}\n\nI have a question that I'd love to get a tip or just how people general deal with that. Sometimes when I am using wandb I have some list in my config (like with the dimensions for a sequence of layer, etc), but that then hinders me when I want to use the sweep because I can't get (I guess) a list of proposed dimensions. How do you deal with that or organize the config to overcome this? The only idea I had was creating two separate args like hidden_dim and n_layers, but I wanted to be able to have control in each layer\n\n{\"intents\": [{\"reason\": \"The user is seeking advice on how to manage configurations in Weights & Biases when using lists that affect the functionality of sweeps.\", \"intent\": \"best_practices\"}, {\"reason\": \"The user is asking for a workaround or method to organize configurations to facilitate the use of sweeps in Weights & Biases.\", \"intent\": \"product_features\"}]}\n\nhow can i access the service account username that logged a particular run using the wandb API?\n\n{\"intents\": [{\"reason\": \"The user is asking about accessing specific data related to a run logged in Weights & Biases using the API, which involves understanding the API's capabilities and usage.\", \"intent\": \"code_troubleshooting\"}]}\n\nI am running a launch, and there are some errors in my process, likely due to my code. 
I have the image in my machine locally, and I would like to debug it. What is my best bet here? I would like to run a docker command with the right run config in my own machine. How?\n\n{\"intents\": [{\"reason\": \"The user is seeking help with debugging a process involving a launch, which is related to a feature of Weights & Biases.\", \"intent\": \"product_features\"}, {\"reason\": \"The user is asking for guidance on how to execute a specific task using a docker command, which involves troubleshooting their code.\", \"intent\": \"code_troubleshooting\"}]}\n\nhow to check for the credits left in wandb wallet\n\n{\"intents\": [{\"reason\": \"The query is related to checking the remaining credits in a user's Weights & Biases account, which is a business-related topic.\", \"intent\": \"sales_and_gtm_related\"}]}\n\nWhat are the best practices about using W&B from a shared Google Colab notebook?\n\n{\"intents\": [{\"reason\": \"The query is asking for guidelines on how to effectively use Weights & Biases in a shared environment like Google Colab.\", \"intent\": \"best_practices\"}]}\n\nWhat that means: Skipping trace saving - unable to safely convert LangChain Run into W&B Trace due to: 'NoneType' object has no attribute 'items'\nYou exceeded your current quota, please check your plan and billing details.\n\n{\"intents\": [{\"reason\": \"The user is encountering an error related to trace saving in Weights & Biases and is also informed about exceeding their quota, which relates to troubleshooting and billing issues.\", \"intent\": \"code_troubleshooting\"}, {\"reason\": \"The user is informed about exceeding their quota which relates to billing and plan details.\", \"intent\": \"sales_and_gtm_related\"}]}\n\nCan you provide me a code snippet to display an Image, stored in a Google cloud bucket, in a media panel in W&B?\n\n{\"intents\": [{\"reason\": \"The user is asking for a specific code example to integrate Weights & Biases with Google Cloud for displaying images, 
which relates to using product features and integrating with other tools.\", \"intent\": \"product_features\"}, {\"reason\": \"The user is asking for a specific code example to integrate Weights & Biases with Google Cloud for displaying images, which relates to using product features and integrating with other tools.\", \"intent\": \"integrations\"}]}\n\nwhy did wandb stop logging model parameters with a huggingface model. I have used in the past and it worked. Maybe I changed an environement or config, how do I reset?\n\n{\"intents\": [{\"reason\": \"The user is experiencing an issue with Weights & Biases not logging model parameters when used with a Huggingface model, which suggests a problem with the integration or configuration.\", \"intent\": \"code_troubleshooting\"}, {\"reason\": \"The user is asking for steps to reset their environment or configuration to resolve the issue, indicating a need for guidance on best practices.\", \"intent\": \"best_practices\"}]}\n\nWhat's the difference between using wandb.save and wandb.Artifact to store model weights ? can I use wandb.save to store unlimited data ?\n\n{\"intents\": [{\"reason\": \"The user is asking about the differences and usage scenarios between two Weights & Biases features for storing data.\", \"intent\": \"product_features\"}, {\"reason\": \"The user is inquiring about the data storage limits associated with a specific Weights & Biases method.\", \"intent\": \"product_features\"}]}\n" + "content": "## Instructions\nYou are an AI assistant specializing in financial analysis and interpretation of SEC filings. Your task is to analyze questions about company financial reports and categorize them based on their focus. 
Use the following categories:\n\n- FINANCIAL_PERFORMANCE: Questions about revenue, profit, margins, or overall financial health\n- OPERATIONAL_METRICS: Questions about specific business metrics, KPIs, or operational performance\n- MARKET_ANALYSIS: Questions about market share, competition, or industry trends\n- RISK_ASSESSMENT: Questions about potential risks, legal issues, or uncertainties facing the company\n- STRATEGIC_INITIATIVES: Questions about company strategy, new products/services, or future plans\n- ACCOUNTING_PRACTICES: Questions about specific accounting methods, policies, or financial reporting practices\n- MANAGEMENT_INSIGHTS: Questions about management commentary, guidance, or leadership decisions\n- CAPITAL_STRUCTURE: Questions about debt, equity, capital allocation, or financing activities\n- SEGMENT_ANALYSIS: Questions about performance or metrics of specific business segments or divisions\n- COMPARATIVE_ANALYSIS: Questions comparing current results to past periods or to other companies\n\nAdditionally, use these categories for questions that don't fit the above financial categories:\n\n- UNRELATED: Questions not related to financial reports or the company\n- NEEDS_MORE_INFO: Questions that require additional context or information to categorize\n- OPINION_REQUEST: Questions asking for subjective opinions rather than factual analysis\n- NEFARIOUS_QUERY: Questions with potentially malicious or unethical intent\n- OTHER: Questions that don't fit any of the above categories\n\nProvide the most relevant category for each question. If a question fits multiple categories, choose the primary focus. Use the additional categories only when the question doesn't fit any of the financial categories." 
} ] \ No newline at end of file diff --git a/rag-advanced/notebooks/prompts/query_enhanced_system.txt b/rag-advanced/notebooks/prompts/query_enhanced_system.txt index 5048ccc1..d3a99903 100644 --- a/rag-advanced/notebooks/prompts/query_enhanced_system.txt +++ b/rag-advanced/notebooks/prompts/query_enhanced_system.txt @@ -1,6 +1,7 @@ -Answer to the user's question about W&B. +Answer the user's question about financial reports and SEC filings. Please respond to the user in the following language: {language} -We have also identifed the following intents based on the user's query: +We have also identified the following intents based on the user's query: {intents} -Tailor your answer to addres the above intents. -Provide an helful and complete answer based only on the provided documents. \ No newline at end of file +Tailor your answer to address the above intents. +Provide a helpful and complete answer based only on the provided documents. +When referencing specific financial data or statements, always cite the source document. \ No newline at end of file diff --git a/rag-advanced/notebooks/prompts/retrieval_eval.json b/rag-advanced/notebooks/prompts/retrieval_eval.json index 6abbb924..c84cdbd6 100644 --- a/rag-advanced/notebooks/prompts/retrieval_eval.json +++ b/rag-advanced/notebooks/prompts/retrieval_eval.json @@ -1,14 +1,14 @@ [ { "role": "system", - "content": "You are a powerful auditor. 
Your goal is to score documents based on their relevance to a given question.\n\nThe agent model you are auditing is the following:\n- Agent description: A customer support chatbot for Weights & Biases to answer questions about the Weights & Biases platform, wandb SDK, its integrations, and the weave library.\n\nThe user will provide the context, consisting of multiple documents wrapped in document id tags, for example - , , etc\n\nFirst, score each document on an integer scale of 0 to 2 with the following meanings:\n 0 = represents that the document is irrelevant to the question and cannot be used to answer the question.\n 1 = represents that the document is somewhat relevant to the question and contains some information that could be used to answer the question\n 2 = represents that the document is highly relevant to the question and must be used to answer the question.\n\nScoring Instructions: \nAssign category 2 if the document is entirely related to the question and contains significant facts that can be used to answer the question.\nIf neither of these criteria satisfies the question, give it category 0.\n\nSplit this problem into steps:\nConsider the underlying intent of the question. Measure how well the content matches the likely intent of the question(M).\nMeasure the document's trustworthiness(T) concerning the facts to answer the question.\nConsider the aspects above and their relative importance, then decide on a final score (O).\nFinal scores must be an integer value only.\nDo not provide any code in the result. Provide each score in the following JSON format: \n{\"final_scores\":[{\"id\": , \"relevance\":}, ...}]}" + "content": "You are a powerful auditor. 
Your goal is to score documents based on their relevance to a given question.\n\nThe agent model you are auditing is the following:\n- Agent description: A financial analyst chatbot to answer questions about quarterly financial reports (10-Qs) for major tech companies like Apple, Microsoft, NVIDIA, Intel, and Amazon.\n\nThe user will provide the context, consisting of multiple documents wrapped in document id tags, for example - <doc_0>, <doc_1>, etc\n\nFirst, score each document on an integer scale of 0 to 2 with the following meanings:\n 0 = represents that the document is irrelevant to the question and cannot be used to answer the question.\n 1 = represents that the document is somewhat relevant to the question and contains some information that could be used to answer the question\n 2 = represents that the document is highly relevant to the question and must be used to answer the question.\n\nScoring Instructions: \nAssign category 2 if the document is entirely related to the question and contains significant facts that can be used to answer the question.\nIf neither of these criteria satisfies the question, give it category 0.\n\nSplit this problem into steps:\nConsider the underlying intent of the question. Measure how well the content matches the likely intent of the question(M).\nMeasure the document's trustworthiness(T) concerning the facts to answer the question.\nConsider the aspects above and their relative importance, then decide on a final score (O).\nFinal scores must be an integer value only.\nDo not provide any code in the result. Provide each score in the following JSON format: \n{\"final_scores\":[{\"id\": <doc_id>, \"relevance\": <score>}, ...]}" }, { "role": "user", - "content": "\nHow do I programmatically access the human-readable run name?\n\n\nIf you do not explicitly name your run, a random run name will be assigned to the run to help identify the run in the UI. 
For instance, random run names will look like \"pleasant-flower-4\" or \"misunderstood-glade-2\".\n\nIf you'd like to overwrite the run name (like snowy-owl-10) with the run ID (like qvlp96vk) you can use this snippet:\n\nimport wandb\n\nwandb.init()\nwandb.run.name = wandb.run.id\nwandb.run.save()\n\n\nA single unit of computation logged by W&B is called a run. You can think of a W&B run as an atomic element of your whole project. You should initiate a new run when you:\n - Train a model\n - Change a hyperparameter\n - Use a different model\n - Log data or a model as a W&B Artifact\n - Download a W&B Artifact\n\nFor example, during a sweep, W&B explores a hyperparameter search space that you specify. Each new hyperparameter combination created by the sweep is implemented and recorded as a unique run. \n\n\nThe run name is available in the `.name` attribute of a `wandb.Run`.\nimport wandb\nwandb.init()\nrun_name = wandb.run.name\n\n\nAfter calling `wandb.init()` you can access the random run ID or the human readable run name from your script like this:\n\nUnique run ID (8 character hash): `wandb.run.id`\nRandom run name (human readable): `wandb.run.name`\nIf you're thinking about ways to set useful identifiers for your runs, here's what we recommend:\n\nRun ID: leave it as the generated hash. 
This needs to be unique across runs in your project.\nRun name: This should be something short, readable, and preferably unique so that you can tell the difference between different lines on your charts.\n\n\nrun-name\nReturns the name of the run\nArgument\n`run`\nA run\nReturn Value\nThe name of the run\nrun-runtime\nReturns the runtime in seconds of the run\nArgument\n`run`\nA run\nReturn Value\nThe runtime in seconds of the run\nrun-summary\nReturns the summary typedDict of the run\n" + "content": "\nHow has Apple's total net sales changed over time?\n\n\nApple Inc.\n\nCONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS (Unaudited)\n(In millions, except number of shares which are reflected in thousands and per share amounts)\n\nThree Months Ended\nJuly 1, 2023 June 25, 2022\nNet sales:\nProducts $60,584 $63,355\nServices 21,213 19,604\nTotal net sales 81,797 82,959\n\n\nApple Inc.\n\nCONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS (Unaudited)\n(In millions, except number of shares which are reflected in thousands and per share amounts)\n\nThree Months Ended\nDecember 31, 2022 December 25, 2021\nNet sales:\nProducts $96,388 $104,429\nServices 20,766 19,516\nTotal net sales 117,154 123,945\n\n\nApple Inc.\n\nCONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS (Unaudited)\n(In millions, except number of shares which are reflected in thousands and per share amounts)\n\nThree Months Ended\nApril 1, 2023 March 26, 2022\nNet sales:\nProducts $73,929 $77,457\nServices 20,907 19,821\nTotal net sales 94,836 97,278\n\n\nMicrosoft Corporation\n\nCONDENSED CONSOLIDATED STATEMENTS OF INCOME\n(In millions, except per share amounts) (Unaudited)\n\nThree Months Ended September 30,\n2023 2022\nRevenue:\nProduct $18,294 $18,295\nService and other 38,109 32,201\nTotal revenue 56,403 50,496\n\n\nNVIDIA Corporation\n\nCONDENSED CONSOLIDATED STATEMENTS OF INCOME\n(In millions, except per share data)\n(Unaudited)\n\nThree Months Ended\nOctober 29, October 30,\n2023 2022\nRevenue $18,120 
$5,931\nCost of revenue 5,851 2,437\nGross profit 12,269 3,494\n" }, { "role": "assistant", - "content": "{\"final_scores\":[{\"id\": 0, \"relevance\":2}, {\"id\": 1, \"relevance\":0}, {\"id\": 2, \"relevance\":2}, {\"id\": 3, \"relevance\":2}, {\"id: 4, \"relevance\":1}]}" + "content": "{\"final_scores\":[{\"id\": 0, \"relevance\":2}, {\"id\": 1, \"relevance\":2}, {\"id\": 2, \"relevance\":2}, {\"id\": 3, \"relevance\":0}, {\"id\": 4, \"relevance\":0}]}" } ] \ No newline at end of file diff --git a/rag-advanced/notebooks/prompts/retrieval_generation_prompt.json b/rag-advanced/notebooks/prompts/retrieval_generation_prompt.json index 2fe69942..5e8a064f 100644 --- a/rag-advanced/notebooks/prompts/retrieval_generation_prompt.json +++ b/rag-advanced/notebooks/prompts/retrieval_generation_prompt.json @@ -1,14 +1,14 @@ [ { "role": "system", - "content": "You are a powerful auditor. Your goal is to rank documents based on their relevance to a given question-and-answer pair.\n\nThe agent model you are auditing is the following:\n-\n Agent description: A customer support chatbot for Weights & Biases \nto answer questions about the Weights & Biases platform, wandb SDK, \nits integrations, and the weave library.\n\nThe user will provide \nthe context, consisting of multiple documents wrapped in document id \ntags, for example - , , etc\n\nFirst, score each document on an integer scale of 0 to 2 with the following meanings:\n 0 = represents that the document is irrelevant to the query and was not used to answer the query.\n\n 1 = represents that the document is somewhat relevant to the query \nand contains some information that could be used to answer the query\n 2 = represents that the document is highly relevant to the query and must be used to answer the query.\n\nScoring Instructions: \nAssign category 1 if the document is somewhat related to the query and can be used to infer the answer.\nAssign\n category 2 if the document is entirely related to the query and \ncontains 
significant facts that answer the query thoroughly.\nIf neither of these criteria satisfies the query, give it category 0.\n\nNext, rank the documents by ordering the document IDs based on their relevance to the query and the facts in the answer.\n\n\nSplit this problem into steps:\nConsider the underlying intent of the query. Measure how well the content matches the likely intent of the query(M).\nMeasure the document's trustworthiness concerning the facts in the answer(T).\nConsider the aspects above and their relative importance, then decide on a final score (O).\nFinal scores must be an integer value only.\nDo not provide any code in the result. Provide each score in the following JSON format: \n{{\"final_scores\":{{:, ...}}, \"relevance\": \n[, ...]}}" + "content": "You are a powerful auditor. Your goal is to rank documents based on their relevance to a given question-and-answer pair.\n\nThe agent model you are auditing is the following:\n-\n Agent description: A financial analyst chatbot to answer questions about quarterly financial reports (10-Qs) for major tech companies like Apple, Microsoft, NVIDIA, Intel, and Amazon.\n\nThe user will provide the context, consisting of multiple documents wrapped in document id tags, for example - , , etc\n\nFirst, score each document on an integer scale of 0 to 2 with the following meanings:\n 0 = represents that the document is irrelevant to the query and was not used to answer the query.\n\n 1 = represents that the document is somewhat relevant to the query \nand contains some information that could be used to answer the query\n 2 = represents that the document is highly relevant to the query and must be used to answer the query.\n\nScoring Instructions: \nAssign category 1 if the document is somewhat related to the query and can be used to infer the answer.\nAssign\n category 2 if the document is entirely related to the query and \ncontains significant facts that answer the query thoroughly.\nIf neither of these 
criteria satisfies the query, give it category 0.\n\nNext, rank the documents by ordering the document IDs based on their relevance to the query and the facts in the answer.\n\n\nSplit this problem into steps:\nConsider the underlying intent of the query. Measure how well the content matches the likely intent of the query(M).\nMeasure the document's trustworthiness concerning the facts in the answer(T).\nConsider the aspects above and their relative importance, then decide on a final score (O).\nFinal scores must be an integer value only.\nDo not provide any code in the result. Provide each score in the following JSON format: \n{{\"final_scores\":{{:, ...}}, \"relevance\": \n[, ...]}}" }, { "role": "user", - "content": "\n\nHow do I programmatically access the human-readable run name?\n\n\nTo programmatically access the human-readable run name in Weights & Biases, you can use the `.name` attribute of a `wandb.Run` object. Here is a simple example:\n\n```\nimport wandb\n# Initialize a W&B run\nwandb.init()\n# Access the human-readable run name\nrun_name = wandb.run.name\nprint(f\"The run name is: {run_name}\")\n```\n\nIn this example:\n- `wandb.init()` initializes a new run.\n- `wandb.run.name` retrieves the human-readable name of the run.\nIf you want to set the run name to the run ID, you can do so as follows:\n\n```\nimport wandb\n# Initialize a W&B run\nwandb.init()\n# Set the run name to the run ID\nwandb.run.name = wandb.run.id\nwandb.run.save()\nprint(f\"The run name is now set to the run ID: {wandb.run.name}\")\n```\n\nThis will overwrite the default human-readable run name with the unique run ID.\n\n\nIf you do not explicitly name your run, a random run name will be assigned to the run to help identify the run in the UI. 
For instance, random run names will look like \"pleasant-flower-4\" or \"misunderstood-glade-2\".\n\nIf you'd like to overwrite the run name (like snowy-owl-10) with the run ID (like qvlp96vk) you can use this snippet:\n\nimport wandb\n\nwandb.init()\nwandb.run.name = wandb.run.id\nwandb.run.save()\n\n\nA single unit of computation logged by W&B is called a run. You can think of a W&B run as an atomic element of your whole project. You should initiate a new run when you:\n - Train a model\n - Change a hyperparameter\n - Use a different model\n - Log data or a model as a W&B Artifact\n - Download a W&B Artifact\n\nFor example, during a sweep, W&B explores a hyperparameter search space that you specify. Each new hyperparameter combination created by the sweep is implemented and recorded as a unique run. \n\n\nThe run name is available in the `.name` attribute of a `wandb.Run`.\nimport wandb\nwandb.init()\nrun_name = wandb.run.name\n\n\nAfter calling `wandb.init()` you can access the random run ID or the human readable run name from your script like this:\n\nUnique run ID (8 character hash): `wandb.run.id`\nRandom run name (human readable): `wandb.run.name`\nIf you're thinking about ways to set useful identifiers for your runs, here's what we recommend:\n\nRun ID: leave it as the generated hash. 
This needs to be unique across runs in your project.\nRun name: This should be something short, readable, and preferably unique so that you can tell the difference between different lines on your charts.\n\n\nrun-name\nReturns the name of the run\nArgument\n`run`\nA run\nReturn Value\nThe name of the run\nrun-runtime\nReturns the runtime in seconds of the run\nArgument\n`run`\nA run\nReturn Value\nThe runtime in seconds of the run\nrun-summary\nReturns the summary typedDict of the run\n\n\n" + "content": "\n\nHow has Apple's total net sales changed over time?\n\n\nBased on the provided documents, Apple's total net sales have changed over time as follows:\n\n- For the quarterly period ended June 25, 2022, the total net sales were $82,959 million. (SOURCE: 2022 Q3 AAPL.pdf)\n- For the quarterly period ended December 31, 2022, the total net sales were $117,154 million. (SOURCE: 2023 Q1 AAPL.pdf)\n- For the quarterly period ended April 1, 2023, the total net sales were $94,836 million. (SOURCE: 2023 Q2 AAPL.pdf)\n- For the quarterly period ended July 1, 2023, the total net sales were $81,797 million. (SOURCE: 2023 Q3 AAPL.pdf)\n\nFrom these figures, it can be observed that there was an increase in total net sales from the quarter ended June 25, 2022, to the quarter ended December 31, 2022. 
However, there was a subsequent decrease in total net sales in the quarters ended April 1, 2023, and July 1, 2023.\n\nSOURCE(S): 2022 Q3 AAPL.pdf, 2023 Q1 AAPL.pdf, 2023 Q2 AAPL.pdf, 2023 Q3 AAPL.pdf\n\n\nApple Inc.\n\nCONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS (Unaudited)\n(In millions, except number of shares which are reflected in thousands and per share amounts)\n\n Three Months Ended Nine Months Ended\n July 1, 2023 June 25, 2022 July 1, 2023 June 25, 2022\n\nNet sales $ 81,797 $ 82,959 $ 288,877 $ 304,182\nCost of sales 46,890 46,386 165,055 170,448\n Gross margin 34,907 36,573 123,822 133,734\nOperating expenses:\n Research and development 7,442 6,797 21,608 19,490\n Selling, general and administrative 5,973 6,012 18,781 18,613\n Total operating expenses 13,415 12,809 40,389 38,103\nOperating income 21,492 23,764 83,433 95,631\nOther income/(expense), net (40) (10) (147) (361)\nIncome before provision for income taxes 21,452 23,754 83,286 95,270\nProvision for income taxes 2,690 3,624 12,688 14,164\nNet income $ 18,762 $ 20,130 $ 70,598 $ 81,106\n\nEarnings per share:\n Basic $ 1.19 $ 1.24 $ 4.47 $ 4.97\n Diluted $ 1.19 $ 1.24 $ 4.45 $ 4.94\nShares used in computing earnings per share:\n Basic 15,728,425 16,162,945 15,800,760 16,319,451\n Diluted 15,814,181 16,262,203 15,883,585 16,423,232\n\nSee accompanying Notes to Condensed Consolidated Financial Statements.\n\n\nApple Inc.\n\nCONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS (Unaudited)\n(In millions, except number of shares which are reflected in thousands and per share amounts)\n\n Three Months Ended Six Months Ended\n April 1, 2023 March 26, 2022 April 1, 2023 March 26, 2022\n\nNet sales $ 94,836 $ 97,278 $ 212,240 $ 221,223\nCost of sales 54,217 54,120 118,165 124,062\n Gross margin 40,619 43,158 94,075 97,161\nOperating expenses:\n Research and development 7,457 6,387 14,166 12,693\n Selling, general and administrative 6,201 6,193 12,808 12,601\n Total operating expenses 13,658 12,580 26,974 
25,294\nOperating income 26,961 30,578 67,101 71,867\nOther income/(expense), net (35) (144) (107) (351)\nIncome before provision for income taxes 26,926 30,434 66,994 71,516\nProvision for income taxes 4,009 4,824 9,998 10,540\nNet income $ 22,917 $ 25,610 $ 56,996 $ 60,976\n\nEarnings per share:\n Basic $ 1.45 $ 1.57 $ 3.59 $ 3.73\n Diluted $ 1.44 $ 1.56 $ 3.58 $ 3.70\nShares used in computing earnings per share:\n Basic 15,815,369 16,278,737 15,837,749 16,350,181\n Diluted 15,893,090 16,403,316 15,919,099 16,477,881\n\nSee accompanying Notes to Condensed Consolidated Financial Statements.\n\n\nApple Inc.\n\nCONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS (Unaudited)\n(In millions, except number of shares which are reflected in thousands and per share amounts)\n\n Three Months Ended\n December 31, 2022 December 25, 2021\n\nNet sales $ 117,154 $ 123,945\nCost of sales 63,948 69,702\n Gross margin 53,206 54,243\nOperating expenses:\n Research and development 6,709 6,306\n Selling, general and administrative 6,607 6,449\n Total operating expenses 13,316 12,755\nOperating income 39,890 41,488\nOther income/(expense), net (72) (247)\nIncome before provision for income taxes 39,818 41,241\nProvision for income taxes 5,989 6,611\nNet income $ 33,829 $ 34,630\n\nEarnings per share:\n Basic $ 2.13 $ 2.11\n Diluted $ 2.12 $ 2.10\nShares used in computing earnings per share:\n Basic 15,892,723 16,394,907\n Diluted 15,956,712 16,519,291\n\nSee accompanying Notes to Condensed Consolidated Financial Statements.\n\n\nApple Inc.\n\nCONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS (Unaudited)\n(In millions, except number of shares which are reflected in thousands and per share amounts)\n\n Three Months Ended Nine Months Ended\n June 25, 2022 June 26, 2021 June 25, 2022 June 26, 2021\n\nNet sales $ 82,959 $ 81,434 $ 304,182 $ 282,456\nCost of sales 46,386 46,179 170,448 163,800\n Gross margin 36,573 35,255 133,734 118,656\nOperating expenses:\n Research and development 6,797 5,717 
19,490 16,142\n Selling, general and administrative 6,012 5,412 18,613 16,357\n Total operating expenses 12,809 11,129 38,103 32,499\nOperating income 23,764 24,126 95,631 86,157\nOther income/(expense), net (10) 281 (361) 1,116\nIncome before provision for income taxes 23,754 24,407 95,270 87,273\nProvision for income taxes 3,624 2,625 14,164 11,830\nNet income $ 20,130 $ 21,782 $ 81,106 $ 75,443\n\nEarnings per share:\n Basic $ 1.24 $ 1.31 $ 4.97 $ 4.50\n Diluted $ 1.24 $ 1.30 $ 4.94 $ 4.46\nShares used in computing earnings per share:\n Basic 16,162,945 16,629,371 16,319,451 16,772,656\n Diluted 16,262,203 16,781,735 16,423,232 16,922,657\n\nSee accompanying Notes to Condensed Consolidated Financial Statements.\n\n\nApple Inc.\n\nCONDENSED CONSOLIDATED STATEMENTS OF OPERATIONS (Unaudited)\n(In millions, except number of shares which are reflected in thousands and per share amounts)\n\n Three Months Ended Six Months Ended\n March 26, 2022 March 27, 2021 March 26, 2022 March 27, 2021\n\nNet sales $ 97,278 $ 89,584 $ 221,223 $ 201,022\nCost of sales 54,120 51,503 124,062 117,621\n Gross margin 43,158 38,081 97,161 83,401\nOperating expenses:\n Research and development 6,387 5,262 12,693 10,425\n Selling, general and administrative 6,193 5,314 12,601 10,945\n Total operating expenses 12,580 10,576 25,294 21,370\nOperating income 30,578 27,505 71,867 62,031\nOther income/(expense), net (144) 508 (351) 835\nIncome before provision for income taxes 30,434 28,013 71,516 62,866\nProvision for income taxes 4,824 4,381 10,540 9,205\nNet income $ 25,610 $ 23,632 $ 60,976 $ 53,661\n\nEarnings per share:\n Basic $ 1.57 $ 1.41 $ 3.73 $ 3.19\n Diluted $ 1.56 $ 1.40 $ 3.70 $ 3.16\nShares used in computing earnings per share:\n Basic 16,278,737 16,753,476 16,350,181 16,823,008\n Diluted 16,403,316 16,929,157 16,477,881 16,983,456\n\nSee accompanying Notes to Condensed Consolidated Financial Statements.\n\n" }, { "role": "assistant", - "content": "{\"final_scores\":{\"doc_1\":2, 
\"doc_2\":0, \"doc_3\":2, \"doc_4\":2, \"doc_5\":1}, \"relevance\": [3, 4, 1, 5, 2]}" + "content": "{\"final_scores\":{\"doc_1\":2, \"doc_2\":2, \"doc_3\":2, \"doc_4\":2, \"doc_5\":1}, \"relevance\": [\"doc_1\", \"doc_2\", \"doc_3\", \"doc_4\", \"doc_5\"]}" } ] \ No newline at end of file diff --git a/rag-advanced/notebooks/prompts/search_query.json b/rag-advanced/notebooks/prompts/search_query.json index 331282e6..3e872c88 100644 --- a/rag-advanced/notebooks/prompts/search_query.json +++ b/rag-advanced/notebooks/prompts/search_query.json @@ -1,6 +1,6 @@ [ { "role": "system", - "content": "## Instruction\nThe user will provide you with a Weights & Biase related question.\nYour goal is to generate 5 distinct search queries so that relevant information can be gathered from the web to answer the user's question.\nRespond only with a list of search queries delimited by new-lines and no other text.\n" + "content": "## Instruction\nThe user will provide you with a question related to financial reports of major tech companies (such as Apple, Microsoft, NVIDIA, Intel, or Amazon).\nYour goal is to generate 5 distinct search queries so that relevant information can be gathered from the web to answer the user's question.\nFocus on key financial metrics, market trends, and company-specific data mentioned in quarterly reports (10-Qs).\nRespond only with a list of search queries delimited by new-lines and no other text.\n\n## Guidelines\n- Ensure each query is specific and relevant to the company and financial aspect in question\n- Use a variety of search techniques (e.g., exact phrases, date ranges, financial terms)\n- Consider including technical financial terms and report-specific language\n- Tailor queries to find information from recent quarterly reports or financial news\n- Do not include any text other than the search queries themselves\n" } -] \ No newline at end of file +] diff --git a/rag-advanced/notebooks/scripts/download_finance_docs.py 
b/rag-advanced/notebooks/scripts/download_finance_docs.py new file mode 100644 index 00000000..2a5b46f4 --- /dev/null +++ b/rag-advanced/notebooks/scripts/download_finance_docs.py @@ -0,0 +1,105 @@ +import concurrent.futures +import requests +import io +from PyPDF2 import PdfReader +from tqdm.notebook import tqdm +import time +import random +import json +import os +import weave +import pathlib + +class PDFProcessor: + def __init__(self): + self.GITHUB_API = "https://api.github.com" + self.REPO_OWNER = "docugami" + self.REPO_NAME = "KG-RAG-datasets" + self.DOCS_PATH = "sec-10-q/data/v1/docs" + self.INITIAL_BACKOFF = 60 + self.MAX_BACKOFF = 3600 + self.MAX_RETRIES = 5 + self.GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN') + self.data_dir = pathlib.Path("../data/finance_docs") + + def github_request(self, url): + headers = {"Authorization": f"token {self.GITHUB_TOKEN}"} if self.GITHUB_TOKEN else {} + backoff = self.INITIAL_BACKOFF + + for attempt in range(self.MAX_RETRIES): + response = requests.get(url, headers=headers) + if response.status_code == 200: + return response + elif response.status_code == 403: + print(f"Received 403 Forbidden. Response: {response.text}") + print(f"Headers: {json.dumps(dict(response.headers), indent=2)}") + + if 'rate limit exceeded' in response.text.lower(): + wait_time = min(backoff * (2 ** attempt) + random.uniform(0, 1), self.MAX_BACKOFF) + print(f"Rate limit exceeded. Attempt {attempt + 1}/{self.MAX_RETRIES}. Waiting for {wait_time:.2f} seconds...") + time.sleep(wait_time) + else: + raise Exception(f"GitHub API request forbidden. Please check your token or permissions.") + else: + print(f"Unexpected status code: {response.status_code}. 
Response: {response.text}") + response.raise_for_status() + + raise Exception(f"Failed to retrieve data after {self.MAX_RETRIES} attempts") + + def get_pdf_files(self): + url = f"{self.GITHUB_API}/repos/{self.REPO_OWNER}/{self.REPO_NAME}/contents/{self.DOCS_PATH}" + response = self.github_request(url) + + contents = response.json() + return [item for item in contents if item["name"].endswith('.pdf')] + + def download_pdf(self, pdf_file): + pdf_url = pdf_file['download_url'] + pdf_name = pdf_file['name'] + local_path = self.data_dir / pdf_name + + if not local_path.exists(): + response = requests.get(pdf_url) + if response.status_code == 200: + local_path.write_bytes(response.content) + return str(local_path) + else: + print(f"Failed to download PDF from {pdf_url}") + return None + return str(local_path) + + def process_pdf(self, pdf_file): + local_path = self.download_pdf(pdf_file) + if local_path: + with open(local_path, 'rb') as file: + pdf_reader = PdfReader(file) + text = "" + for page in pdf_reader.pages: + text += page.extract_text() + return { + "content": text, + "metadata": { + "source": pdf_file['name'], + "raw_tokens": len(text.split()), + }, + } + return None + + @weave.op() + def load_pdf_documents(self): + self.data_dir.mkdir(parents=True, exist_ok=True) + pdf_files = self.get_pdf_files() + + # Use all available CPU cores + num_processes = os.cpu_count() + + data = [] + with concurrent.futures.ProcessPoolExecutor(max_workers=num_processes) as executor: + futures = [executor.submit(self.process_pdf, pdf_file) for pdf_file in pdf_files] + + for future in tqdm(concurrent.futures.as_completed(futures), total=len(pdf_files), desc="Processing PDF files"): + result = future.result() + if result is not None: + data.append(result) + + return data \ No newline at end of file diff --git a/rag-advanced/notebooks/scripts/embedding.py b/rag-advanced/notebooks/scripts/embedding.py index 98f818ae..25bbc26b 100644 --- a/rag-advanced/notebooks/scripts/embedding.py +++ 
b/rag-advanced/notebooks/scripts/embedding.py @@ -1,23 +1,21 @@ """ -This module provides functionality to embed texts using the Cohere API. +This module provides functionality to embed texts using LiteLLM with Cohere embedding models. It includes an EmbeddingFunction class for asynchronous embedding and a sync_embed function for synchronous embedding. """ import asyncio import os from typing import List, Optional, Union - -import cohere from dotenv import load_dotenv +import litellm load_dotenv() TextType = Union[str, List[str]] - class EmbeddingFunction: """ - A class to handle embedding functions using the Cohere API. + A class to handle embedding functions using LiteLLM with Cohere embedding models. """ def __init__( @@ -29,48 +27,48 @@ def __init__( """ Initialize the EmbeddingFunction. - Args: api_key (Optional[str]): The API key for the Cohere API. If not provided, it will be fetched from the - environment variable `CO_API_KEY`. batch_size (int): The number of texts to process in a single batch. - Default is 50. model (str): The model to use for embedding. Default is "embed-english-v3.0". + Args: + api_key (Optional[str]): The API key for Cohere. If not provided, it will be fetched from the + environment variable `CO_API_KEY`. + batch_size (int): The number of texts to process in a single batch. Default is 50. + model (str): The model to use for embedding. Default is "embed-english-v3.0". """ self.api_key = api_key if api_key is not None else os.getenv("CO_API_KEY") - self.client = cohere.AsyncClient(api_key=self.api_key) + litellm.api_key = self.api_key self.batch_size = batch_size - self.embedding_model = model + self.embedding_model = f"cohere/{model}" - async def embed_batch( - self, texts: TextType, input_type: str = "search_document" - ) -> List[float]: + async def embed_batch(self, texts: TextType, input_type: str = "search_document") -> List[List[float]]: """ Embed a batch of texts. 
- Args: texts (TextType): A single string or a list of strings to embed. input_type (str): The type of input, - either "search_document" or "search_query". Default is "search_document". + Args: + texts (TextType): A single string or a list of strings to embed. + input_type (str): The type of input, either "search_document" or "search_query". Default is "search_document". Returns: - List[float]: A list of embeddings for the provided texts. + List[List[float]]: A list of embeddings for the provided texts. """ if isinstance(texts, str): texts = [texts] - response = await self.client.embed( - texts=texts, + response = await asyncio.to_thread( + litellm.embedding, model=self.embedding_model, - input_type=input_type, - embedding_types=["float"], + input=texts, + input_type=input_type ) - return response.embeddings.float + return [item['embedding'] for item in response['data']] - async def embed_texts( - self, texts: TextType, input_type: str = "search_document" - ) -> List[float]: + async def embed_texts(self, texts: TextType, input_type: str = "search_document") -> List[List[float]]: """ Embed multiple texts, handling batching. - Args: texts (TextType): A single string or a list of strings to embed. input_type (str): The type of input, - either "search_document" or "search_query". Default is "search_document". + Args: + texts (TextType): A single string or a list of strings to embed. + input_type (str): The type of input, either "search_document" or "search_query". Default is "search_document". Returns: - List[float]: A list of embeddings for the provided texts. + List[List[float]]: A list of embeddings for the provided texts. """ if isinstance(texts, str): texts = [texts] @@ -81,10 +79,7 @@ async def embed_texts( results = await asyncio.gather(*tasks) return [item for sublist in results for item in sublist] - async def embed_query( - self, - query: str, - ) -> List[float]: + async def embed_query(self, query: str) -> List[float]: """ Embed a single query. 
@@ -94,7 +89,7 @@ async def embed_query( Returns: List[float]: The embedding for the provided query. """ - return await self.embed_texts(query, input_type="search_query") + return (await self.embed_texts(query, input_type="search_query"))[0] async def embed_document(self, document: str) -> List[float]: """ @@ -106,29 +101,28 @@ async def embed_document(self, document: str) -> List[float]: Returns: List[float]: The embedding for the provided document. """ - return await self.embed_texts(document, input_type="search_document") + return (await self.embed_texts(document, input_type="search_document"))[0] - async def __call__( - self, texts: TextType, input_type: str = "search_document" - ) -> List[float]: + async def __call__(self, texts: TextType, input_type: str = "search_document") -> List[List[float]]: """ Embed texts based on the input type. - Args: texts (TextType): A single string or a list of strings to embed. input_type (str): The type of input, - either "search_document" or "search_query". Default is "search_document". + Args: + texts (TextType): A single string or a list of strings to embed. + input_type (str): The type of input, either "search_document" or "search_query". Default is "search_document". Returns: List[List[float]]: A list of embeddings for the provided texts. """ if input_type == "search_query": - return await self.embed_query(texts) + if isinstance(texts, str): + return [await self.embed_query(texts)] + else: + return [await self.embed_query(query) for query in texts] else: return await self.embed_texts(texts, input_type=input_type) - -def sync_embed( - texts: TextType, input_type: str = "search_document" -) -> List[List[float]]: +def sync_embed(texts: TextType, input_type: str = "search_document") -> List[List[float]]: """ Synchronously embed texts based on the input type. @@ -140,6 +134,4 @@ def sync_embed( List[List[float]]: A list of embeddings for the provided texts. 
""" embedding_function = EmbeddingFunction() - return asyncio.get_event_loop().run_until_complete( - embedding_function(texts, input_type=input_type) - ) + return asyncio.get_event_loop().run_until_complete(embedding_function(texts, input_type=input_type)) \ No newline at end of file diff --git a/rag-advanced/notebooks/scripts/generate_context_list.py b/rag-advanced/notebooks/scripts/generate_context_list.py new file mode 100644 index 00000000..6c915bc8 --- /dev/null +++ b/rag-advanced/notebooks/scripts/generate_context_list.py @@ -0,0 +1,114 @@ +from typing import List, Dict +from pydantic import BaseModel, Field +import csv +from typing import List, Dict +from litellm import completion +import weave +from instructor import from_litellm +import requests +import io +import weave + +from set_env import set_env +set_env("OPENAI_API_KEY") +set_env("WANDB_API_KEY") + +completion_with_instructor = from_litellm(completion) + +class Context(BaseModel): + content: str = Field(..., description="A relevant excerpt from a document") + source: str = Field(..., description="The filename of the source document") + score: float = Field(..., ge=0, le=1, description="A relevance score between 0 and 1") + relevance: int = Field(..., ge=0, le=2, description="An integer relevance score: 0 (not relevant), 1 (somewhat relevant), or 2 (highly relevant)") + chunk_index: int = Field(..., description="The index of the chunk in the dataset") + +class ContextList(BaseModel): + contexts: List[Context] = Field(..., description="A list of relevant contexts") + +def filter_chunked_data(chunked_data: List[Dict], source_docs: str) -> List[Dict]: + source_docs_list = [doc.strip() for doc in source_docs.strip('*').split(',')] + return [chunk for chunk in chunked_data if any(doc in chunk['metadata']['source'] for doc in source_docs_list)] + +@weave.op() +def generate_contexts(question: str, answer: str, source_docs: str, filtered_chunks: List[Dict]) -> ContextList: + prompt = f""" + Given the 
following question and answer pair, generate a list of relevant contexts that could have been used to answer the question. Use ONLY the provided chunked documents to inform your selections. + + Question: {question} + Answer: {answer} + Source Documents: {source_docs} + + Select up to 5 most relevant context entries from the chunked documents, focusing EXCLUSIVELY on the mentioned source documents. You MUST only use the contexts provided in the available chunks. Include the chunk_index of each selected chunk in your response. + + Important: + 1. Do NOT generate or invent any context that is not present in the provided chunks. + 2. Only select contexts from the source documents mentioned. + 3. If you can't find relevant contexts in the provided chunks, select fewer or no contexts rather than inventing information. + """ + + # Add filtered chunks to the prompt + prompt += "\n\nAvailable chunks:\n" + for chunk in filtered_chunks: + prompt += f"Chunk Index: {chunk['metadata']['chunk_index']}\nSource: {chunk['metadata']['source']}\nContent: {chunk['cleaned_content']}\n\n" + + contexts = completion_with_instructor.chat.completions.create( + model="gpt-4o", + messages=[ + {"role": "system", "content": "You are an AI assistant tasked with finding relevant contexts for question-answer pairs. 
You must only use the provided chunks and never invent or generate new information."}, + {"role": "user", "content": prompt} + ], + response_model=ContextList + ) + + return contexts + +def clean_csv_content(csv_content: str) -> str: + """Remove BOM and clean CSV content.""" + # Remove BOM if present + cleaned_content = csv_content.lstrip('\ufeff') + return cleaned_content + +@weave.op() +def process_csv(file_path: str, chunked_data: List[Dict]) -> List[Dict]: + processed_data = [] + + # Fetch the CSV content from the URL + response = requests.get(file_path) + response.raise_for_status() # Raise an exception for HTTP errors + + # Clean the CSV content + cleaned_csv_content = clean_csv_content(response.text) + csv_file = io.StringIO(cleaned_csv_content) + + reader = csv.DictReader(csv_file) + for row in reader: + question = row['Question'] + answer = row['Answer'] + source_docs = row['Source Docs'] + + filtered_chunks = filter_chunked_data(chunked_data, source_docs) + contexts = generate_contexts(question, answer, source_docs, filtered_chunks) + + processed_item = { + 'question': question, + 'answer': answer, + 'source_docs': source_docs, + 'question_type': row['Question Type'], + 'source_chunk_type': row['Source Chunk Type'], + 'contexts': [context.dict() for context in contexts.contexts] # Unpack contexts into dicts + } + + processed_data.append(processed_item) + + return processed_data + +# Usage +if __name__ == "__main__": + weave.init("rag-course-finance") + # Load the chunked data + chunked_data = weave.ref("chunked_data:latest").get().rows + csv_file_path = 'https://raw.githubusercontent.com/docugami/KG-RAG-datasets/refs/heads/main/sec-10-q/data/v1/qna_data_mini.csv' + eval_data = process_csv(csv_file_path, chunked_data) + + eval_dataset = weave.Dataset(name="eval_data", rows=eval_data) + weave.publish(eval_dataset) diff --git a/rag-advanced/notebooks/scripts/query_enhancer.py b/rag-advanced/notebooks/scripts/query_enhancer.py index 17174634..82e2fabe 100644 
--- a/rag-advanced/notebooks/scripts/query_enhancer.py +++ b/rag-advanced/notebooks/scripts/query_enhancer.py @@ -1,20 +1,59 @@ """ -This module contains the QueryEnhancer class for enhancing user queries using the Cohere API. +This module contains the QueryEnhancer class for enhancing user queries using LiteLLM. """ import json import os from enum import Enum -from typing import Any, Dict, List - -import cohere +from typing import Any, Dict, List, Literal +import litellm +from litellm import acompletion import weave from ftlangdetect import detect as detect_language -from pydantic import BaseModel +from pydantic import BaseModel, Field +from instructor import from_litellm -from .utils import extract_json_from_markdown, make_cohere_api_call +from .utils import extract_json_from_markdown +completion_with_instructor = from_litellm(acompletion) + +class Intent(BaseModel): + """ + Model representing an intent with a label and a reason. + """ + intent: Literal[ + "financial_performance", + "operational_metrics", + "market_analysis", + "risk_assessment", + "strategic_initiatives", + "accounting_practices", + "management_insights", + "capital_structure", + "segment_analysis", + "comparative_analysis", + "unrelated", + "needs_more_info", + "opinion_request", + "nefarious_query", + "other" + ] = Field( + ..., + description="The predicted intent label for the query" + ) + reason: str = Field( + ..., + description="The reasoning behind the predicted intent" + ) + +class IntentPrediction(BaseModel): + """ + Model representing a list of intents. + """ + intents: List[Intent] = Field( + ..., + description="List of predicted intents for the query" + ) -@weave.op() @weave.op() async def parse_and_validate_response(response_text: str) -> Dict[str, Any]: """ @@ -27,45 +66,16 @@ async def parse_and_validate_response(response_text: str) -> Dict[str, Any]: Dict[str, Any]: A dictionary containing the validated response with enum keys replaced by their values. 
""" - class Labels(str, Enum): - """ - Enum representing different intent labels. - """ - - UNRELATED = "unrelated" - CODE_TROUBLESHOOTING = "code_troubleshooting" - INTEGRATIONS = "integrations" - PRODUCT_FEATURES = "product_features" - SALES_AND_GTM_RELATED = "sales_and_gtm_related" - BEST_PRACTICES = "best_practices" - COURSE_RELATED = "course_related" - NEEDS_MORE_INFO = "needs_more_info" - OPINION_REQUEST = "opinion_intent_promptrequest" - NEFARIOUS_QUERY = "nefarious_query" - OTHER = "other" - - class Intent(BaseModel): - """ - Model representing an intent with a label and a reason. - """ - - intent: Labels - reason: str - - class IntentPrediction(BaseModel): - """ - Model representing a list of intents. - """ - - intents: List[Intent] - - cleaned_text = extract_json_from_markdown(response_text) - parsed_response = json.loads(cleaned_text) - validated_response = IntentPrediction(**parsed_response) + # cleaned_text = extract_json_from_markdown(response_text) + # print(cleaned_text) + # parsed_response = json.loads(cleaned_text) + # print(parsed_response) + # validated_response = IntentPrediction(**parsed_response) + validated_response = response_text response_dict = validated_response.model_dump() response_dict["intents"] = [ - {"intent": intent.intent.value, "reason": intent.reason} + {"intent": intent.intent, "reason": intent.reason} for intent in validated_response.intents ] @@ -73,21 +83,21 @@ class IntentPrediction(BaseModel): @weave.op() -async def call_cohere_with_retry( - co_client: cohere.AsyncClientV2, +async def call_litellm_with_retry( messages: List[Dict[str, Any]], + model: str = "gpt-4o-mini", max_retries: int = 5, ) -> Dict[str, Any]: """ - Call the Cohere API with retry logic. + Call the LiteLLM API with retry logic. Args: - co_client (cohere.AsyncClientV2): The Cohere client to use for the API call. - messages (List[Dict[str, Any]]): The messages to send to the Cohere API. 
+ messages (List[Dict[str, Any]]): The messages to send to the LiteLLM API. + model (str, optional): The model to use. Defaults to "gpt-4o-mini". max_retries (int, optional): The maximum number of retries. Defaults to 5. Returns: - Dict[str, Any]: The parsed and validated response from the Cohere API. + Dict[str, Any]: The parsed and validated response from the LiteLLM API. Raises: Exception: If the maximum number of retries is reached without successful validation. @@ -95,13 +105,14 @@ async def call_cohere_with_retry( for attempt in range(max_retries): response_text = "" try: - response_text = await make_cohere_api_call( - co_client, - messages, - model="command-r-plus", + response = await completion_with_instructor.chat.completions.create( + model=model, + messages=messages, temperature=0.0, max_tokens=1000, + response_model=IntentPrediction, ) + response_text = response return await parse_and_validate_response(response_text) except Exception as e: @@ -126,34 +137,33 @@ async def call_cohere_with_retry( raise Exception("Max retries reached without successful validation") - class QueryEnhancer(weave.Model): - """A class for enhancing user queries using the Cohere API.""" + """A class for enhancing user queries using LiteLLM.""" @weave.op() - async def generate_cohere_queries(self, query: str) -> List[str]: + async def generate_litellm_queries(self, query: str, model: str = "gpt-4o-mini") -> List[str]: """ - Generate search queries using the Cohere API. + Generate search queries using LiteLLM. Args: query (str): The input query for which to generate search queries. + model (str, optional): The model to use. Defaults to "gpt-4o-mini". Returns: List[str]: A list of generated search queries. 
""" - co_client = cohere.AsyncClientV2(api_key=os.getenv("COHERE_API_KEY")) # load system prompt messages = json.load(open("prompts/search_query.json", "r")) # add user prompt (question) messages.append({"role": "user", "content": f"## Question\n{query}"}) - response = await co_client.chat( - model="command-r-plus", + response = await litellm.acompletion( + model=model, + messages=messages, temperature=0.5, max_tokens=500, - messages=messages, ) - search_queries = response.message.content[0].text.splitlines() + search_queries = response.choices[0].message.content.splitlines() return list(filter(lambda x: x.strip(), search_queries)) @weave.op() @@ -161,39 +171,41 @@ async def get_intent_prediction( self, question: str, prompt_file: str = "prompts/intent_prompt.json", + model: str = "gpt-4o-mini", ) -> Dict[str, Any]: """ - Get intent prediction for a given question using the Cohere API. + Get intent prediction for a given question using LiteLLM. Args: question (str): The question for which to get the intent prediction. prompt_file (str, optional): The file path to the prompt JSON. Defaults to "prompts/intent_prompt.json". + model (str, optional): The model to use. Defaults to "gpt-4o-mini". Returns: - List[Dict[str, Any]]: A list of dictionaries containing the intent predictions. + Dict[str, Any]: A dictionary containing the intent predictions. """ - co_client = cohere.AsyncClientV2(api_key=os.environ["COHERE_API_KEY"]) messages = json.load(open(prompt_file)) messages.append( {"role": "user", "content": f"\n{question}\n\n"} ) - return await call_cohere_with_retry(co_client, messages) + return await call_litellm_with_retry(messages, model) @weave.op() - async def predict(self, query: str) -> Dict[str, Any]: + async def predict(self, query: str, model: str = "gpt-4o-mini") -> Dict[str, Any]: """ Predict the language, generate search queries, and get intent predictions for a given query. Args: query (str): The input query to process. 
+ model (str, optional): The model to use. Defaults to "gpt-4o-mini". Returns: Dict[str, Any]: A dictionary containing the original query, detected language, generated search queries, and intent predictions. """ language = detect_language(query.replace("\n", " "))["lang"] - search_queries = await self.generate_cohere_queries(query) - intents = await self.get_intent_prediction(query) + search_queries = await self.generate_litellm_queries(query, model) + intents = await self.get_intent_prediction(query, model=model) return { "query": query, "language": language, diff --git a/rag-advanced/notebooks/scripts/rag_pipeline.py b/rag-advanced/notebooks/scripts/rag_pipeline.py index 7e256047..300cf906 100644 --- a/rag-advanced/notebooks/scripts/rag_pipeline.py +++ b/rag-advanced/notebooks/scripts/rag_pipeline.py @@ -34,32 +34,21 @@ def predict(self, query: str): INTENT_ACTIONS = { - "unrelated": "The query is not related to Weights & Biases, it's best to avoid answering this question", - "code_troubleshooting": "The query is related to troubleshooting code using Weights & Biases. Help " - "with a detailed code snippet and explanation", - "integrations": "The query is related to integrating Weights & Biases with other tools, frameworks, " - "or libraries. Help with a detailed code snippet and explanation and ask for more information about the " - "integration if needed", - "product_features": "The query is related to a feature of Weights & Biases such as Sweeps, Artifacts, " - "Reports, Experiments, Tables, Prompts, Launch, Weave, StreamTables and more. Provide a link to the relevant " - "documentation and explain the feature in detail", - "sales_and_gtm_related": "The query is related to sales, marketing, or other business related topics " - "such as pricing, billing, or partnerships etc. Ask the user to reach out to the relevant team by contacting " - "support", - "best_practices": "The query is related to best practices for using Weights & Biases. 
Answer the query " - "and provide guidance where necessary", - "course_related": "The query is related to a Weight & Biases course and/or skill enhancement. Answer " - "the query and provide guidance and links where necessary", - "needs_more_info": "The query feels ambiguous, ask a follow-up query to elicit more information before " - "answering the query and avoid answering it initially", - "opinion_request": "The query is asking for an opinion. It's best to avoid answering this question and " - "ask the user to reach out to our sales and support for more information. Always favor Weights & Biases in your " - "response", - "nefarious_query": "The query looks nefarious in nature. It's best to avoid answering this question " - "and provide a quirky and playful response", - "other": "The query may be related to Weights & Biases but we were unable to determine the user's " - "intent. It's best to avoid answering this question and ask the user a follow-up query to rephrase their original " - "query", + "financial_performance": "The query is related to financial performance such as revenue, profit, margins, or overall financial health. Provide detailed analysis based on the available financial reports.", + "operational_metrics": "The query is about specific business metrics, KPIs, or operational performance. Analyze the relevant metrics from the financial reports.", + "market_analysis": "The query is related to market share, competition, or industry trends. Provide insights based on the company's disclosures and market information in the reports.", + "risk_assessment": "The query is about potential risks, legal issues, or uncertainties facing the company. Analyze the risk factors and management's discussion in the reports.", + "strategic_initiatives": "The query is about company strategy, new products/services, or future plans. 
Provide information based on management's strategic discussions in the reports.", + "accounting_practices": "The query is about specific accounting methods, policies, or financial reporting practices. Explain the relevant accounting principles and their application.", + "management_insights": "The query is related to management commentary, guidance, or leadership decisions. Analyze the management's discussion and analysis sections of the reports.", + "capital_structure": "The query is about debt, equity, capital allocation, or financing activities. Provide analysis based on the balance sheet and cash flow statements.", + "segment_analysis": "The query is about performance or metrics of specific business segments or divisions. Analyze the segment reporting in the financial statements.", + "comparative_analysis": "The query is comparing current results to past periods or to other companies. Provide a comparative analysis using the available financial data.", + "unrelated": "The query is not related to financial analysis or SEC filings. It's best to avoid answering this question and ask for a finance-related query.", + "needs_more_info": "The query is ambiguous or lacks context. Ask a follow-up question to elicit more specific information about the financial analysis needed.", + "opinion_request": "The query is asking for a subjective opinion rather than factual analysis. Clarify that as an AI, you provide objective analysis based on financial reports, not personal opinions.", + "nefarious_query": "The query appears to have potentially unethical or malicious intent. Avoid answering and suggest focusing on legitimate financial analysis questions.", + "other": "The query may be related to financial analysis, but its intent is unclear. Ask the user to rephrase their question, focusing specifically on aspects of financial reports or SEC filings." 
} diff --git a/rag-advanced/notebooks/scripts/reranker.py b/rag-advanced/notebooks/scripts/reranker.py index f47157ab..8a8db108 100644 --- a/rag-advanced/notebooks/scripts/reranker.py +++ b/rag-advanced/notebooks/scripts/reranker.py @@ -1,21 +1,21 @@ """ -This module contains classes for reranking documents using Cohere's reranking model and a fusion ranking approach. +This module contains classes for reranking documents using LiteLLM's reranking capabilities and a fusion ranking approach. """ import json import os from typing import Any, Dict, List -import cohere import numpy as np import weave +from litellm import rerank -class CohereReranker(weave.Model): +class LiteLLMReranker(weave.Model): """ - A class to rerank documents using Cohere's reranking model. + A class to rerank documents using LiteLLM's reranking capabilities. """ - model: str = "rerank-english-v3.0" + model: str = "cohere/rerank-english-v3.0" @weave.op() def rerank(self, query, docs, top_n=None): @@ -30,16 +30,18 @@ def rerank(self, query, docs, top_n=None): Returns: List[Dict[str, Any]]: A list of reranked documents with relevance scores. 
""" - client = cohere.Client(os.environ["COHERE_API_KEY"]) documents = [doc["text"] for doc in docs] - response = client.rerank( - model=self.model, query=query, documents=documents, top_n=top_n or len(docs) + response = rerank( + model=self.model, + query=query, + documents=documents, + top_n=top_n or len(docs) ) outputs = [] for doc in response.results: - reranked_doc = docs[doc.index] - reranked_doc["relevance_score"] = doc.relevance_score + reranked_doc = docs[doc["index"]] + reranked_doc["relevance_score"] = doc["relevance_score"] outputs.append(reranked_doc) return outputs[:top_n] diff --git a/rag-advanced/notebooks/scripts/response_generator.py b/rag-advanced/notebooks/scripts/response_generator.py index e9ca28a8..6b446e08 100644 --- a/rag-advanced/notebooks/scripts/response_generator.py +++ b/rag-advanced/notebooks/scripts/response_generator.py @@ -1,40 +1,25 @@ """ -A module containing response generators using Cohere's API for generating responses. +A module containing response generators using LiteLLM for generating responses. """ import os from typing import Dict, List -import cohere import weave -from weave.integrations.cohere import cohere_patcher - -cohere_patcher.attempt_patch() +from litellm import completion, acompletion class SimpleResponseGenerator(weave.Model): """ - A simple response generator model using Cohere's API. + A simple response generator model using LiteLLM. Attributes: model (str): The model name to be used for generating responses. prompt (str): The prompt to be used for generating responses. - client (cohere.ClientV2): The Cohere client for interacting with the Cohere API. """ model: str prompt: str - client: cohere.ClientV2 = None - - def __init__(self, **kwargs): - """ - Initialize the SimpleResponseGenerator with the provided keyword arguments. - Sets up the Cohere client using the API key from environment variables. 
- """ - super().__init__(**kwargs) - self.client = cohere.ClientV2( - api_key=os.environ["COHERE_API_KEY"], - ) @weave.op() def generate_context(self, context: List[Dict[str, any]]) -> List[Dict[str, any]]: @@ -53,20 +38,27 @@ def generate_context(self, context: List[Dict[str, any]]) -> List[Dict[str, any] ] return contexts - def create_messages(self, query: str): + def create_messages(self, query: str, context: List[Dict[str, any]]): """ - Create a list of messages for the chat model based on the query. + Create a list of messages for the chat model based on the query and context. Args: query (str): The user's query. + context (List[Dict[str, any]]): A list of dictionaries containing context data. Returns: List[Dict[str, any]]: A list of messages formatted for the chat model. """ messages = [ {"role": "system", "content": self.prompt}, - {"role": "user", "content": query}, ] + + if not self.model.startswith("cohere"): + formatted_context = "\n\n".join([f"Source: {item['data']['source']}\nText: {item['data']['text']}" for item in context]) + messages.append({"role": "user", "content": f"Context:\n{formatted_context}\n\nQuery: {query}"}) + else: + messages.append({"role": "user", "content": query}) + return messages @weave.op() @@ -81,16 +73,20 @@ def generate_response(self, query: str, context: List[Dict[str, any]]) -> str: Returns: str: The generated response from the chat model. 
""" - documents = self.generate_context(context) - messages = self.create_messages(query) - response = self.client.chat( - messages=messages, - model=self.model, - temperature=0.1, - max_tokens=2000, - documents=documents, - ) - return response.message.content[0].text + documents = self.generate_context(context) + messages = self.create_messages(query, documents) + kwargs = { + "model": self.model, + "messages": messages, + "temperature": 0.1, + "max_tokens": 2000, + } + + if self.model.startswith("cohere"): + kwargs["documents"] = documents + + response = completion(**kwargs) + return response['choices'][0]['message']['content'] @weave.op() def predict(self, query: str, context: List[Dict[str, any]]): @@ -107,27 +103,17 @@ def predict(self, query: str, context: List[Dict[str, any]]): return self.generate_response(query, context) -class QueryEnhanedResponseGenerator(weave.Model): +class QueryEnhancedResponseGenerator(weave.Model): """ - A response generator model that enhances queries with additional, language, and intents. + A response generator model that enhances queries with additional language and intents. Attributes: model (str): The model name to be used for generating responses. prompt (str): The prompt to be used for generating responses. - client (cohere.AsyncClient): The asynchronous Cohere client for interacting with the Cohere API. """ model: str prompt: str - client: cohere.AsyncClientV2 = None - - def __init__(self, **kwargs): - """ - Initialize the QueryEnhanedResponseGenerator with the provided keyword arguments. - Sets up the asynchronous Cohere client using the API key from environment variables. 
- """ - super().__init__(**kwargs) - self.client = cohere.AsyncClientV2(api_key=os.environ["COHERE_API_KEY"]) @weave.op() def generate_context(self, context: List[Dict[str, any]]) -> List[Dict[str, any]]: @@ -149,6 +135,7 @@ def generate_context(self, context: List[Dict[str, any]]) -> List[Dict[str, any] def create_messages( self, query: str, + context: List[Dict[str, any]], language: str, intents: List[str], ): @@ -157,20 +144,26 @@ def create_messages( Args: query (str): The user's query. + context (List[Dict[str, any]]): A list of dictionaries containing context data. language (str): The language to be used in the response. intents (List[str]): A list of intents to be considered in the response. Returns: List[Dict[str, any]]: A list of messages formatted for the chat model. """ - messages = [ { "role": "system", "content": self.prompt.format(language=language, intents=intents), }, - {"role": "user", "content": query}, ] + + if not self.model.startswith("cohere"): + formatted_context = "\n\n".join([f"Source: {item['data']['source']}\nText: {item['data']['text']}" for item in context]) + messages.append({"role": "user", "content": f"Context:\n{formatted_context}\n\nQuery: {query}"}) + else: + messages.append({"role": "user", "content": query}) + return messages @weave.op() @@ -194,15 +187,19 @@ async def generate_response( str: The generated response from the chat model. 
""" documents = self.generate_context(context) - messages = self.create_messages(query, language, intents) - response = await self.client.chat( - messages=messages, - model=self.model, - temperature=0.1, - max_tokens=2000, - documents=documents, - ) - return response.message.content[0].text + messages = self.create_messages(query, documents, language, intents) + kwargs = { + "model": self.model, + "messages": messages, + "temperature": 0.1, + "max_tokens": 2000, + } + + if self.model.startswith("cohere"): + kwargs["documents"] = documents + + response = await acompletion(**kwargs) + return response['choices'][0]['message']['content'] @weave.op() async def predict( diff --git a/rag-advanced/notebooks/scripts/response_metrics.py b/rag-advanced/notebooks/scripts/response_metrics.py index d661585b..ff80d0e5 100644 --- a/rag-advanced/notebooks/scripts/response_metrics.py +++ b/rag-advanced/notebooks/scripts/response_metrics.py @@ -8,7 +8,7 @@ import string from typing import Any, Dict, List -import cohere +from litellm import acompletion import Levenshtein import weave from nltk import word_tokenize @@ -18,7 +18,7 @@ from pydantic import BaseModel, field_validator from rouge import Rouge -from .utils import extract_json_from_markdown, make_cohere_api_call +from .utils import extract_json_from_markdown wn.ensure_loaded() @@ -178,21 +178,19 @@ def check_decision(cls, v): @weave.op -async def call_cohere_with_retry( - co_client: cohere.AsyncClientV2, +async def call_litellm_with_retry( messages: List[Dict[str, str]], max_retries: int = 5, ) -> Dict[str, Any]: """ - Call the Cohere API with retry logic. + Call the LiteLLM API with retry logic. Args: - co_client (cohere.AsyncClient): The Cohere asynchronous client. - messages (List[Dict[str, str]]): The list of messages to send to the Cohere API. + messages (List[Dict[str, str]]): The list of messages to send to the LiteLLM API. max_retries (int, optional): The maximum number of retry attempts. Defaults to 5. 
Returns: - Dict[str, Any]: The parsed and validated response from the Cohere API. + Dict[str, Any]: The parsed and validated response from the LiteLLM API. Raises: Exception: If the maximum number of retries is reached without successful validation. @@ -201,13 +199,13 @@ async def call_cohere_with_retry( for attempt in range(max_retries): try: - response_text = await make_cohere_api_call( - co_client, - messages, - model="command-r-plus", + response = await acompletion( + model="gpt-4o", + messages=messages, temperature=0.0, max_tokens=250, ) + response_text = response.choices[0].message.content return await parse_and_validate_response(response_text) except Exception as e: error_message = f"Your previous response resulted in an error:\n{str(e)}" @@ -249,7 +247,6 @@ async def evaluate_correctness_using_llm_judge( Returns: Dict[str, Any]: The evaluation result containing the final score and decision. """ - co_client = cohere.AsyncClientV2(api_key=os.environ["COHERE_API_KEY"]) messages = json.load(open(prompt_file)) message_template = """\n{question}\n\n {reference_answer}\n\n\n{generated_answer}\n""" @@ -264,7 +261,7 @@ async def evaluate_correctness_using_llm_judge( } ) - return await call_cohere_with_retry(co_client, messages) + return await call_litellm_with_retry(messages) @weave.op diff --git a/rag-advanced/notebooks/scripts/retrieval_metrics.py b/rag-advanced/notebooks/scripts/retrieval_metrics.py index 7743c4ae..7d873a6a 100644 --- a/rag-advanced/notebooks/scripts/retrieval_metrics.py +++ b/rag-advanced/notebooks/scripts/retrieval_metrics.py @@ -5,12 +5,12 @@ import os from typing import Any, Dict, List -import cohere +import litellm import numpy as np import weave from pydantic import BaseModel, field_validator -from .utils import extract_json_from_markdown, make_cohere_api_call +from .utils import extract_json_from_markdown @weave.op @@ -379,17 +379,15 @@ def check_unique_ids(cls, v, values, **kwargs): @weave.op -async def call_cohere_with_retry( - 
co_client: cohere.AsyncClientV2, +async def call_litellm_with_retry( messages: List[Dict[str, any]], num_contexts: int, max_retries: int = 5, ) -> Dict[str, Any]: """ - Call the Cohere API with retry logic. + Call the LiteLLM API with retry logic. Args: - co_client (cohere.AsyncClientV2): The Cohere client instance. messages (List[Dict[str, any]]): The list of messages to send to the API. num_contexts (int): The expected number of contexts. max_retries (int, optional): The maximum number of retry attempts. Defaults to 5. @@ -403,13 +401,13 @@ async def call_cohere_with_retry( for attempt in range(max_retries): response_text = "" try: - response_text = await make_cohere_api_call( - co_client, - messages, - model="command-r-plus", + response = await litellm.acompletion( + model="gpt-4o", + messages=messages, temperature=0.0, max_tokens=250, ) + response_text = response.choices[0].message.content return await parse_and_validate_response(response_text, num_contexts) except Exception as e: error_message = f"Your previous response resulted in an error: {str(e)}" @@ -450,8 +448,6 @@ async def evaluate_retrieval_with_llm( Returns: Dict[str, Any]: The validated response from the language model. 
""" - co_client = cohere.AsyncClientV2(api_key=os.environ["COHERE_API_KEY"]) - messages = json.load(open(prompt_file)) message_template = """ @@ -470,7 +466,7 @@ async def evaluate_retrieval_with_llm( } ) - return await call_cohere_with_retry(co_client, messages, len(contexts)) + return await call_litellm_with_retry(messages, len(contexts)) @weave.op @@ -528,4 +524,4 @@ async def llm_retrieval_scorer( llm_retrieval_scorer, ] -ALL_METRICS = IR_METRICS + LLM_METRICS +ALL_METRICS = IR_METRICS + LLM_METRICS \ No newline at end of file diff --git a/rag-advanced/notebooks/scripts/retriever.py b/rag-advanced/notebooks/scripts/retriever.py index 450e7652..e9192da5 100644 --- a/rag-advanced/notebooks/scripts/retriever.py +++ b/rag-advanced/notebooks/scripts/retriever.py @@ -11,7 +11,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer from .embedding import sync_embed -from .reranker import CohereReranker, FusionRanker +from .reranker import LiteLLMReranker, FusionRanker class TFIDFRetriever(weave.Model): @@ -231,11 +231,11 @@ class DenseRetrieverWithReranker(weave.Model): Attributes: retriever (DenseRetriever): The dense retriever model. - reranker (CohereReranker): The reranker model. + reranker (LiteLLMReranker): The reranker model. """ retriever: DenseRetriever = DenseRetriever() - reranker: CohereReranker = CohereReranker() + reranker: LiteLLMReranker = LiteLLMReranker() def index_data(self, data): """ @@ -281,13 +281,13 @@ class HybridRetrieverReranker(weave.Model): sparse_retriever (BM25Retriever): The sparse retriever model using BM25. dense_retriever (DenseRetrieverWithReranker): The dense retriever model with a reranker. fusion_ranker (FusionRanker): The fusion ranker to combine sparse and dense retrievals. - ranker (CohereReranker): The final reranker model. + ranker (LiteLLMReranker): The final reranker model. 
""" sparse_retriever: BM25Retriever = BM25Retriever() dense_retriever: DenseRetrieverWithReranker = DenseRetrieverWithReranker() fusion_ranker: FusionRanker = FusionRanker() - ranker: CohereReranker = CohereReranker() + ranker: LiteLLMReranker = LiteLLMReranker() def index_data(self, data): """ diff --git a/rag-advanced/notebooks/scripts/utils.py b/rag-advanced/notebooks/scripts/utils.py index 4356870b..162b9f6a 100644 --- a/rag-advanced/notebooks/scripts/utils.py +++ b/rag-advanced/notebooks/scripts/utils.py @@ -5,9 +5,10 @@ import os import re from functools import partial -from typing import Dict, List +from typing import Dict, List, Any + +from litellm import acompletion, completion, encode -import cohere import requests import weave from rich.console import Console @@ -58,7 +59,7 @@ def extract_json_from_markdown(text: str) -> str: @weave.op async def make_cohere_api_call( - co_client: cohere.AsyncClientV2, + _client: acompletion, messages: List[Dict[str, any]], **kwargs, ) -> str: @@ -73,7 +74,7 @@ async def make_cohere_api_call( Returns: str: The content of the first message in the response. """ - response = await co_client.chat( + response = await _client.chat( messages=messages, **kwargs, ) @@ -101,34 +102,37 @@ def get_special_tokens_set(tokenizer_url=TOKENIZERS["command-r"]): return set([tok["content"] for tok in response.json()["added_tokens"]]) -def tokenize_text(text: str, model: str = "command-r") -> List[str]: +def tokenize_text(text: str, model: str = "gpt-4o-mini") -> List[int]: """ Tokenizes the given text using the specified model. Args: text (str): The text to be tokenized. - model (str): The model to use for tokenization. Defaults to "command-r". + model (str): The model to use for tokenization. Defaults to "gpt-4o-mini". Returns: - List[str]: A list of tokens. + List[int]: A list of token ids. 
""" - co = cohere.Client(api_key=os.environ["COHERE_API_KEY"]) - return co.tokenize(text=text, model=model, offline=True) + return encode(model=model, text=text) -def length_function(text, model="command-r"): +def length_function(text, model="gpt-4o-mini"): """ Calculate the length of the tokenized text using the specified model. Args: text (str): The text to be tokenized and measured. - model (str): The model to use for tokenization. Defaults to "command-r". + model (str): The model to use for tokenization. Defaults to "gpt-4-mini". Returns: int: The number of tokens in the tokenized text. """ - return len(tokenize_text(text, model=model).tokens) + return len(tokenize_text(text, model=model)) + +# Update the partial function to use gpt-4o-mini as default +length_function_gpt4_mini = partial(length_function, model="gpt-4o-mini") -length_function_command_r = partial(length_function, model="command-r") -length_function_command_r_plus = partial(length_function, model="command-r-plus") +# You can keep these if you still need them, or remove if not necessary +# length_function_command_r = partial(length_function, model="command-r") +# length_function_command_r_plus = partial(length_function, model="command-r-plus") diff --git a/rag-advanced/requirements.txt b/rag-advanced/requirements.txt index c2467fbd..2f832a39 100644 --- a/rag-advanced/requirements.txt +++ b/rag-advanced/requirements.txt @@ -1,21 +1,160 @@ -weave>=0.51.2 -cohere>=5.9.4 -beautifulsoup4>=4.12.3 -levenshtein>=0.25.1 -markdown-it-py>=3.0.0 -nltk>=3.8.1 -numpy>=1.26.4 -pandas>=2.2.2 -pymdown-extensions>=10.8.1 -python-dotenv>=1.0.1 -ranx>=0.3.19 -rouge>=1.0.1 -scikit-learn>=1.5.0 -bm25s>=0.1.10 -PyStemmer>=2.2.0.1 -scipy>=1.14.0 +aiofiles==24.1.0 +aiohappyeyeballs==2.4.3 +aiohttp==3.10.10 +aioprocessing==2.0.1 +aiosignal==1.3.1 +analytics-python==1.2.9 +annotated-types==0.7.0 +anyio==4.6.1 +appnope==0.1.4 +asttokens==2.4.1 +attrs==24.2.0 +backoff==2.2.1 +beautifulsoup4==4.12.3 +bm25s==0.2.2 +cbor==1.0.0 
+cbor2==5.6.5 +certifi==2024.8.30 +charset-normalizer==3.4.0 +click==8.1.7 +comm==0.2.2 +contourpy==1.3.0 +cramjam==2.8.4 +cycler==0.12.1 +debugpy==1.8.7 +decorator==5.1.1 +distro==1.9.0 +docker-pycreds==0.4.0 +docstring_parser==0.16 +emoji==2.14.0 +executing==2.1.0 +fastparquet==2024.5.0 +fasttext==0.9.3 fasttext-langdetect==1.0.5 -tiktoken>=0.7.0 -python-frontmatter>=1.1.0 -syncer<=2.0.3 -numpy<2.0.0 +filelock==3.16.1 +fonttools==4.54.1 +frozenlist==1.4.1 +fsspec==2024.9.0 +gitdb==4.0.11 +GitPython==3.1.43 +gql==3.5.0 +graphql-core==3.2.4 +h11==0.14.0 +httpcore==1.0.6 +httpx==0.27.2 +huggingface-hub==0.25.2 +idna==3.10 +ijson==3.3.0 +importlib_metadata==8.5.0 +inscriptis==2.5.0 +instructor==1.5.2 +ipykernel==6.29.5 +ipython==8.28.0 +ipywidgets==8.1.5 +ir_datasets==0.5.8 +janus==1.0.0 +jedi==0.19.1 +Jinja2==3.1.4 +jiter==0.5.0 +joblib==1.4.2 +jsonschema==4.23.0 +jsonschema-specifications==2024.10.1 +jupyter_client==8.6.3 +jupyter_core==5.7.2 +jupyterlab_widgets==3.0.13 +kiwisolver==1.4.7 +Levenshtein==0.26.0 +litellm==1.49.2 +llvmlite==0.43.0 +loguru==0.7.2 +lxml==5.3.0 +lz4==4.3.3 +Markdown==3.7 +markdown-it-py==3.0.0 +MarkupSafe==3.0.1 +matplotlib==3.9.2 +matplotlib-inline==0.1.7 +mdurl==0.1.2 +multidict==6.1.0 +nest-asyncio==1.6.0 +nltk==3.9.1 +numba==0.60.0 +numpy==1.26.4 +openai==1.51.2 +orjson==3.10.7 +packaging==24.1 +pandas==2.2.3 +parso==0.8.4 +pexpect==4.9.0 +pillow==10.4.0 +platformdirs==4.3.6 +prompt_toolkit==3.0.48 +propcache==0.2.0 +protobuf==5.28.2 +psutil==6.0.0 +ptyprocess==0.7.0 +pure_eval==0.2.3 +pyarrow==16.1.0 +pybind11==2.13.6 +pydantic==2.9.2 +pydantic_core==2.23.4 +Pygments==2.18.0 +pymdown-extensions==10.11.2 +pyparsing==3.2.0 +PyPDF2==3.0.1 +PyStemmer==2.2.0.3 +python-dateutil==2.9.0.post0 +python-dotenv==1.0.1 +python-frontmatter==1.1.0 +python-json-logger==2.0.7 +pytz==2024.2 +PyYAML==6.0.2 +pyzmq==26.2.0 +ranx==0.3.20 +RapidFuzz==3.10.0 +referencing==0.35.1 +regex==2024.9.11 +requests==2.32.3 +requests-toolbelt==1.0.0 +rich==13.9.2 
+rouge==1.0.1 +rpds-py==0.20.0 +scikit-learn==1.5.2 +scipy==1.14.1 +seaborn==0.13.2 +sentry-sdk==2.16.0 +set-env-colab-kaggle-dotenv==0.1.3 +setproctitle==1.3.3 +setuptools==75.1.0 +shellingham==1.5.4 +six==1.16.0 +smmap==5.0.1 +sniffio==1.3.1 +soupsieve==2.6 +stack-data==0.6.3 +syncer==2.0.3 +tabulate==0.9.0 +tenacity==8.5.0 +threadpoolctl==3.5.0 +tiktoken==0.8.0 +tokenizers==0.20.1 +tornado==6.4.1 +tqdm==4.66.5 +traitlets==5.14.3 +trec-car-tools==2.6 +typer==0.12.5 +typing_extensions==4.12.2 +tzdata==2024.2 +unlzw3==0.2.2 +urllib3==2.2.3 +uuid_utils==0.9.0 +wandb==0.18.3 +warc3-wet==0.2.5 +warc3-wet-clueweb09==0.2.5 +wcwidth==0.2.13 +weave==0.51.12 +Werkzeug==3.0.4 +widgetsnbextension==4.0.13 +yarl==1.15.1 +zipp==3.20.2 +zlib-state==0.1.9