From f0a574ba5c0e5d6d5aecba4dd81c4d7996819af1 Mon Sep 17 00:00:00 2001 From: Shreya Mishra <59368657+shrey2003@users.noreply.github.com> Date: Wed, 11 Mar 2026 07:22:02 -0700 Subject: [PATCH 1/9] Deleted devrev_search.ipynb --- devrev_search.ipynb | 1142 ------------------------------------------- 1 file changed, 1142 deletions(-) delete mode 100644 devrev_search.ipynb diff --git a/devrev_search.ipynb b/devrev_search.ipynb deleted file mode 100644 index 11d47a8..0000000 --- a/devrev_search.ipynb +++ /dev/null @@ -1,1142 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# DevRev Search Dataset\n", - "\n", - "Loading and exploring the `devrev/search` dataset from Hugging Face.\n", - "\n", - "**Dataset Structure:**\n", - "- `annotated_queries` — Queries paired with annotated (golden) article chunks\n", - "- `knowledge_base` — Article chunks from DevRev's customer-facing support documentation\n", - "- `test_queries` — Held-out queries used for evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "/Users/prakharagarwal/devrev/devrev-search-bench/.venv/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "from datasets import load_dataset\n", - "import pandas as pd" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Load Annotated Queries\n", - "Queries paired with annotated (golden) article chunks for training/validation." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Generating train split: 100%|██████████| 291/291 [00:00<00:00, 36370.05 examples/s]" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "DatasetDict({\n", - " train: Dataset({\n", - " features: ['query_id', 'query', 'retrievals'],\n", - " num_rows: 291\n", - " })\n", - "})\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "# Load annotated queries\n", - "annotated_queries = load_dataset(\"devrev/search\", \"annotated_queries\")\n", - "print(annotated_queries)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
| \n", - " | query_id | \n", - "query | \n", - "retrievals | \n", - "
|---|---|---|---|
| 0 | \n", - "0ae94217-c6a0-4895-83a2-841a95f01637 | \n", - "create DevRev ticket from Microsoft Teams | \n", - "[{'id': 'ART-4216_KNOWLEDGE_NODE-26', 'text': ... | \n", - "
| 1 | \n", - "d0b209b3-6cea-46d8-bfac-bd0e286ea21b | \n", - "workflow builder auto close ticket after 48 ho... | \n", - "[{'id': 'ART-2012_KNOWLEDGE_NODE-24', 'text': ... | \n", - "
| 2 | \n", - "40c1aa6f-cd21-46ab-8f6f-76fdc267b584 | \n", - "automated reminder to customer ticket will be ... | \n", - "[{'id': 'ART-3068_KNOWLEDGE_NODE-24', 'text': ... | \n", - "
| 3 | \n", - "e47d883f-b712-4f98-bd06-14ade143e3c2 | \n", - "connect Bitbucket account to DevRev account | \n", - "[{'id': 'ART-2030_KNOWLEDGE_NODE-27', 'text': ... | \n", - "
| 4 | \n", - "2e6f9413-15ac-4974-a380-7aa22fc98a61 | \n", - "use of workflows in DevRev | \n", - "[{'id': 'ART-1961_KNOWLEDGE_NODE-28', 'text': ... | \n", - "
| \n", - " | query_id | \n", - "query | \n", - "
|---|---|---|
| 0 | \n", - "a97f93d2-410a-431f-ae9a-1e23ed35d74c | \n", - "end customer organization name not appearing i... | \n", - "
| 1 | \n", - "7dd7e2b4-9349-4535-8007-1d706e0fabff | \n", - "Android SDK session generated with Unknown user | \n", - "
| 2 | \n", - "4bc92187-cdaa-4c20-b189-abd1672e5a71 | \n", - "email reply received on wrong ticket | \n", - "
| 3 | \n", - "4d9878e8-f746-4df5-8bf6-f9444989b385 | \n", - "manage access and privileges in DevRev | \n", - "
| 4 | \n", - "483151ec-aff4-4569-b3df-651f578b61d8 | \n", - "SSO setup SAML IDP metadata connection string ... | \n", - "
| \n", - " | id | \n", - "text | \n", - "title | \n", - "
|---|---|---|---|
| 0 | \n", - "ART-17711_KNOWLEDGE_NODE-0 | \n", - "b'We ran into a case where an AirSync was star... | \n", - "Sync fails when original sync owners loses per... | \n", - "
| 1 | \n", - "ART-17711_KNOWLEDGE_NODE-1 | \n", - "access.\\n\\nOnce Person A was re-added with the... | \n", - "Sync fails when original sync owners loses per... | \n", - "
| 2 | \n", - "ART-17650_KNOWLEDGE_NODE-0 | \n", - "b\"American cybersecurity leader unifies securi... | \n", - "American cybersecurity leader unifies security... | \n", - "
| 3 | \n", - "ART-17650_KNOWLEDGE_NODE-1 | \n", - "DevRev\\n======================================... | \n", - "American cybersecurity leader unifies security... | \n", - "
| 4 | \n", - "ART-17650_KNOWLEDGE_NODE-2 | \n", - "solutions help organisations build and deploy ... | \n", - "American cybersecurity leader unifies security... | \n", - "
| \n", + " | query_id | \n", + "query | \n", + "retrievals | \n", + "
|---|---|---|---|
| 0 | \n", + "0ae94217-c6a0-4895-83a2-841a95f01637 | \n", + "create DevRev ticket from Microsoft Teams | \n", + "[{'id': 'ART-4216_KNOWLEDGE_NODE-26', 'text': ... | \n", + "
| 1 | \n", + "d0b209b3-6cea-46d8-bfac-bd0e286ea21b | \n", + "workflow builder auto close ticket after 48 ho... | \n", + "[{'id': 'ART-2012_KNOWLEDGE_NODE-24', 'text': ... | \n", + "
| 2 | \n", + "40c1aa6f-cd21-46ab-8f6f-76fdc267b584 | \n", + "automated reminder to customer ticket will be ... | \n", + "[{'id': 'ART-3068_KNOWLEDGE_NODE-24', 'text': ... | \n", + "
| 3 | \n", + "e47d883f-b712-4f98-bd06-14ade143e3c2 | \n", + "connect Bitbucket account to DevRev account | \n", + "[{'id': 'ART-2030_KNOWLEDGE_NODE-27', 'text': ... | \n", + "
| 4 | \n", + "2e6f9413-15ac-4974-a380-7aa22fc98a61 | \n", + "use of workflows in DevRev | \n", + "[{'id': 'ART-1961_KNOWLEDGE_NODE-28', 'text': ... | \n", + "
| \n", + " | query_id | \n", + "query | \n", + "
|---|---|---|
| 0 | \n", + "a97f93d2-410a-431f-ae9a-1e23ed35d74c | \n", + "end customer organization name not appearing i... | \n", + "
| 1 | \n", + "7dd7e2b4-9349-4535-8007-1d706e0fabff | \n", + "Android SDK session generated with Unknown user | \n", + "
| 2 | \n", + "4bc92187-cdaa-4c20-b189-abd1672e5a71 | \n", + "email reply received on wrong ticket | \n", + "
| 3 | \n", + "4d9878e8-f746-4df5-8bf6-f9444989b385 | \n", + "manage access and privileges in DevRev | \n", + "
| 4 | \n", + "483151ec-aff4-4569-b3df-651f578b61d8 | \n", + "SSO setup SAML IDP metadata connection string ... | \n", + "
| \n", + " | id | \n", + "text | \n", + "title | \n", + "
|---|---|---|---|
| 0 | \n", + "ART-17711_KNOWLEDGE_NODE-0 | \n", + "b'We ran into a case where an AirSync was star... | \n", + "Sync fails when original sync owners loses per... | \n", + "
| 1 | \n", + "ART-17711_KNOWLEDGE_NODE-1 | \n", + "access.\\n\\nOnce Person A was re-added with the... | \n", + "Sync fails when original sync owners loses per... | \n", + "
| 2 | \n", + "ART-17650_KNOWLEDGE_NODE-0 | \n", + "b\"American cybersecurity leader unifies securi... | \n", + "American cybersecurity leader unifies security... | \n", + "
| 3 | \n", + "ART-17650_KNOWLEDGE_NODE-1 | \n", + "DevRev\\n======================================... | \n", + "American cybersecurity leader unifies security... | \n", + "
| 4 | \n", + "ART-17650_KNOWLEDGE_NODE-2 | \n", + "solutions help organisations build and deploy ... | \n", + "American cybersecurity leader unifies security... | \n", + "
\\\\r\\\\n\", \\n 12| \"inReplyToId\": { \\n 13| \"stringValues\": [ \\n 14| \"jksdnfjnsflkdsfkjaabcdefghiuK8BA@mail.gmail.com\" \\n 15| ] \\n 16| }, \\n 17|" + }, + { + "id": "ART-2027_KNOWLEDGE_NODE-44", + "title": "Email | Integrate | Snap-ins | DevRev", + "text": "ticket title.\\n\\nSpecifically, threading breaks when the order of words in the subject is changed, when words are replaced, or when words or symbols are inserted in the middle or appended. Threading is maintained if the subject change is limited to the addition of words before :, words between [], words between ##, or common prefixes, such as Re or Fwd.\\n\\nRate limiting\\n-------------\\n\\nTo ensure that your support system is protected from potential spam attacks as well as from issues arising" + }, + { + "id": "ART-1953_KNOWLEDGE_NODE-30", + "title": "Customer email notifications | Computer by DevRev | DevRev", + "text": "linked to a conversation\\n-------------------------------\\n\\n* **Trigger**: A ticket is linked to an existing conversation.\\n* **Action**: The system sends out a notification with the linked ticket number.\\n* **Sender**: {Company\\\\_Name} [support@yourdomain.com](mailto:support@yourdomain.com)\\n* **Subject**: \"test email body\\\\r\\\\n