From 75068a021f9f71047343b01f3e223c2f8c60a1e1 Mon Sep 17 00:00:00 2001 From: marauder37 Date: Tue, 4 Jul 2023 17:38:20 +1000 Subject: [PATCH 1/6] Update llama_index to 0.6.38.post1 --- requirements/base.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/base.txt b/requirements/base.txt index 3a67ecd..1f2178b 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -32,6 +32,6 @@ channels_redis # NLP-Related # ------------------------------------------------------------------------------ -llama_index==0.5.25 # https://github.com/jerryjliu/llama_index +llama_index==0.6.38.post1 # https://github.com/jerryjliu/llama_index PyPDF2==3.* # https://pypdf2.readthedocs.io/en/latest/ docx2txt==0.8 From 8c3948334716dd8346a1b56cd42a7eb01b364cd1 Mon Sep 17 00:00:00 2001 From: marauder37 Date: Tue, 4 Jul 2023 17:47:56 +1000 Subject: [PATCH 2/6] Migrate deprecated GPTSimpleVectorIndex to GPTVectorStoreIndex --- delphic/tasks/index_tasks.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/delphic/tasks/index_tasks.py b/delphic/tasks/index_tasks.py index 56e0307..57a84f3 100644 --- a/delphic/tasks/index_tasks.py +++ b/delphic/tasks/index_tasks.py @@ -1,3 +1,4 @@ +import json import logging import os import tempfile @@ -8,7 +9,7 @@ from django.core.files import File from langchain import OpenAI from llama_index import ( - GPTSimpleVectorIndex, + GPTVectorStoreIndex, LLMPredictor, ServiceContext, download_loader, @@ -23,11 +24,11 @@ @celery_app.task def create_index(collection_id): """ - Celery task to create a GPTSimpleVectorIndex for a given Collection object. + Celery task to create a GPTVectorStoreIndex for a given Collection object. This task takes the ID of a Collection object, retrieves it from the database along with its related documents, and saves the document files - to a temporary directory. Then, it creates a GPTSimpleVectorIndex using + to a temporary directory.
Then, it creates a GPTVectorStoreIndex using the provided code and saves the index to the Comparison.model FileField. Args: @@ -60,15 +61,18 @@ def create_index(collection_id): with temp_file_path.open("wb") as f: f.write(file_data) - # Create the GPTSimpleVectorIndex - SimpleDirectoryReader = download_loader("SimpleDirectoryReader") + # Create the GPTVectorStoreIndex + try: + SimpleDirectoryReader = download_loader("SimpleDirectoryReader") + except Exception as e: + logger.error(f"Error downloading SimpleDirectoryReader: {e}") + raise + loader = SimpleDirectoryReader( tempdir_path, recursive=True, exclude_hidden=False ) documents = loader.load_data() - # index = GPTSimpleVectorIndex(documents) - # documents = SimpleDirectoryReader(str(tempdir_path)).load_data() llm_predictor = LLMPredictor( llm=OpenAI( temperature=0, @@ -81,11 +85,11 @@ ) # build index - index = GPTSimpleVectorIndex.from_documents( + index = GPTVectorStoreIndex.from_documents( documents, service_context=service_context ) - index_str = index.save_to_string() + index_str = json.dumps(index.storage_context.to_dict()) # Save the index_str to the Comparison.model FileField with tempfile.NamedTemporaryFile(delete=False) as f: @@ -105,7 +109,9 @@ return True except Exception as e: - logger.error(f"Error creating index for collection {collection_id}: {e}") + logger.error( + f"{type(e).__name__} creating index for collection {collection_id}: {e}" + ) collection.status = CollectionStatus.ERROR collection.save() From 93fd9b37c46590957b8b0f8d324d04ca3179ac82 Mon Sep 17 00:00:00 2001 From: marauder37 Date: Tue, 4 Jul 2023 17:49:19 +1000 Subject: [PATCH 3/6] Make index query via `query_engine` --- config/api/websockets/queries.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config/api/websockets/queries.py b/config/api/websockets/queries.py index e4eb687..10c41ae 100644 --- a/config/api/websockets/queries.py +++
b/config/api/websockets/queries.py @@ -39,7 +39,9 @@ async def receive(self, text_data): {query_str} """ - response = self.index.query(modified_query_str) + + query_engine = self.index.as_query_engine() + response = query_engine.query(modified_query_str) # Format the response as markdown markdown_response = f"## Response\n\n{response}\n\n" From 55ed2ab2ce8dec64d21eab5b20db913e3ef6dc0e Mon Sep 17 00:00:00 2001 From: marauder37 Date: Tue, 4 Jul 2023 17:51:26 +1000 Subject: [PATCH 4/6] Migrate deprecated GPTSimpleVectorIndex to VectorStoreIndex --- delphic/utils/collections.py | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/delphic/utils/collections.py b/delphic/utils/collections.py index 2b50385..8fd9bf6 100644 --- a/delphic/utils/collections.py +++ b/delphic/utils/collections.py @@ -1,10 +1,11 @@ +import json import logging import textwrap from pathlib import Path from django.conf import settings -from langchain import OpenAI -from llama_index import GPTSimpleVectorIndex, LLMPredictor, ServiceContext +from llama_index import StorageContext, load_index_from_storage +from llama_index.indices.base import BaseIndex from delphic.indexes.models import Collection @@ -27,7 +28,7 @@ def format_source(source): return formatted_source -async def load_collection_model(collection_id: str | int) -> GPTSimpleVectorIndex: +async def load_collection_model(collection_id: str | int) -> "BaseIndex": """ Load the Collection model from cache or the database, and return the index. Args: collection_id (Union[str, int]): The ID of the Collection model instance. Returns: - GPTSimpleVectorIndex: The loaded index. + VectorStoreIndex: The loaded index. This function performs the following steps: 1. Retrieve the Collection object with the given collection_id. 2. Check if a JSON file with the name '/cache/model_{collection_id}.json' exists. - 3.
If the JSON file doesn't exist, load the JSON from the Collection.model FileField and save it to + 3. If the JSON file doesn't exist, load the JSON from the `Collection.model` FileField and save it to '/cache/model_{collection_id}.json'. - 4. Call GPTSimpleVectorIndex.load_from_disk with the cache_file_path. + 4. Call VectorStoreIndex.load_from_disk with the cache_file_path. """ # Retrieve the Collection object collection = await Collection.objects.aget(id=collection_id) @@ -61,21 +62,12 @@ async def load_collection_model(collection_id: str | int) -> GPTSimpleVectorInde with cache_file_path.open("w+", encoding="utf-8") as cache_file: cache_file.write(model_file.read().decode("utf-8")) - # define LLM - logger.info( - f"load_collection_model() - Setup service context with tokens {settings.MAX_TOKENS} and " - f"model {settings.MODEL_NAME}" - ) - llm_predictor = LLMPredictor( - llm=OpenAI(temperature=0, model_name="text-davinci-003", max_tokens=512) - ) - service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor) - - # Call GPTSimpleVectorIndex.load_from_disk + # Call VectorStoreIndex.load_from_disk logger.info("load_collection_model() - Load llama index") - index = GPTSimpleVectorIndex.load_from_disk( - cache_file_path, service_context=service_context - ) + with cache_file_path.open("r") as cache_file: + storage_context = StorageContext.from_dict(json.load(cache_file)) + index = load_index_from_storage(storage_context) + logger.info( "load_collection_model() - Llamaindex loaded and ready for query..."
) From 2f05aa5f1ccc7e0d513ba59e8efb62deb2cb6ff1 Mon Sep 17 00:00:00 2001 From: marauder37 Date: Tue, 4 Jul 2023 17:53:08 +1000 Subject: [PATCH 5/6] Auto-reload Celery tasks --- compose/local/django/celery/worker/start | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compose/local/django/celery/worker/start b/compose/local/django/celery/worker/start index 5bbb5d2..e92cf8d 100644 --- a/compose/local/django/celery/worker/start +++ b/compose/local/django/celery/worker/start @@ -4,5 +4,5 @@ set -o errexit set -o nounset -#exec watchfiles celery.__main__.main --args '-A config.celery_app worker -l INFO' -exec celery -A config.celery_app worker -l INFO +exec watchfiles --filter python celery.__main__.main --args '-A config.celery_app worker -l INFO' +#exec celery -A config.celery_app worker -l INFO From 3c9a1a7bc103189efba38637062b1357b01d5361 Mon Sep 17 00:00:00 2001 From: marauder37 Date: Tue, 4 Jul 2023 18:12:29 +1000 Subject: [PATCH 6/6] Exclude local configuration from version control Update .gitignore with current cookiecutter-django setup, which is to exclude .idea/ entirely --- .gitignore | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 75a0317..74f1a07 100644 --- a/.gitignore +++ b/.gitignore @@ -162,29 +162,6 @@ typings/ # Local History for Visual Studio Code .history/ - -# Provided default Pycharm Run/Debug Configurations should be tracked by git -# In case of local modifications made by Pycharm, use update-index command -# for each changed file, like this: -# git update-index --assume-unchanged .idea/chat_all_the_docs.iml -### JetBrains template -# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm -# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 - -# User-specific stuff: -.idea/**/workspace.xml -.idea/**/tasks.xml -.idea/dictionaries - -# Sensitive or high-churn files:
-.idea/**/dataSources/ -.idea/**/dataSources.ids -.idea/**/dataSources.xml -.idea/**/dataSources.local.xml -.idea/**/sqlDataSources.xml -.idea/**/dynamic.xml -.idea/**/uiDesigner.xml - # Gradle: .idea/**/gradle.xml .idea/**/libraries @@ -338,3 +315,14 @@ delphic/media/* ### Models for Question Answering cache/* + +# https://github.com/cookiecutter/cookiecutter-django/blob/de8759fdbd45ac288b97e050073a5d09f50029db/.gitignore#L211 +# Even though the project might be opened and edited +# in any of the JetBrains IDEs, it makes no sense whatsoever +# to 'run' anything within it since any particular cookiecutter +# is declarative by nature. +.idea/ + +### Local configuration files +/.envs/.local +/frontend/.frontend