Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

import Config

# Register the custom Postgrex types module (defined in lib/postgrex_types.ex)
# so the Repo can encode/decode pgvector `vector` columns.
config :chatbot, Chatbot.Repo, types: Chatbot.PostgrexTypes
# NOTE(review): placeholder secret committed in source — prefer reading the
# key from the environment (e.g. `System.fetch_env!/1` in runtime.exs).
config :chatbot, openai_key: "your openai API key"
# Run Nx (and hence the Bumblebee servings) on the EXLA backend by default.
config :nx, default_backend: EXLA.Backend
# NOTE(review): `import_config` resolves relative to this file's directory,
# so these paths point at config/config/*.exs — confirm that is intended.
import_config "config/endpoint.exs"
import_config "config/logger.exs"
import_config "config/phoenix.exs"
Expand Down
1 change: 1 addition & 0 deletions config/dev.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import Config
1 change: 1 addition & 0 deletions config/prod.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import Config
1 change: 1 addition & 0 deletions config/test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import Config
63 changes: 63 additions & 0 deletions eval/rag_triad_eval.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# RAG triad evaluation script (run with: mix run eval/rag_triad_eval.exs).
#
# Downloads the Amnesty QA dataset, indexes its contexts, answers every
# question through Chatbot.Rag, scores each generation with the RAG triad
# (context relevance, groundedness, answer relevance) via OpenAI, writes the
# raw results to eval/triad_eval.json and prints the overall mean score.

# NOTE(review): `compile_env/2` reads the value when this script is compiled;
# `Application.fetch_env!/2` is the usual runtime accessor — confirm intent.
openai_key = Application.compile_env(:chatbot, :openai_key)

dataset =
  "https://huggingface.co/datasets/explodinggradients/amnesty_qa/resolve/main/english.json"

IO.puts("downloading dataset")

data =
  Req.get!(dataset).body
  |> Jason.decode!()

IO.puts("indexing")

# Each dataset entry carries a list of context strings; join them into one
# document per question and index it, using its position as the source id.
data["contexts"]
|> Enum.map(&Enum.join(&1, " "))
|> Enum.with_index(fn context, index -> %{document: context, source: "#{index}"} end)
|> Chatbot.Rag.index()

IO.puts("generating responses")

generations =
  for question <- data["question"] do
    Chatbot.Rag.query(question)
  end

provider = Rag.Ai.OpenAI.new(%{text_model: "gpt-4o-mini", api_key: openai_key})

IO.puts("evaluating")

generations =
  for generation <- generations do
    Rag.Evaluation.evaluate_rag_triad(generation, provider)
  end

# Persist the raw per-question results for later inspection.
json =
  generations
  |> Enum.map(fn generation ->
    Map.from_struct(generation)
    |> Map.take([:query, :context, :context_sources, :response, :evaluations])
  end)
  |> Jason.encode!()

File.write!(Path.join(__DIR__, "triad_eval.json"), json)

# Average the three triad scores per generation, then average over all
# generations to get one headline number.
average_rag_triad_scores =
  Enum.map(
    generations,
    fn gen ->
      %{
        evaluations: %{
          "context_relevance_score" => context_relevance_score,
          "groundedness_score" => groundedness_score,
          "answer_relevance_score" => answer_relevance_score
        }
      } = gen

      (context_relevance_score + groundedness_score + answer_relevance_score) / 3
    end
  )

total_average_score = Enum.sum(average_rag_triad_scores) / Enum.count(average_rag_triad_scores)

# Fixed: removed the stray comma that printed as "Score: ,<value>".
IO.puts("Score: #{total_average_score}")
12 changes: 12 additions & 0 deletions lib/chatbot/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@ defmodule Chatbot.Application do
@impl true
def start(_type, _args) do
children = [
{Nx.Serving,
[
serving: Chatbot.Rag.Serving.build_llm_serving(),
name: Rag.LLMServing,
batch_timeout: 100
]},
{Nx.Serving,
[
serving: Chatbot.Rag.Serving.build_embedding_serving(),
name: Rag.EmbeddingServing,
batch_timeout: 100
]},
{Task.Supervisor, name: Chatbot.TaskSupervisor},
ChatbotWeb.Telemetry,
Chatbot.Repo,
Expand Down
125 changes: 125 additions & 0 deletions lib/chatbot/rag.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
defmodule Chatbot.Rag do
  @moduledoc """
  Retrieval-augmented generation (RAG) pipeline for the chatbot.

  Ingestion loads `.txt` files, chunks them, embeds each chunk and stores the
  result in the `chunks` table. Querying embeds the question, retrieves
  candidate chunks via Postgres full-text search and pgvector similarity,
  fuses both rankings, and prompts the local LLM serving with the context.
  """

  alias Chatbot.Repo
  alias Rag.{Ai, Embedding, Generation, Retrieval}

  import Ecto.Query
  import Pgvector.Ecto.Query

  # Nx-backed provider wired to the servings started in Chatbot.Application.
  @provider Ai.Nx.new(%{embeddings_serving: Rag.EmbeddingServing, text_serving: Rag.LLMServing})

  @doc """
  Loads and indexes every `.txt` file found under `path`.
  """
  def ingest(path) do
    path
    |> load()
    |> index()
  end

  @doc """
  Returns one ingestion map per `.txt` file under `path`, each tagged with its
  `:source` path and loaded via `Rag.Loading.load_file/1`.
  """
  def load(path) do
    path
    |> list_text_files()
    |> Enum.map(&%{source: &1})
    |> Enum.map(&Rag.Loading.load_file(&1))
  end

  # Recursive glob for text files below `path` (Path.join normalizes the slashes).
  defp list_text_files(path) do
    path
    |> Path.join("/**/*.txt")
    |> Path.wildcard()
  end

  @doc """
  Chunks, embeds and persists a list of ingestion maps into the `chunks`
  table. Returns the `Repo.insert_all/2` result.
  """
  def index(ingestions) do
    chunks =
      ingestions
      |> Enum.flat_map(&chunk_text(&1, :document))
      |> Embedding.generate_embeddings_batch(@provider,
        text_key: :chunk,
        embedding_key: :embedding
      )
      |> Enum.map(&to_chunk(&1))

    Repo.insert_all(Chatbot.Rag.Chunk, chunks)
  end

  # Splits the text stored under `text_key` and emits one map per chunk, each
  # carrying the original ingestion fields plus the chunk text under `:chunk`.
  defp chunk_text(ingestion, text_key, opts \\ []) do
    text = Map.fetch!(ingestion, text_key)
    chunks = TextChunker.split(text, opts)

    Enum.map(chunks, &Map.put(ingestion, :chunk, &1.text))
  end

  @doc """
  Answers `query` with RAG: embeds the query, retrieves chunks via full-text
  and pgvector search, fuses both rankings with reciprocal rank fusion,
  deduplicates by source, and generates a response with the LLM serving.

  Returns the final `Rag.Generation` struct.
  """
  def query(query) do
    generation =
      Generation.new(query)
      |> Embedding.generate_embedding(@provider)
      |> Retrieval.retrieve(:fulltext_results, fn generation -> query_fulltext(generation) end)
      |> Retrieval.retrieve(:semantic_results, fn generation ->
        query_with_pgvector(generation)
      end)
      |> Retrieval.reciprocal_rank_fusion(
        %{fulltext_results: 1, semantic_results: 1},
        :rrf_result
      )
      |> Retrieval.deduplicate(:rrf_result, [:source])

    context =
      Generation.get_retrieval_result(generation, :rrf_result)
      |> Enum.map_join("\n\n", & &1.document)

    context_sources =
      Generation.get_retrieval_result(generation, :rrf_result)
      |> Enum.map(& &1.source)

    prompt = smollm_prompt(query, context)

    generation
    |> Generation.put_context(context)
    |> Generation.put_context_sources(context_sources)
    |> Generation.put_prompt(prompt)
    |> Generation.generate_response(@provider)
  end

  # Stamps timestamps so the plain maps satisfy Repo.insert_all/2, which does
  # not autogenerate them the way changeset inserts do.
  defp to_chunk(ingestion) do
    now = NaiveDateTime.utc_now() |> NaiveDateTime.truncate(:second)

    ingestion
    |> Map.put_new(:inserted_at, now)
    |> Map.put_new(:updated_at, now)
  end

  # Nearest-neighbour search over chunk embeddings ordered by L2 distance.
  defp query_with_pgvector(%{query_embedding: query_embedding}, limit \\ 3) do
    {:ok,
     Repo.all(
       from(c in Chatbot.Rag.Chunk,
         order_by: l2_distance(c.embedding, ^Pgvector.new(query_embedding)),
         limit: ^limit
       )
     )}
  end

  # Postgres full-text search; the words of the query are AND-ed together.
  defp query_fulltext(%{query: query}, limit \\ 3) do
    query = query |> String.trim() |> String.replace(" ", " & ")

    {:ok,
     Repo.all(
       from(c in Chatbot.Rag.Chunk,
         where: fragment("to_tsvector(?) @@ to_tsquery(?)", c.document, ^query),
         limit: ^limit
       )
     )}
  end

  # ChatML prompt for SmolLM2. Fixed: the final role tag must read
  # `assistant` (was `assist`), otherwise the model sees a malformed role and
  # the generation cue does not match its chat template.
  defp smollm_prompt(query, context) do
    """
    <|im_start|>system
    You are a helpful assistant.<|im_end|>
    <|im_start|>user
    Context information is below.
    ---------------------
    #{context}
    ---------------------
    Given the context information and no prior knowledge, answer the query.
    Query: #{query}
    Answer: <|im_end|>
    <|im_start|>assistant
    """
  end
end
16 changes: 16 additions & 0 deletions lib/chatbot/rag/chunk.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
defmodule Chatbot.Rag.Chunk do
  @moduledoc """
  Ecto schema for one indexed text chunk: the source document it came from,
  the chunk text itself, and its pgvector embedding.
  """

  use Ecto.Schema

  schema "chunks" do
    field :document, :string
    field :source, :string
    field :chunk, :string
    field :embedding, Pgvector.Ecto.Vector

    timestamps()
  end

  @doc """
  Builds a changeset casting the permitted chunk attributes from `attrs`.
  """
  def changeset(chunk \\ %__MODULE__{}, attrs) do
    permitted = [:document, :source, :chunk, :embedding]

    Ecto.Changeset.cast(chunk, attrs, permitted)
  end
end
32 changes: 32 additions & 0 deletions lib/chatbot/rag/serving.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
defmodule Chatbot.Rag.Serving do
  @moduledoc """
  Builders for the Bumblebee servings used by the RAG pipeline: a text
  embedding serving (gte-small) and a small instruct LLM serving (SmolLM2).
  Both are compiled with EXLA and meant to be supervised under `Nx.Serving`.
  """

  @embedding_repo {:hf, "thenlper/gte-small"}
  @llm_repo {:hf, "HuggingFaceTB/SmolLM2-135M-Instruct"}

  @doc """
  Builds the text-embedding serving (mean-pooled hidden state, batch size 64,
  sequence length 512).
  """
  def build_embedding_serving() do
    {:ok, model} = Bumblebee.load_model(@embedding_repo)
    {:ok, tokenizer} = Bumblebee.load_tokenizer(@embedding_repo)

    Bumblebee.Text.TextEmbedding.text_embedding(model, tokenizer,
      compile: [batch_size: 64, sequence_length: 512],
      defn_options: [compiler: EXLA],
      output_attribute: :hidden_state,
      output_pool: :mean_pooling
    )
  end

  @doc """
  Builds the SmolLM2-135M-Instruct text-generation serving, capped at 100 new
  tokens per response.
  """
  def build_llm_serving() do
    {:ok, model} = Bumblebee.load_model(@llm_repo)
    {:ok, tokenizer} = Bumblebee.load_tokenizer(@llm_repo)
    {:ok, config} = Bumblebee.load_generation_config(@llm_repo)

    config = Bumblebee.configure(config, max_new_tokens: 100)

    Bumblebee.Text.generation(model, tokenizer, config,
      compile: [batch_size: 1, sequence_length: 6000],
      defn_options: [compiler: EXLA],
      stream: false
    )
  end
end
5 changes: 5 additions & 0 deletions lib/postgrex_types.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Defines the Chatbot.PostgrexTypes module at compile time: the default
# Postgres extensions used by Ecto plus the pgvector extension, so `vector`
# columns can be encoded/decoded. Registered on the Repo via
# `config :chatbot, Chatbot.Repo, types: Chatbot.PostgrexTypes`.
Postgrex.Types.define(
  Chatbot.PostgrexTypes,
  [Pgvector.Extensions.Vector] ++ Ecto.Adapters.Postgres.extensions(),
  []
)
6 changes: 6 additions & 0 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ defmodule Chatbot.MixProject do
# Type `mix help deps` for examples and options.
defp deps do
[
{:pgvector, "~> 0.3.0"},
{:ecto, "~> 3.0"},
{:exla, "~> 0.9.1"},
{:bumblebee, "~> 0.6.0"},
{:text_chunker, "~> 0.3.1"},
{:ex_machina, "~> 2.8"},
{:bitcrowd_ecto, "~> 1.0"},
{:credo, "~> 1.7", only: [:dev, :test], runtime: false},
Expand All @@ -47,6 +52,7 @@ defmodule Chatbot.MixProject do
{:phoenix_live_reload, "~> 1.2", only: :dev},
# TODO bump on release to {:phoenix_live_view, "~> 1.0.0"},
{:phoenix_live_view, "~> 1.0.0-rc.1", override: true},
{:rag, github: "bitcrowd/rag"},
{:floki, ">= 0.30.0", only: :test},
{:phoenix_live_dashboard, "~> 0.8.3"},
{:telemetry_metrics, "~> 1.0"},
Expand Down
Loading
Loading