Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@

import Config

# Register the custom Postgrex types module (defined in lib/postgrex_types.ex)
# so the Repo can encode/decode pgvector `vector` columns.
config :chatbot, Chatbot.Repo, types: Chatbot.PostgrexTypes
# NOTE(review): placeholder secret committed in source — prefer reading the
# key from the environment (e.g. `System.fetch_env!/1` in runtime.exs).
config :chatbot, openai_key: "your openai API key"
# Run Nx (and hence the Bumblebee servings) on the EXLA backend by default.
config :nx, default_backend: EXLA.Backend
# NOTE(review): `import_config` resolves relative to this file's directory,
# so these paths point at config/config/*.exs — confirm that is intended.
import_config "config/endpoint.exs"
import_config "config/logger.exs"
import_config "config/phoenix.exs"
Expand Down
1 change: 1 addition & 0 deletions config/dev.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import Config
1 change: 1 addition & 0 deletions config/prod.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import Config
1 change: 1 addition & 0 deletions config/test.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import Config
63 changes: 63 additions & 0 deletions eval/rag_triad_eval.exs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# RAG triad evaluation script (run with: mix run eval/rag_triad_eval.exs).
#
# Downloads the Amnesty QA dataset, indexes its contexts, answers every
# question through Chatbot.Rag, scores each generation with the RAG triad
# (context relevance, groundedness, answer relevance) via OpenAI, writes the
# raw results to eval/triad_eval.json and prints the overall mean score.

# NOTE(review): `compile_env/2` reads the value when this script is compiled;
# `Application.fetch_env!/2` is the usual runtime accessor — confirm intent.
openai_key = Application.compile_env(:chatbot, :openai_key)

dataset =
  "https://huggingface.co/datasets/explodinggradients/amnesty_qa/resolve/main/english.json"

IO.puts("downloading dataset")

data =
  Req.get!(dataset).body
  |> Jason.decode!()

IO.puts("indexing")

# Each dataset entry carries a list of context strings; join them into one
# document per question and index it, using its position as the source id.
data["contexts"]
|> Enum.map(&Enum.join(&1, " "))
|> Enum.with_index(fn context, index -> %{document: context, source: "#{index}"} end)
|> Chatbot.Rag.index()

IO.puts("generating responses")

generations =
  for question <- data["question"] do
    Chatbot.Rag.query(question)
  end

provider = Rag.Ai.OpenAI.new(%{text_model: "gpt-4o-mini", api_key: openai_key})

IO.puts("evaluating")

generations =
  for generation <- generations do
    Rag.Evaluation.evaluate_rag_triad(generation, provider)
  end

# Persist the raw per-question results for later inspection.
json =
  generations
  |> Enum.map(fn generation ->
    Map.from_struct(generation)
    |> Map.take([:query, :context, :context_sources, :response, :evaluations])
  end)
  |> Jason.encode!()

File.write!(Path.join(__DIR__, "triad_eval.json"), json)

# Average the three triad scores per generation, then average over all
# generations to get one headline number.
average_rag_triad_scores =
  Enum.map(
    generations,
    fn gen ->
      %{
        evaluations: %{
          "context_relevance_score" => context_relevance_score,
          "groundedness_score" => groundedness_score,
          "answer_relevance_score" => answer_relevance_score
        }
      } = gen

      (context_relevance_score + groundedness_score + answer_relevance_score) / 3
    end
  )

total_average_score = Enum.sum(average_rag_triad_scores) / Enum.count(average_rag_triad_scores)

# Fixed: removed the stray comma that printed as "Score: ,<value>".
IO.puts("Score: #{total_average_score}")
12 changes: 12 additions & 0 deletions lib/chatbot/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,18 @@ defmodule Chatbot.Application do
@impl true
def start(_type, _args) do
children = [
{Nx.Serving,
[
serving: Chatbot.Rag.Serving.build_llm_serving(),
name: Rag.LLMServing,
batch_timeout: 100
]},
{Nx.Serving,
[
serving: Chatbot.Rag.Serving.build_embedding_serving(),
name: Rag.EmbeddingServing,
batch_timeout: 100
]},
{Task.Supervisor, name: Chatbot.TaskSupervisor},
ChatbotWeb.Telemetry,
Chatbot.Repo,
Expand Down
125 changes: 125 additions & 0 deletions lib/chatbot/rag.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
defmodule Chatbot.Rag do
  @moduledoc """
  Retrieval-augmented generation (RAG) pipeline for the chatbot.

  Ingestion loads `.txt` files, chunks them, embeds each chunk and stores the
  result in the `chunks` table. Querying embeds the question, retrieves
  candidate chunks via Postgres full-text search and pgvector similarity,
  fuses both rankings, and prompts the local LLM serving with the context.
  """

  alias Chatbot.Repo
  alias Rag.{Ai, Embedding, Generation, Retrieval}

  import Ecto.Query
  import Pgvector.Ecto.Query

  # Nx-backed provider wired to the servings started in Chatbot.Application.
  @provider Ai.Nx.new(%{embeddings_serving: Rag.EmbeddingServing, text_serving: Rag.LLMServing})

  @doc """
  Loads and indexes every `.txt` file found under `path`.
  """
  def ingest(path) do
    path
    |> load()
    |> index()
  end

  @doc """
  Returns one ingestion map per `.txt` file under `path`, each tagged with its
  `:source` path and loaded via `Rag.Loading.load_file/1`.
  """
  def load(path) do
    path
    |> list_text_files()
    |> Enum.map(&%{source: &1})
    |> Enum.map(&Rag.Loading.load_file(&1))
  end

  # Recursive glob for text files below `path` (Path.join normalizes the slashes).
  defp list_text_files(path) do
    path
    |> Path.join("/**/*.txt")
    |> Path.wildcard()
  end

  @doc """
  Chunks, embeds and persists a list of ingestion maps into the `chunks`
  table. Returns the `Repo.insert_all/2` result.
  """
  def index(ingestions) do
    chunks =
      ingestions
      |> Enum.flat_map(&chunk_text(&1, :document))
      |> Embedding.generate_embeddings_batch(@provider,
        text_key: :chunk,
        embedding_key: :embedding
      )
      |> Enum.map(&to_chunk(&1))

    Repo.insert_all(Chatbot.Rag.Chunk, chunks)
  end

  # Splits the text stored under `text_key` and emits one map per chunk, each
  # carrying the original ingestion fields plus the chunk text under `:chunk`.
  defp chunk_text(ingestion, text_key, opts \\ []) do
    text = Map.fetch!(ingestion, text_key)
    chunks = TextChunker.split(text, opts)

    Enum.map(chunks, &Map.put(ingestion, :chunk, &1.text))
  end

  @doc """
  Answers `query` with RAG: embeds the query, retrieves chunks via full-text
  and pgvector search, fuses both rankings with reciprocal rank fusion,
  deduplicates by source, and generates a response with the LLM serving.

  Returns the final `Rag.Generation` struct.
  """
  def query(query) do
    generation =
      Generation.new(query)
      |> Embedding.generate_embedding(@provider)
      |> Retrieval.retrieve(:fulltext_results, fn generation -> query_fulltext(generation) end)
      |> Retrieval.retrieve(:semantic_results, fn generation ->
        query_with_pgvector(generation)
      end)
      |> Retrieval.reciprocal_rank_fusion(
        %{fulltext_results: 1, semantic_results: 1},
        :rrf_result
      )
      |> Retrieval.deduplicate(:rrf_result, [:source])

    context =
      Generation.get_retrieval_result(generation, :rrf_result)
      |> Enum.map_join("\n\n", & &1.document)

    context_sources =
      Generation.get_retrieval_result(generation, :rrf_result)
      |> Enum.map(& &1.source)

    prompt = smollm_prompt(query, context)

    generation
    |> Generation.put_context(context)
    |> Generation.put_context_sources(context_sources)
    |> Generation.put_prompt(prompt)
    |> Generation.generate_response(@provider)
  end

  # Stamps timestamps so the plain maps satisfy Repo.insert_all/2, which does
  # not autogenerate them the way changeset inserts do.
  defp to_chunk(ingestion) do
    now = NaiveDateTime.utc_now() |> NaiveDateTime.truncate(:second)

    ingestion
    |> Map.put_new(:inserted_at, now)
    |> Map.put_new(:updated_at, now)
  end

  # Nearest-neighbour search over chunk embeddings ordered by L2 distance.
  defp query_with_pgvector(%{query_embedding: query_embedding}, limit \\ 3) do
    {:ok,
     Repo.all(
       from(c in Chatbot.Rag.Chunk,
         order_by: l2_distance(c.embedding, ^Pgvector.new(query_embedding)),
         limit: ^limit
       )
     )}
  end

  # Postgres full-text search; the words of the query are AND-ed together.
  defp query_fulltext(%{query: query}, limit \\ 3) do
    query = query |> String.trim() |> String.replace(" ", " & ")

    {:ok,
     Repo.all(
       from(c in Chatbot.Rag.Chunk,
         where: fragment("to_tsvector(?) @@ to_tsquery(?)", c.document, ^query),
         limit: ^limit
       )
     )}
  end

  # ChatML prompt for SmolLM2. Fixed: the final role tag must read
  # `assistant` (was `assist`), otherwise the model sees a malformed role and
  # the generation cue does not match its chat template.
  defp smollm_prompt(query, context) do
    """
    <|im_start|>system
    You are a helpful assistant.<|im_end|>
    <|im_start|>user
    Context information is below.
    ---------------------
    #{context}
    ---------------------
    Given the context information and no prior knowledge, answer the query.
    Query: #{query}
    Answer: <|im_end|>
    <|im_start|>assistant
    """
  end
end
16 changes: 16 additions & 0 deletions lib/chatbot/rag/chunk.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
defmodule Chatbot.Rag.Chunk do
  @moduledoc """
  Ecto schema for one indexed text chunk: the source document it came from,
  the chunk text itself, and its pgvector embedding.
  """

  use Ecto.Schema

  schema "chunks" do
    field :document, :string
    field :source, :string
    field :chunk, :string
    field :embedding, Pgvector.Ecto.Vector

    timestamps()
  end

  @doc """
  Builds a changeset casting the permitted chunk attributes from `attrs`.
  """
  def changeset(chunk \\ %__MODULE__{}, attrs) do
    permitted = [:document, :source, :chunk, :embedding]

    Ecto.Changeset.cast(chunk, attrs, permitted)
  end
end
32 changes: 32 additions & 0 deletions lib/chatbot/rag/serving.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
defmodule Chatbot.Rag.Serving do
  @moduledoc """
  Builders for the Bumblebee servings used by the RAG pipeline: a text
  embedding serving (gte-small) and a small instruct LLM serving (SmolLM2).
  Both are compiled with EXLA and meant to be supervised under `Nx.Serving`.
  """

  @embedding_repo {:hf, "thenlper/gte-small"}
  @llm_repo {:hf, "HuggingFaceTB/SmolLM2-135M-Instruct"}

  @doc """
  Builds the text-embedding serving (mean-pooled hidden state, batch size 64,
  sequence length 512).
  """
  def build_embedding_serving() do
    {:ok, model} = Bumblebee.load_model(@embedding_repo)
    {:ok, tokenizer} = Bumblebee.load_tokenizer(@embedding_repo)

    Bumblebee.Text.TextEmbedding.text_embedding(model, tokenizer,
      compile: [batch_size: 64, sequence_length: 512],
      defn_options: [compiler: EXLA],
      output_attribute: :hidden_state,
      output_pool: :mean_pooling
    )
  end

  @doc """
  Builds the SmolLM2-135M-Instruct text-generation serving, capped at 100 new
  tokens per response.
  """
  def build_llm_serving() do
    {:ok, model} = Bumblebee.load_model(@llm_repo)
    {:ok, tokenizer} = Bumblebee.load_tokenizer(@llm_repo)
    {:ok, config} = Bumblebee.load_generation_config(@llm_repo)

    config = Bumblebee.configure(config, max_new_tokens: 100)

    Bumblebee.Text.generation(model, tokenizer, config,
      compile: [batch_size: 1, sequence_length: 6000],
      defn_options: [compiler: EXLA],
      stream: false
    )
  end
end
5 changes: 5 additions & 0 deletions lib/postgrex_types.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Defines the Chatbot.PostgrexTypes module at compile time: the default
# Postgres extensions used by Ecto plus the pgvector extension, so `vector`
# columns can be encoded/decoded. Registered on the Repo via
# `config :chatbot, Chatbot.Repo, types: Chatbot.PostgrexTypes`.
Postgrex.Types.define(
  Chatbot.PostgrexTypes,
  [Pgvector.Extensions.Vector] ++ Ecto.Adapters.Postgres.extensions(),
  []
)
6 changes: 6 additions & 0 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ defmodule Chatbot.MixProject do
# Type `mix help deps` for examples and options.
defp deps do
[
{:pgvector, "~> 0.3.0"},
{:ecto, "~> 3.0"},
{:exla, "~> 0.9.1"},
{:bumblebee, "~> 0.6.0"},
{:text_chunker, "~> 0.3.1"},
{:ex_machina, "~> 2.8"},
{:bitcrowd_ecto, "~> 1.0"},
{:credo, "~> 1.7", only: [:dev, :test], runtime: false},
Expand All @@ -47,6 +52,7 @@ defmodule Chatbot.MixProject do
{:phoenix_live_reload, "~> 1.2", only: :dev},
# TODO bump on release to {:phoenix_live_view, "~> 1.0.0"},
{:phoenix_live_view, "~> 1.0.0-rc.1", override: true},
{:rag, github: "bitcrowd/rag"},
{:floki, ">= 0.30.0", only: :test},
{:phoenix_live_dashboard, "~> 0.8.3"},
{:telemetry_metrics, "~> 1.0"},
Expand Down
Loading
Loading