From dc948c635e18ddea396b8fa876bfffcdfeb9853a Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 9 Mar 2025 22:41:46 +0100 Subject: [PATCH 1/6] [#INTERNAL-498] Remove repeated sentence in seeds https://bitcrowd.atlassian.net/browse/INTERNAL-498 --- .github/workflows/pull.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 2a7dbe7..257a77d 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -23,6 +23,7 @@ jobs: --health-timeout 5s --health-retries 5 + runs-on: ubuntu-22.04 name: Build and test strategy: @@ -125,7 +126,11 @@ jobs: assets: +<<<<<<< HEAD runs-on: ubuntu-22.04 +======= + runs-on: ubuntu-22 +>>>>>>> 0fdc37d ([#INTERNAL-498] Remove repeated sentence in seeds) name: Assets steps: - uses: actions/checkout@v3 From 777d85a4fa9933f9539fbf95bcbcaeb1d28e141a Mon Sep 17 00:00:00 2001 From: Chris Date: Sun, 9 Mar 2025 22:58:16 +0100 Subject: [PATCH 2/6] [#INTERNAL-499] Standard system prompt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The assistant gets a bit too funny in its answers 😄 https://bitcrowd.atlassian.net/browse/INTERNAL-499 --- .github/workflows/pull.yml | 5 ++++- lib/chatbot/chat.ex | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 257a77d..13518cd 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -23,7 +23,6 @@ jobs: --health-timeout 5s --health-retries 5 - runs-on: ubuntu-22.04 name: Build and test strategy: @@ -126,11 +125,15 @@ jobs: assets: +<<<<<<< HEAD <<<<<<< HEAD runs-on: ubuntu-22.04 ======= runs-on: ubuntu-22 >>>>>>> 0fdc37d ([#INTERNAL-498] Remove repeated sentence in seeds) +======= + runs-on: ubuntu-22.04 +>>>>>>> f108e37 ([#INTERNAL-499] Standard system prompt) name: Assets steps: - uses: actions/checkout@v3 diff --git a/lib/chatbot/chat.ex b/lib/chatbot/chat.ex index d172f50..bfb1d83 
100644 --- a/lib/chatbot/chat.ex +++ b/lib/chatbot/chat.ex @@ -28,9 +28,9 @@ defmodule Chatbot.Chat do end @llm LangChain.ChatModels.ChatOllamaAI.new!(%{ - model: "llama3.2:latest", - stream: false - }) + model: "llama3.2:latest", + stream: false + }) @chain LLMChain.new!(%{llm: @llm}) |> LLMChain.add_message(LangChain.Message.new_system!("You are a helpful assistant.")) From ec2d2cd2c1e832a832fee349362ab79e5d67b4ad Mon Sep 17 00:00:00 2001 From: Chris Date: Mon, 10 Mar 2025 00:12:36 +0100 Subject: [PATCH 3/6] [#INTERNAL-500] Add Markdown to chatbot rendering LLMs frequently use Markdown to structure their output. Something like \ # lib/chatbot_web/live/chat_live.ex # (..) markdown_html = String.trim(assigns.content) |> Earmark.as_html!() |> Phoenix.HTML.raw() assigns = assigns |> assign(:class, "u-max-width-75 u-bg-white " <> justify_self) |> assign(:markdown, markdown_html) ~H""" <.ui_card id={@id} class={@class}> <%= @markdown %> https://bitcrowd.atlassian.net/browse/INTERNAL-500 --- .github/workflows/pull.yml | 8 -------- lib/chatbot/chat.ex | 6 +++--- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 13518cd..2a7dbe7 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -125,15 +125,7 @@ jobs: assets: -<<<<<<< HEAD -<<<<<<< HEAD runs-on: ubuntu-22.04 -======= - runs-on: ubuntu-22 ->>>>>>> 0fdc37d ([#INTERNAL-498] Remove repeated sentence in seeds) -======= - runs-on: ubuntu-22.04 ->>>>>>> f108e37 ([#INTERNAL-499] Standard system prompt) name: Assets steps: - uses: actions/checkout@v3 diff --git a/lib/chatbot/chat.ex b/lib/chatbot/chat.ex index bfb1d83..d172f50 100644 --- a/lib/chatbot/chat.ex +++ b/lib/chatbot/chat.ex @@ -28,9 +28,9 @@ defmodule Chatbot.Chat do end @llm LangChain.ChatModels.ChatOllamaAI.new!(%{ - model: "llama3.2:latest", - stream: false - }) + model: "llama3.2:latest", + stream: false + }) @chain LLMChain.new!(%{llm: @llm}) |> 
LLMChain.add_message(LangChain.Message.new_system!("You are a helpful assistant.")) From 50749cc76813ba234f58a798429e8a9857c4073a Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 11 Mar 2025 22:37:40 +0100 Subject: [PATCH 4/6] [#STEP-1-Generator] [#STEP-1-Generator](https://bitcrowd.dev/a-rag-library-for-elixir) From e3f360ee2670f0d605c82ab7dee9fba9a21b8006 Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 11 Mar 2025 22:59:08 +0100 Subject: [PATCH 5/6] added rag --- mix.exs | 1 + mix.lock | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/mix.exs b/mix.exs index 26baa8b..a24511b 100644 --- a/mix.exs +++ b/mix.exs @@ -47,6 +47,7 @@ defmodule Chatbot.MixProject do {:phoenix_live_reload, "~> 1.2", only: :dev}, # TODO bump on release to {:phoenix_live_view, "~> 1.0.0"}, {:phoenix_live_view, "~> 1.0.0-rc.1", override: true}, + {:rag, github: "bitcrowd/rag"}, {:floki, ">= 0.30.0", only: :test}, {:phoenix_live_dashboard, "~> 0.8.3"}, {:telemetry_metrics, "~> 1.0"}, diff --git a/mix.lock b/mix.lock index a71cc19..d12f8d0 100644 --- a/mix.lock +++ b/mix.lock @@ -5,6 +5,7 @@ "bitstyles_phoenix": {:hex, :bitstyles_phoenix, "2.5.0", "c02aae26fcf6ff752738b35aa97f3991c67533412eda381abaad3e22aa2e2215", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 3.3 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:phoenix_live_view, "~> 0.18.12 or ~> 0.19.0 or ~> 0.20.0", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}], "hexpm", "fe018f0eeec6c84afedc1785495efe0a651ecf34656c4b52f6da09d5149a9f28"}, "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, "castore": {:hex, :castore, "1.0.9", "5cc77474afadf02c7c017823f460a17daa7908e991b0cc917febc90e466a375c", [:mix], [], "hexpm", "5ea956504f1ba6f2b4eb707061d8e17870de2bee95fb59d512872c2ef06925e7"}, + "complex": {:hex, :complex, 
"0.6.0", "b0130086a7a8c33574d293b2e0e250f4685580418eac52a5658a4bd148f3ccf1", [:mix], [], "hexpm", "0a5fa95580dcaf30fcd60fe1aaf24327c0fe401e98c24d892e172e79498269f9"}, "credo": {:hex, :credo, "1.7.10", "6e64fe59be8da5e30a1b96273b247b5cf1cc9e336b5fd66302a64b25749ad44d", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "71fbc9a6b8be21d993deca85bf151df023a3097b01e09a2809d460348561d8cd"}, "db_connection": {:hex, :db_connection, "2.7.0", "b99faa9291bb09892c7da373bb82cba59aefa9b36300f6145c5f201c7adf48ec", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "dcf08f31b2701f857dfc787fbad78223d61a32204f217f15e881dd93e4bdd3ff"}, "decimal": {:hex, :decimal, "2.1.1", "5611dca5d4b2c3dd497dec8f68751f1f1a54755e8ed2a966c2633cf885973ad6", [:mix], [], "hexpm", "53cfe5f497ed0e7771ae1a475575603d77425099ba5faef9394932b35020ffcc"}, @@ -21,13 +22,18 @@ "finch": {:hex, :finch, "0.19.0", "c644641491ea854fc5c1bbaef36bfc764e3f08e7185e1f084e35e0672241b76d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "fc5324ce209125d1e2fa0fcd2634601c52a787aff1cd33ee833664a5af4ea2b6"}, "floki": {:hex, :floki, "0.36.2", "a7da0193538c93f937714a6704369711998a51a6164a222d710ebd54020aa7a3", [:mix], [], "hexpm", "a8766c0bc92f074e5cb36c4f9961982eda84c5d2b8e979ca67f5c268ec8ed580"}, "gettext": {:hex, :gettext, "0.26.1", "38e14ea5dcf962d1fc9f361b63ea07c0ce715a8ef1f9e82d3dfb8e67e0416715", 
[:mix], [{:expo, "~> 0.5.1 or ~> 1.0", [hex: :expo, repo: "hexpm", optional: false]}], "hexpm", "01ce56f188b9dc28780a52783d6529ad2bc7124f9744e571e1ee4ea88bf08734"}, + "glob_ex": {:hex, :glob_ex, "0.1.11", "cb50d3f1ef53f6ca04d6252c7fde09fd7a1cf63387714fe96f340a1349e62c93", [:mix], [], "hexpm", "342729363056e3145e61766b416769984c329e4378f1d558b63e341020525de4"}, "hpax": {:hex, :hpax, "1.0.0", "28dcf54509fe2152a3d040e4e3df5b265dcb6cb532029ecbacf4ce52caea3fd2", [:mix], [], "hexpm", "7f1314731d711e2ca5fdc7fd361296593fc2542570b3105595bb0bc6d0fad601"}, + "igniter": {:hex, :igniter, "0.5.33", "799a49a8eb7e2fbebd6af2b770a856fa8ede9acad73a74269a04b8e775ead199", [:mix], [{:glob_ex, "~> 0.1.7", [hex: :glob_ex, repo: "hexpm", optional: false]}, {:inflex, "~> 2.0", [hex: :inflex, repo: "hexpm", optional: false]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:owl, "~> 0.11", [hex: :owl, repo: "hexpm", optional: false]}, {:phx_new, "~> 1.7", [hex: :phx_new, repo: "hexpm", optional: true]}, {:req, "~> 0.5", [hex: :req, repo: "hexpm", optional: false]}, {:rewrite, ">= 1.1.1 and < 2.0.0-0", [hex: :rewrite, repo: "hexpm", optional: false]}, {:sourceror, "~> 1.4", [hex: :sourceror, repo: "hexpm", optional: false]}, {:spitfire, ">= 0.1.3 and < 1.0.0-0", [hex: :spitfire, repo: "hexpm", optional: false]}], "hexpm", "ac962445d426dd3815e6b5568daa86586487293d126a946a67d9cf17d0665005"}, + "inflex": {:hex, :inflex, "2.1.0", "a365cf0821a9dacb65067abd95008ca1b0bb7dcdd85ae59965deef2aa062924c", [:mix], [], "hexpm", "14c17d05db4ee9b6d319b0bff1bdf22aa389a25398d1952c7a0b5f3d93162dd8"}, "jason": {:hex, :jason, "1.4.4", "b9226785a9aa77b6857ca22832cffa5d5011a667207eb2a0ad56adb5db443b8a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "c5eb0cab91f094599f94d55bc63409236a8ec69a21a67814529e8d5f6cc90b3b"}, "langchain": {:hex, :langchain, "0.3.0-rc.0", "930d22170fff2c599e8a63a664e437f896555b3cebe3055276bca37e7ae17d1b", [:mix], 
[{:abacus, "~> 2.1.0", [hex: :abacus, repo: "hexpm", optional: false]}, {:ecto, "~> 3.10", [hex: :ecto, repo: "hexpm", optional: false]}, {:gettext, "~> 0.20", [hex: :gettext, repo: "hexpm", optional: false]}, {:nx, ">= 0.7.0", [hex: :nx, repo: "hexpm", optional: true]}, {:req, ">= 0.5.0", [hex: :req, repo: "hexpm", optional: false]}], "hexpm", "c1f4f563cfddc502d3cfa5180fef154b8e194ef0f6f7bf0fe540761d2439b7ab"}, "mime": {:hex, :mime, "2.0.6", "8f18486773d9b15f95f4f4f1e39b710045fa1de891fada4516559967276e4dc2", [:mix], [], "hexpm", "c9945363a6b26d747389aac3643f8e0e09d30499a138ad64fe8fd1d13d9b153e"}, "mint": {:hex, :mint, "1.6.2", "af6d97a4051eee4f05b5500671d47c3a67dac7386045d87a904126fd4bbcea2e", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "5ee441dffc1892f1ae59127f74afe8fd82fda6587794278d924e4d90ea3d63f9"}, "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, + "nx": {:hex, :nx, "0.9.2", "17563029c01bf749aad3c31234326d7665abd0acc33ee2acbe531a4759f29a8a", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "914d74741617d8103de8ab1f8c880353e555263e1c397b8a1109f79a3716557f"}, + "owl": {:hex, :owl, "0.12.2", "65906b525e5c3ef51bab6cba7687152be017aebe1da077bb719a5ee9f7e60762", [:mix], [{:ucwidth, "~> 0.2", [hex: :ucwidth, repo: "hexpm", optional: true]}], "hexpm", "6398efa9e1fea70a04d24231e10dcd66c1ac1aa2da418d20ef5357ec61de2880"}, "phoenix": {:hex, 
:phoenix, "1.7.14", "a7d0b3f1bc95987044ddada111e77bd7f75646a08518942c72a8440278ae7825", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.7", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:websock_adapter, "~> 0.5.3", [hex: :websock_adapter, repo: "hexpm", optional: false]}], "hexpm", "c7859bc56cc5dfef19ecfc240775dae358cbaa530231118a9e014df392ace61a"}, "phoenix_ecto": {:hex, :phoenix_ecto, "4.6.2", "3b83b24ab5a2eb071a20372f740d7118767c272db386831b2e77638c4dcc606d", [:mix], [{:ecto, "~> 3.5", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.1", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.16 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}], "hexpm", "3f94d025f59de86be00f5f8c5dd7b5965a3298458d21ab1c328488be3b5fcd59"}, "phoenix_html": {:hex, :phoenix_html, "4.1.1", "4c064fd3873d12ebb1388425a8f2a19348cef56e7289e1998e2d2fa758aa982e", [:mix], [], "hexpm", "f2f2df5a72bc9a2f510b21497fd7d2b86d932ec0598f0210fed4114adc546c6f"}, @@ -39,10 +45,15 @@ "plug": {:hex, :plug, "1.16.1", "40c74619c12f82736d2214557dedec2e9762029b2438d6d175c5074c933edc9d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: 
false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "a13ff6b9006b03d7e33874945b2755253841b238c34071ed85b0e86057f8cddc"}, "plug_crypto": {:hex, :plug_crypto, "2.1.0", "f44309c2b06d249c27c8d3f65cfe08158ade08418cf540fd4f72d4d6863abb7b", [:mix], [], "hexpm", "131216a4b030b8f8ce0f26038bc4421ae60e4bb95c5cf5395e1421437824c4fa"}, "postgrex": {:hex, :postgrex, "0.19.1", "73b498508b69aded53907fe48a1fee811be34cc720e69ef4ccd568c8715495ea", [:mix], [{:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "8bac7885a18f381e091ec6caf41bda7bb8c77912bb0e9285212829afe5d8a8f8"}, + "rag": {:git, "https://github.com/bitcrowd/rag.git", "d59d25e997aff214aff3e507cad00b9d537d4e68", []}, "req": {:hex, :req, "0.5.6", "8fe1eead4a085510fe3d51ad854ca8f20a622aae46e97b302f499dfb84f726ac", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "cfaa8e720945d46654853de39d368f40362c2641c4b2153c886418914b372185"}, + "rewrite": {:hex, :rewrite, "1.1.2", "f5a5d10f5fed1491a6ff48e078d4585882695962ccc9e6c779bae025d1f92eda", [:mix], [{:glob_ex, "~> 0.1", [hex: :glob_ex, repo: "hexpm", optional: false]}, {:sourceror, "~> 1.0", [hex: :sourceror, repo: "hexpm", optional: false]}, {:text_diff, "~> 0.1", [hex: :text_diff, repo: "hexpm", optional: false]}], "hexpm", 
"7f8b94b1e3528d0a47b3e8b7bfeca559d2948a65fa7418a9ad7d7712703d39d4"}, + "sourceror": {:hex, :sourceror, "1.7.1", "599d78f4cc2be7d55c9c4fd0a8d772fd0478e3a50e726697c20d13d02aa056d4", [:mix], [], "hexpm", "cd6f268fe29fa00afbc535e215158680a0662b357dc784646d7dff28ac65a0fc"}, + "spitfire": {:hex, :spitfire, "0.2.0", "0de1f519a23f65bde40d316adad53c07a9563f25cc68915d639d8a509a0aad8a", [:mix], [], "hexpm", "743daaee2d81a0d8095431729f478ce49b47ea8943c7d770de86704975cb7775"}, "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, "telemetry_metrics": {:hex, :telemetry_metrics, "1.0.0", "29f5f84991ca98b8eb02fc208b2e6de7c95f8bb2294ef244a176675adc7775df", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "f23713b3847286a534e005126d4c959ebcca68ae9582118ce436b521d1d47d5d"}, "telemetry_poller": {:hex, :telemetry_poller, "1.1.0", "58fa7c216257291caaf8d05678c8d01bd45f4bdbc1286838a28c4bb62ef32999", [:rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "9eb9d9cbfd81cbd7cdd24682f8711b6e2b691289a0de6826e58452f28c103c8f"}, + "text_diff": {:hex, :text_diff, "0.1.0", "1caf3175e11a53a9a139bc9339bd607c47b9e376b073d4571c031913317fecaa", [:mix], [], "hexpm", "d1ffaaecab338e49357b6daa82e435f877e0649041ace7755583a0ea3362dbd7"}, "thousand_island": {:hex, :thousand_island, "1.3.5", "6022b6338f1635b3d32406ff98d68b843ba73b3aa95cfc27154223244f3a6ca5", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "2be6954916fdfe4756af3239fb6b6d75d0b8063b5df03ba76fd8a4c87849e180"}, "websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"}, "websock_adapter": {:hex, :websock_adapter, 
"0.5.7", "65fa74042530064ef0570b75b43f5c49bb8b235d6515671b3d250022cb8a1f9e", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "d0f478ee64deddfec64b800673fd6e0c8888b079d9f3444dd96d2a98383bdbd1"}, From c847dc5c9feadee64061c4e97eac6dd6954b1629 Mon Sep 17 00:00:00 2001 From: Chris Date: Tue, 11 Mar 2025 23:03:07 +0100 Subject: [PATCH 6/6] changes generated by mix rag.install pgvector --- config/config.exs | 3 + config/dev.exs | 1 + config/prod.exs | 1 + config/test.exs | 1 + eval/rag_triad_eval.exs | 63 +++++++++ lib/chatbot/application.ex | 12 ++ lib/chatbot/rag.ex | 125 ++++++++++++++++++ lib/chatbot/rag/chunk.ex | 16 +++ lib/chatbot/rag/serving.ex | 32 +++++ lib/postgrex_types.ex | 5 + mix.exs | 5 + mix.lock | 16 +++ .../20250311215959_create_chunks_table.exs | 23 ++++ 13 files changed, 303 insertions(+) create mode 100644 config/dev.exs create mode 100644 config/prod.exs create mode 100644 config/test.exs create mode 100644 eval/rag_triad_eval.exs create mode 100644 lib/chatbot/rag.ex create mode 100644 lib/chatbot/rag/chunk.ex create mode 100644 lib/chatbot/rag/serving.ex create mode 100644 lib/postgrex_types.ex create mode 100644 priv/repo/migrations/20250311215959_create_chunks_table.exs diff --git a/config/config.exs b/config/config.exs index d88837a..03a84ef 100644 --- a/config/config.exs +++ b/config/config.exs @@ -6,6 +6,9 @@ import Config +config :chatbot, Chatbot.Repo, types: Chatbot.PostgrexTypes +config :chatbot, openai_key: "your openai API key" +config :nx, default_backend: EXLA.Backend import_config "config/endpoint.exs" import_config "config/logger.exs" import_config "config/phoenix.exs" diff --git a/config/dev.exs b/config/dev.exs new file mode 100644 index 0000000..becde76 --- /dev/null +++ 
b/config/dev.exs @@ -0,0 +1 @@ +import Config diff --git a/config/prod.exs b/config/prod.exs new file mode 100644 index 0000000..becde76 --- /dev/null +++ b/config/prod.exs @@ -0,0 +1 @@ +import Config diff --git a/config/test.exs b/config/test.exs new file mode 100644 index 0000000..becde76 --- /dev/null +++ b/config/test.exs @@ -0,0 +1 @@ +import Config diff --git a/eval/rag_triad_eval.exs b/eval/rag_triad_eval.exs new file mode 100644 index 0000000..73d8f2e --- /dev/null +++ b/eval/rag_triad_eval.exs @@ -0,0 +1,63 @@ +openai_key = Application.compile_env(:chatbot, :openai_key) + +dataset = + "https://huggingface.co/datasets/explodinggradients/amnesty_qa/resolve/main/english.json" + +IO.puts("downloading dataset") + +data = + Req.get!(dataset).body + |> Jason.decode!() + +IO.puts("indexing") + +data["contexts"] +|> Enum.map(&Enum.join(&1, " ")) +|> Enum.with_index(fn context, index -> %{document: context, source: "#{index}"} end) +|> Chatbot.Rag.index() + +IO.puts("generating responses") + +generations = + for question <- data["question"] do + Chatbot.Rag.query(question) + end + +provider = Rag.Ai.OpenAI.new(%{text_model: "gpt-4o-mini", api_key: openai_key}) + +IO.puts("evaluating") + +generations = + for generation <- generations do + Rag.Evaluation.evaluate_rag_triad(generation, provider) + end + +json = + generations + |> Enum.map(fn generation -> + Map.from_struct(generation) + |> Map.take([:query, :context, :context_sources, :response, :evaluations]) + end) + |> Jason.encode!() + +File.write!(Path.join(__DIR__, "triad_eval.json"), json) + +average_rag_triad_scores = + Enum.map( + generations, + fn gen -> + %{ + evaluations: %{ + "context_relevance_score" => context_relevance_score, + "groundedness_score" => groundedness_score, + "answer_relevance_score" => answer_relevance_score + } + } = gen + + (context_relevance_score + groundedness_score + answer_relevance_score) / 3 + end + ) + +total_average_score = Enum.sum(average_rag_triad_scores) / 
Enum.count(average_rag_triad_scores) + +IO.puts("Score: ,#{total_average_score}") diff --git a/lib/chatbot/application.ex b/lib/chatbot/application.ex index 93625a7..6eaa18e 100644 --- a/lib/chatbot/application.ex +++ b/lib/chatbot/application.ex @@ -8,6 +8,18 @@ defmodule Chatbot.Application do @impl true def start(_type, _args) do children = [ + {Nx.Serving, + [ + serving: Chatbot.Rag.Serving.build_llm_serving(), + name: Rag.LLMServing, + batch_timeout: 100 + ]}, + {Nx.Serving, + [ + serving: Chatbot.Rag.Serving.build_embedding_serving(), + name: Rag.EmbeddingServing, + batch_timeout: 100 + ]}, {Task.Supervisor, name: Chatbot.TaskSupervisor}, ChatbotWeb.Telemetry, Chatbot.Repo, diff --git a/lib/chatbot/rag.ex b/lib/chatbot/rag.ex new file mode 100644 index 0000000..745516d --- /dev/null +++ b/lib/chatbot/rag.ex @@ -0,0 +1,125 @@ +defmodule Chatbot.Rag do + alias Chatbot.Repo + alias Rag.{Ai, Embedding, Generation, Retrieval} + + import Ecto.Query + import Pgvector.Ecto.Query + + @provider Ai.Nx.new(%{embeddings_serving: Rag.EmbeddingServing, text_serving: Rag.LLMServing}) + + def ingest(path) do + path + |> load() + |> index() + end + + def load(path) do + path + |> list_text_files() + |> Enum.map(&%{source: &1}) + |> Enum.map(&Rag.Loading.load_file(&1)) + end + + defp list_text_files(path) do + path + |> Path.join("/**/*.txt") + |> Path.wildcard() + end + + def index(ingestions) do + chunks = + ingestions + |> Enum.flat_map(&chunk_text(&1, :document)) + |> Embedding.generate_embeddings_batch(@provider, + text_key: :chunk, + embedding_key: :embedding + ) + |> Enum.map(&to_chunk(&1)) + + Repo.insert_all(Chatbot.Rag.Chunk, chunks) + end + + defp chunk_text(ingestion, text_key, opts \\ []) do + text = Map.fetch!(ingestion, text_key) + chunks = TextChunker.split(text, opts) + + Enum.map(chunks, &Map.put(ingestion, :chunk, &1.text)) + end + + def query(query) do + generation = + Generation.new(query) + |> Embedding.generate_embedding(@provider) + |> 
Retrieval.retrieve(:fulltext_results, fn generation -> query_fulltext(generation) end) + |> Retrieval.retrieve(:semantic_results, fn generation -> + query_with_pgvector(generation) + end) + |> Retrieval.reciprocal_rank_fusion( + %{fulltext_results: 1, semantic_results: 1}, + :rrf_result + ) + |> Retrieval.deduplicate(:rrf_result, [:source]) + + context = + Generation.get_retrieval_result(generation, :rrf_result) + |> Enum.map_join("\n\n", & &1.document) + + context_sources = + Generation.get_retrieval_result(generation, :rrf_result) + |> Enum.map(& &1.source) + + prompt = smollm_prompt(query, context) + + generation + |> Generation.put_context(context) + |> Generation.put_context_sources(context_sources) + |> Generation.put_prompt(prompt) + |> Generation.generate_response(@provider) + end + + defp to_chunk(ingestion) do + now = NaiveDateTime.utc_now() |> NaiveDateTime.truncate(:second) + + ingestion + |> Map.put_new(:inserted_at, now) + |> Map.put_new(:updated_at, now) + end + + defp query_with_pgvector(%{query_embedding: query_embedding}, limit \\ 3) do + {:ok, + Repo.all( + from(c in Chatbot.Rag.Chunk, + order_by: l2_distance(c.embedding, ^Pgvector.new(query_embedding)), + limit: ^limit + ) + )} + end + + defp query_fulltext(%{query: query}, limit \\ 3) do + query = query |> String.trim() |> String.replace(" ", " & ") + + {:ok, + Repo.all( + from(c in Chatbot.Rag.Chunk, + where: fragment("to_tsvector(?) @@ to_tsquery(?)", c.document, ^query), + limit: ^limit + ) + )} + end + + defp smollm_prompt(query, context) do + """ + <|im_start|>system + You are a helpful assistant.<|im_end|> + <|im_start|>user + Context information is below. + --------------------- + #{context} + --------------------- + Given the context information and no prior knowledge, answer the query. 
+ Query: #{query} + Answer: <|im_end|> + <|im_start|>assist + """ + end +end diff --git a/lib/chatbot/rag/chunk.ex b/lib/chatbot/rag/chunk.ex new file mode 100644 index 0000000..91e1152 --- /dev/null +++ b/lib/chatbot/rag/chunk.ex @@ -0,0 +1,16 @@ +defmodule Chatbot.Rag.Chunk do + use Ecto.Schema + + schema "chunks" do + field(:document, :string) + field(:source, :string) + field(:chunk, :string) + field(:embedding, Pgvector.Ecto.Vector) + + timestamps() + end + + def changeset(chunk \\ %__MODULE__{}, attrs) do + Ecto.Changeset.cast(chunk, attrs, [:document, :source, :chunk, :embedding]) + end +end diff --git a/lib/chatbot/rag/serving.ex b/lib/chatbot/rag/serving.ex new file mode 100644 index 0000000..a08f763 --- /dev/null +++ b/lib/chatbot/rag/serving.ex @@ -0,0 +1,32 @@ +defmodule Chatbot.Rag.Serving do + def build_embedding_serving() do + repo = {:hf, "thenlper/gte-small"} + + {:ok, model_info} = Bumblebee.load_model(repo) + + {:ok, tokenizer} = Bumblebee.load_tokenizer(repo) + + Bumblebee.Text.TextEmbedding.text_embedding(model_info, tokenizer, + compile: [batch_size: 64, sequence_length: 512], + defn_options: [compiler: EXLA], + output_attribute: :hidden_state, + output_pool: :mean_pooling + ) + end + + def build_llm_serving() do + repo = {:hf, "HuggingFaceTB/SmolLM2-135M-Instruct"} + + {:ok, model_info} = Bumblebee.load_model(repo) + {:ok, tokenizer} = Bumblebee.load_tokenizer(repo) + {:ok, generation_config} = Bumblebee.load_generation_config(repo) + + generation_config = Bumblebee.configure(generation_config, max_new_tokens: 100) + + Bumblebee.Text.generation(model_info, tokenizer, generation_config, + compile: [batch_size: 1, sequence_length: 6000], + defn_options: [compiler: EXLA], + stream: false + ) + end +end diff --git a/lib/postgrex_types.ex b/lib/postgrex_types.ex new file mode 100644 index 0000000..0850089 --- /dev/null +++ b/lib/postgrex_types.ex @@ -0,0 +1,5 @@ +Postgrex.Types.define( + Chatbot.PostgrexTypes, + [Pgvector.Extensions.Vector] ++ 
Ecto.Adapters.Postgres.extensions(), + [] +) diff --git a/mix.exs b/mix.exs index a24511b..386a925 100644 --- a/mix.exs +++ b/mix.exs @@ -33,6 +33,11 @@ defmodule Chatbot.MixProject do # Type `mix help deps` for examples and options. defp deps do [ + {:pgvector, "~> 0.3.0"}, + {:ecto, "~> 3.0"}, + {:exla, "~> 0.9.1"}, + {:bumblebee, "~> 0.6.0"}, + {:text_chunker, "~> 0.3.1"}, {:ex_machina, "~> 2.8"}, {:bitcrowd_ecto, "~> 1.0"}, {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, diff --git a/mix.lock b/mix.lock index d12f8d0..bb1b56d 100644 --- a/mix.lock +++ b/mix.lock @@ -1,8 +1,10 @@ %{ "abacus": {:hex, :abacus, "2.1.0", "b6db5c989ba3d9dd8c36d1cb269e2f0058f34768d47c67eb8ce06697ecb36dd4", [:mix], [], "hexpm", "255de08b02884e8383f1eed8aa31df884ce0fb5eb394db81ff888089f2a1bbff"}, + "axon": {:hex, :axon, "0.7.0", "2e2c6d93b4afcfa812566b8922204fa022b60081e86ebd411df4db7ea30f5457", [:mix], [{:kino, "~> 0.7", [hex: :kino, repo: "hexpm", optional: true]}, {:kino_vega_lite, "~> 0.1.7", [hex: :kino_vega_lite, repo: "hexpm", optional: true]}, {:nx, "~> 0.9", [hex: :nx, repo: "hexpm", optional: false]}, {:polaris, "~> 0.1", [hex: :polaris, repo: "hexpm", optional: false]}, {:table_rex, "~> 3.1.1", [hex: :table_rex, repo: "hexpm", optional: true]}], "hexpm", "ee9857a143c9486597ceff434e6ca833dc1241be6158b01025b8217757ed1036"}, "bandit": {:hex, :bandit, "1.5.7", "6856b1e1df4f2b0cb3df1377eab7891bec2da6a7fd69dc78594ad3e152363a50", [:mix], [{:hpax, "~> 1.0.0", [hex: :hpax, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:thousand_island, "~> 1.0", [hex: :thousand_island, repo: "hexpm", optional: false]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "f2dd92ae87d2cbea2fa9aa1652db157b6cba6c405cb44d4f6dd87abba41371cd"}, "bitcrowd_ecto": {:hex, :bitcrowd_ecto, "1.0.0", 
"b255cf7b8e22bc17adeb8bbc9907ef02dcdc751fd68ab3e444b0a098dac99b65", [:mix], [{:ecto, "~> 3.6", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "~> 3.6", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:ex_money, "~> 5.12", [hex: :ex_money, repo: "hexpm", optional: true]}, {:jason, "> 0.0.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "862effc5eba425176472c2f01dd50b9994622089fa4bc2a11d75732b47dcdab9"}, "bitstyles_phoenix": {:hex, :bitstyles_phoenix, "2.5.0", "c02aae26fcf6ff752738b35aa97f3991c67533412eda381abaad3e22aa2e2215", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 3.3 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:phoenix_live_view, "~> 0.18.12 or ~> 0.19.0 or ~> 0.20.0", [hex: :phoenix_live_view, repo: "hexpm", optional: false]}], "hexpm", "fe018f0eeec6c84afedc1785495efe0a651ecf34656c4b52f6da09d5149a9f28"}, + "bumblebee": {:hex, :bumblebee, "0.6.0", "1c731313308ff9fde2effc4a2f366742dbd78e227b84e980eb2804f6b9281724", [:mix], [{:axon, "~> 0.7.0", [hex: :axon, repo: "hexpm", optional: false]}, {:jason, "~> 1.4.0", [hex: :jason, repo: "hexpm", optional: false]}, {:nx, "~> 0.9.0", [hex: :nx, repo: "hexpm", optional: false]}, {:nx_image, "~> 0.1.0", [hex: :nx_image, repo: "hexpm", optional: false]}, {:nx_signal, "~> 0.2.0", [hex: :nx_signal, repo: "hexpm", optional: false]}, {:progress_bar, "~> 3.0", [hex: :progress_bar, repo: "hexpm", optional: false]}, {:safetensors, "~> 0.1.3", [hex: :safetensors, repo: "hexpm", optional: false]}, {:tokenizers, "~> 0.4", [hex: :tokenizers, repo: "hexpm", optional: false]}, {:unpickler, "~> 0.1.0", [hex: :unpickler, repo: "hexpm", optional: false]}, {:unzip, "~> 0.12.0", [hex: :unzip, repo: "hexpm", optional: false]}], "hexpm", "a8b863179d314e9615b00291d5dcd2dc043b294edc25b4483d5c88d1c8d21c89"}, "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", 
"dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, "castore": {:hex, :castore, "1.0.9", "5cc77474afadf02c7c017823f460a17daa7908e991b0cc917febc90e466a375c", [:mix], [], "hexpm", "5ea956504f1ba6f2b4eb707061d8e17870de2bee95fb59d512872c2ef06925e7"}, "complex": {:hex, :complex, "0.6.0", "b0130086a7a8c33574d293b2e0e250f4685580418eac52a5658a4bd148f3ccf1", [:mix], [], "hexpm", "0a5fa95580dcaf30fcd60fe1aaf24327c0fe401e98c24d892e172e79498269f9"}, @@ -14,9 +16,11 @@ "earmark": {:hex, :earmark, "1.4.47", "7e7596b84fe4ebeb8751e14cbaeaf4d7a0237708f2ce43630cfd9065551f94ca", [:mix], [], "hexpm", "3e96bebea2c2d95f3b346a7ff22285bc68a99fbabdad9b655aa9c6be06c698f8"}, "ecto": {:hex, :ecto, "3.12.4", "267c94d9f2969e6acc4dd5e3e3af5b05cdae89a4d549925f3008b2b7eb0b93c3", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ef04e4101688a67d061e1b10d7bc1fbf00d1d13c17eef08b71d070ff9188f747"}, "ecto_sql": {:hex, :ecto_sql, "3.12.1", "c0d0d60e85d9ff4631f12bafa454bc392ce8b9ec83531a412c12a0d415a3a4d0", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.12", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.19 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "aff5b958a899762c5f09028c847569f7dfb9cc9d63bdb8133bff8a5546de6bf5"}, + "elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"}, "erlex": {:hex, :erlex, "0.2.7", 
"810e8725f96ab74d17aac676e748627a07bc87eb950d2b83acd29dc047a30595", [:mix], [], "hexpm", "3ed95f79d1a844c3f6bf0cea61e0d5612a42ce56da9c03f01df538685365efb0"}, "esbuild": {:hex, :esbuild, "0.8.1", "0cbf919f0eccb136d2eeef0df49c4acf55336de864e63594adcea3814f3edf41", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "25fc876a67c13cb0a776e7b5d7974851556baeda2085296c14ab48555ea7560f"}, "ex_machina": {:hex, :ex_machina, "2.8.0", "a0e847b5712065055ec3255840e2c78ef9366634d62390839d4880483be38abe", [:mix], [{:ecto, "~> 2.2 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_sql, "~> 3.0", [hex: :ecto_sql, repo: "hexpm", optional: true]}], "hexpm", "79fe1a9c64c0c1c1fab6c4fa5d871682cb90de5885320c187d117004627a7729"}, + "exla": {:hex, :exla, "0.9.2", "2b5cb7334f79fedc301502a793ffd10bc1ec8de2c61eebabcabf213fc98ae7e6", [:make, :mix], [{:elixir_make, "~> 0.6", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.0", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:nx, "~> 0.9.0", [hex: :nx, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:xla, "~> 0.8.0", [hex: :xla, repo: "hexpm", optional: false]}], "hexpm", "e51085e196b466d235e93d9f5ea2cbf7d90315d216aa02e996f99bcaaa19c593"}, "expo": {:hex, :expo, "1.1.0", "f7b9ed7fb5745ebe1eeedf3d6f29226c5dd52897ac67c0f8af62a07e661e5c75", [:mix], [], "hexpm", "fbadf93f4700fb44c331362177bdca9eeb8097e8b0ef525c9cc501cb9917c960"}, "file_system": {:hex, :file_system, "1.0.1", "79e8ceaddb0416f8b8cd02a0127bdbababe7bf4a23d2a395b983c1f8b3f73edd", [:mix], [], "hexpm", "4414d1f38863ddf9120720cd976fce5bdde8e91d8283353f0e31850fa89feb9e"}, "finch": {:hex, :finch, "0.19.0", "c644641491ea854fc5c1bbaef36bfc764e3f08e7185e1f084e35e0672241b76d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 
1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "fc5324ce209125d1e2fa0fcd2634601c52a787aff1cd33ee833664a5af4ea2b6"}, @@ -33,7 +37,10 @@ "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"}, "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"}, "nx": {:hex, :nx, "0.9.2", "17563029c01bf749aad3c31234326d7665abd0acc33ee2acbe531a4759f29a8a", [:mix], [{:complex, "~> 0.5", [hex: :complex, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "914d74741617d8103de8ab1f8c880353e555263e1c397b8a1109f79a3716557f"}, + "nx_image": {:hex, :nx_image, "0.1.2", "0c6e3453c1dc30fc80c723a54861204304cebc8a89ed3b806b972c73ee5d119d", [:mix], [{:nx, "~> 0.4", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "9161863c42405ddccb6dbbbeae078ad23e30201509cc804b3b3a7c9e98764b81"}, + "nx_signal": {:hex, :nx_signal, "0.2.0", "e1ca0318877b17c81ce8906329f5125f1e2361e4c4235a5baac8a95ee88ea98e", [:mix], [{:nx, "~> 0.6", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "7247e5e18a177a59c4cb5355952900c62fdeadeb2bad02a9a34237b68744e2bb"}, "owl": {:hex, :owl, "0.12.2", "65906b525e5c3ef51bab6cba7687152be017aebe1da077bb719a5ee9f7e60762", [:mix], [{:ucwidth, "~> 0.2", [hex: :ucwidth, repo: "hexpm", optional: true]}], "hexpm", "6398efa9e1fea70a04d24231e10dcd66c1ac1aa2da418d20ef5357ec61de2880"}, + "pgvector": {:hex, :pgvector, "0.3.0", 
"c55c7c0f6224b06105fc3214965c6217e4cfe907d7524cd8c27ba7612b7f8582", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: true]}, {:postgrex, ">= 0.0.0", [hex: :postgrex, repo: "hexpm", optional: false]}], "hexpm", "aeb7c36c5851881fd1d8a39e213472fa0b07bd72cdb0acabc693055aa14693ab"}, "phoenix": {:hex, :phoenix, "1.7.14", "a7d0b3f1bc95987044ddada111e77bd7f75646a08518942c72a8440278ae7825", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.7", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:websock_adapter, "~> 0.5.3", [hex: :websock_adapter, repo: "hexpm", optional: false]}], "hexpm", "c7859bc56cc5dfef19ecfc240775dae358cbaa530231118a9e014df392ace61a"}, "phoenix_ecto": {:hex, :phoenix_ecto, "4.6.2", "3b83b24ab5a2eb071a20372f740d7118767c272db386831b2e77638c4dcc606d", [:mix], [{:ecto, "~> 3.5", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.1", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:plug, "~> 1.9", [hex: :plug, repo: "hexpm", optional: false]}, {:postgrex, "~> 0.16 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}], "hexpm", "3f94d025f59de86be00f5f8c5dd7b5965a3298458d21ab1c328488be3b5fcd59"}, "phoenix_html": {:hex, :phoenix_html, "4.1.1", "4c064fd3873d12ebb1388425a8f2a19348cef56e7289e1998e2d2fa758aa982e", [:mix], [], 
"hexpm", "f2f2df5a72bc9a2f510b21497fd7d2b86d932ec0598f0210fed4114adc546c6f"}, @@ -44,17 +51,26 @@ "phoenix_template": {:hex, :phoenix_template, "1.0.4", "e2092c132f3b5e5b2d49c96695342eb36d0ed514c5b252a77048d5969330d639", [:mix], [{:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}], "hexpm", "2c0c81f0e5c6753faf5cca2f229c9709919aba34fab866d3bc05060c9c444206"}, "plug": {:hex, :plug, "1.16.1", "40c74619c12f82736d2214557dedec2e9762029b2438d6d175c5074c933edc9d", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "a13ff6b9006b03d7e33874945b2755253841b238c34071ed85b0e86057f8cddc"}, "plug_crypto": {:hex, :plug_crypto, "2.1.0", "f44309c2b06d249c27c8d3f65cfe08158ade08418cf540fd4f72d4d6863abb7b", [:mix], [], "hexpm", "131216a4b030b8f8ce0f26038bc4421ae60e4bb95c5cf5395e1421437824c4fa"}, + "polaris": {:hex, :polaris, "0.1.0", "dca61b18e3e801ecdae6ac9f0eca5f19792b44a5cb4b8d63db50fc40fc038d22", [:mix], [{:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "13ef2b166650e533cb24b10e2f3b8ab4f2f449ba4d63156e8c569527f206e2c2"}, "postgrex": {:hex, :postgrex, "0.19.1", "73b498508b69aded53907fe48a1fee811be34cc720e69ef4ccd568c8715495ea", [:mix], [{:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "8bac7885a18f381e091ec6caf41bda7bb8c77912bb0e9285212829afe5d8a8f8"}, + "progress_bar": {:hex, :progress_bar, "3.0.0", "f54ff038c2ac540cfbb4c2bfe97c75e7116ead044f3c2b10c9f212452194b5cd", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", 
optional: false]}], "hexpm", "6981c2b25ab24aecc91a2dc46623658e1399c21a2ae24db986b90d678530f2b7"}, "rag": {:git, "https://github.com/bitcrowd/rag.git", "d59d25e997aff214aff3e507cad00b9d537d4e68", []}, "req": {:hex, :req, "0.5.6", "8fe1eead4a085510fe3d51ad854ca8f20a622aae46e97b302f499dfb84f726ac", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "cfaa8e720945d46654853de39d368f40362c2641c4b2153c886418914b372185"}, "rewrite": {:hex, :rewrite, "1.1.2", "f5a5d10f5fed1491a6ff48e078d4585882695962ccc9e6c779bae025d1f92eda", [:mix], [{:glob_ex, "~> 0.1", [hex: :glob_ex, repo: "hexpm", optional: false]}, {:sourceror, "~> 1.0", [hex: :sourceror, repo: "hexpm", optional: false]}, {:text_diff, "~> 0.1", [hex: :text_diff, repo: "hexpm", optional: false]}], "hexpm", "7f8b94b1e3528d0a47b3e8b7bfeca559d2948a65fa7418a9ad7d7712703d39d4"}, + "rustler_precompiled": {:hex, :rustler_precompiled, "0.8.2", "5f25cbe220a8fac3e7ad62e6f950fcdca5a5a5f8501835d2823e8c74bf4268d5", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, "~> 0.23", [hex: :rustler, repo: "hexpm", optional: true]}], "hexpm", "63d1bd5f8e23096d1ff851839923162096364bac8656a4a3c00d1fff8e83ee0a"}, + "safetensors": {:hex, :safetensors, "0.1.3", "7ff3c22391e213289c713898481d492c9c28a49ab1d0705b72630fb8360426b2", [:mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:nx, "~> 0.5", [hex: :nx, repo: "hexpm", optional: false]}], "hexpm", "fe50b53ea59fde4e723dd1a2e31cfdc6013e69343afac84c6be86d6d7c562c14"}, 
"sourceror": {:hex, :sourceror, "1.7.1", "599d78f4cc2be7d55c9c4fd0a8d772fd0478e3a50e726697c20d13d02aa056d4", [:mix], [], "hexpm", "cd6f268fe29fa00afbc535e215158680a0662b357dc784646d7dff28ac65a0fc"}, "spitfire": {:hex, :spitfire, "0.2.0", "0de1f519a23f65bde40d316adad53c07a9563f25cc68915d639d8a509a0aad8a", [:mix], [], "hexpm", "743daaee2d81a0d8095431729f478ce49b47ea8943c7d770de86704975cb7775"}, "telemetry": {:hex, :telemetry, "1.3.0", "fedebbae410d715cf8e7062c96a1ef32ec22e764197f70cda73d82778d61e7a2", [:rebar3], [], "hexpm", "7015fc8919dbe63764f4b4b87a95b7c0996bd539e0d499be6ec9d7f3875b79e6"}, "telemetry_metrics": {:hex, :telemetry_metrics, "1.0.0", "29f5f84991ca98b8eb02fc208b2e6de7c95f8bb2294ef244a176675adc7775df", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "f23713b3847286a534e005126d4c959ebcca68ae9582118ce436b521d1d47d5d"}, "telemetry_poller": {:hex, :telemetry_poller, "1.1.0", "58fa7c216257291caaf8d05678c8d01bd45f4bdbc1286838a28c4bb62ef32999", [:rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "9eb9d9cbfd81cbd7cdd24682f8711b6e2b691289a0de6826e58452f28c103c8f"}, + "text_chunker": {:hex, :text_chunker, "0.3.2", "bf587de84dcd405215095201ac9e2cff0d0a2c49d0fe1d8f16461944fdb124ae", [:mix], [{:nimble_options, "~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}], "hexpm", "84061ef0a861065ca3be66c5e3a8e0725cb0b31210f614b8b7eb135c9e6cb6b2"}, "text_diff": {:hex, :text_diff, "0.1.0", "1caf3175e11a53a9a139bc9339bd607c47b9e376b073d4571c031913317fecaa", [:mix], [], "hexpm", "d1ffaaecab338e49357b6daa82e435f877e0649041ace7755583a0ea3362dbd7"}, "thousand_island": {:hex, :thousand_island, "1.3.5", "6022b6338f1635b3d32406ff98d68b843ba73b3aa95cfc27154223244f3a6ca5", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "2be6954916fdfe4756af3239fb6b6d75d0b8063b5df03ba76fd8a4c87849e180"}, + "tokenizers": {:hex, 
:tokenizers, "0.5.1", "b0975d92b4ee5b18e8f47b5d65b9d5f1e583d9130189b1a2620401af4e7d4b35", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: false]}, {:rustler, ">= 0.0.0", [hex: :rustler, repo: "hexpm", optional: true]}, {:rustler_precompiled, "~> 0.6", [hex: :rustler_precompiled, repo: "hexpm", optional: false]}], "hexpm", "5f08d97cc7f2ed3d71d370d68120da6d3de010948ccf676c9c0eb591ba4bacc9"}, + "unpickler": {:hex, :unpickler, "0.1.0", "c2262c0819e6985b761e7107546cef96a485f401816be5304a65fdd200d5bd6a", [:mix], [], "hexpm", "e2b3f61e62406187ac52afead8a63bfb4e49394028993f3c4c42712743cab79e"}, + "unzip": {:hex, :unzip, "0.12.0", "beed92238724732418b41eba77dcb7f51e235b707406c05b1732a3052d1c0f36", [:mix], [], "hexpm", "95655b72db368e5a84951f0bed586ac053b55ee3815fd96062fce10ce4fc998d"}, "websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"}, "websock_adapter": {:hex, :websock_adapter, "0.5.7", "65fa74042530064ef0570b75b43f5c49bb8b235d6515671b3d250022cb8a1f9e", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "d0f478ee64deddfec64b800673fd6e0c8888b079d9f3444dd96d2a98383bdbd1"}, + "xla": {:hex, :xla, "0.8.0", "fef314d085dd3ee16a0816c095239938f80769150e15db16dfaa435553d7cb16", [:make, :mix], [{:elixir_make, "~> 0.4", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "739c61c8d93b97e12ba0369d10e76130224c208f1a76ad293e3581f056833e57"}, } diff --git a/priv/repo/migrations/20250311215959_create_chunks_table.exs b/priv/repo/migrations/20250311215959_create_chunks_table.exs new file mode 100644 index 0000000..afce8ff --- /dev/null +++ 
b/priv/repo/migrations/20250311215959_create_chunks_table.exs
@@ -0,0 +1,30 @@
+defmodule Chatbot.Repo.Migrations.CreateChunksTable do
+  # Enables the `vector` Postgres extension and creates the `chunks` table,
+  # whose `embedding` column is declared with the `:vector` type.
+  use Ecto.Migration
+
+  # Forward migration: enable the extension, then create the table.
+  def up do
+    execute "CREATE EXTENSION IF NOT EXISTS vector"
+
+    # Run the queued extension statement before the table is defined
+    # (see Ecto.Migration.flush/0).
+    flush()
+
+    create table(:chunks) do
+      add :document, :text
+      add :source, :text
+      add :chunk, :text
+      # Vector column declared with a fixed size of 384.
+      add :embedding, :vector, size: 384
+
+      timestamps()
+    end
+  end
+
+  # Rollback: drop the table first, then the `vector` extension.
+  def down do
+    drop table(:chunks)
+    execute "DROP EXTENSION vector"
+  end
+end