Skip to content

Commit 910b03b

Browse files
committed
Update pipeline to ingest ecto docs and code
1 parent 2b20da7 commit 910b03b

File tree

1 file changed

+30
-13
lines changed

1 file changed

+30
-13
lines changed

lib/chatbot/rag.ex

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,40 @@ defmodule Chatbot.Rag do
77

88
# Provider struct built once at compile time via `Ai.Nx.new/1`, with
# `Rag.EmbeddingServing` passed as the `:embeddings_serving` option.
# NOTE(review): presumably consumed by the indexing/query code further down — confirm.
@provider Ai.Nx.new(%{embeddings_serving: Rag.EmbeddingServing})
99

10-
def ingest(path) do
11-
path
12-
|> load()
13-
|> index()
10+
@doc """
Downloads the Ecto 3.12.5 docs and source tarballs from repo.hex.pm and indexes them.

Docs entries are restricted to `.html`, `.md` and `.txt` files, while every file
of the source tarball is kept. Each entry is handed to `index/1` as
`%{source: file_name, document: content}`, where `file_name` is always a binary.

Returns whatever `index/1` returns. Uses `Req.get!/1`, so a failed download raises.
"""
def ingest_ecto() do
  docs_url = "https://repo.hex.pm/docs/ecto-3.12.5.tar.gz"
  code_url = "https://repo.hex.pm/tarballs/ecto-3.12.5.tar"

  # ReqHex is attached so the response body arrives already unpacked.
  req = Req.new(url: docs_url) |> ReqHex.attach()
  docs_tarball = Req.get!(req).body

  docs =
    for {file, content} <- docs_tarball, text_file?(file) do
      %{source: to_string(file), document: content}
    end

  req = Req.new(url: code_url) |> ReqHex.attach()
  code_tarball = Req.get!(req).body

  code =
    for {file, content} <- code_tarball["contents.tar.gz"] do
      # Normalise the name exactly like the docs entries above, so every
      # `:source` is a binary even when the tarball lists names as charlists.
      %{source: to_string(file), document: content}
    end

  index(docs ++ code)
end
1534

16-
def load(path) do
17-
path
18-
|> list_text_files()
19-
|> Enum.map(&%{source: &1})
20-
|> Enum.map(&Rag.Loading.load_file(&1))
35+
# Charlist clause: converts the name to a binary first, then reports whether
# it ends in one of the accepted text extensions.
defp text_file?(file) when is_list(file) do
  name = to_string(file)
  String.ends_with?(name, [".html", ".md", ".txt"])
end
2240

23-
defp list_text_files(path) do
24-
path
25-
|> Path.join("/**/*.txt")
26-
|> Path.wildcard()
41+
# Binary clause: true when the file name ends in one of the accepted
# text extensions.
defp text_file?(file) when is_binary(file) do
  String.ends_with?(file, [".html", ".md", ".txt"])
end
2845

2946
def index(ingestions) do

0 commit comments

Comments
 (0)