File tree Expand file tree Collapse file tree 1 file changed +30
-13
lines changed
Expand file tree Collapse file tree 1 file changed +30
-13
lines changed Original file line number Diff line number Diff line change @@ -7,23 +7,40 @@ defmodule Chatbot.Rag do
77
88 @ provider Ai.Nx . new ( % { embeddings_serving: Rag.EmbeddingServing } )
99
@doc """
Downloads the Ecto 3.12.5 documentation and package tarballs from hex.pm and
hands their contents to `index/1`.

Every entry passed on has the shape `%{source: path, document: content}`,
where `path` is the entry's name inside the tarball, normalised to a string.
From the docs tarball only files accepted by `text_file?/1` are kept; from the
package tarball every entry under the `"contents.tar.gz"` key is kept.

Returns whatever `index/1` returns. Uses `Req.get!/1`, so a failed request
raises instead of returning an error tuple.
"""
def ingest_ecto() do
  docs_url = "https://repo.hex.pm/docs/ecto-3.12.5.tar.gz"
  code_url = "https://repo.hex.pm/tarballs/ecto-3.12.5.tar"

  docs =
    for {file, content} <- fetch_hex_tarball(docs_url), text_file?(file) do
      %{source: to_string(file), document: content}
    end

  # Entry names may arrive as charlists (the docs branch already converts
  # them), so normalise here too: `source` is then a string for every
  # ingestion, not a mix of strings and charlists.
  code =
    for {file, content} <- fetch_hex_tarball(code_url)["contents.tar.gz"] do
      %{source: to_string(file), document: content}
    end

  index(docs ++ code)
end

# Fetches `url` through Req with the ReqHex plugin attached and returns the
# response body as decoded by that plugin.
defp fetch_hex_tarball(url) do
  req = Req.new(url: url) |> ReqHex.attach()
  Req.get!(req).body
end
1534
# Returns true when the file name ends in one of the text-like extensions
# (.html, .md, .txt). Accepts the name either as a binary or as a list
# (charlist); a list is converted with to_string/1 and re-dispatched to the
# binary clause.
defp text_file?(file) when is_binary(file),
  do: String.ends_with?(file, [".html", ".md", ".txt"])

defp text_file?(file) when is_list(file),
  do: file |> to_string() |> text_file?()
2845
2946 def index ( ingestions ) do
You can’t perform that action at this time.
0 commit comments