Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 67 additions & 5 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,13 @@
ui.nav('Ask File',
ui.layout_sidebar(
ui.panel_sidebar(
ui.input_file('document_input_file',
'Select a PDF file you wish to ask a question about',
multiple=False, accept='.pdf', button_label='Select',
ui.input_file('document_input_files',
'Select one or more PDF file(s) you wish to ask a question about',
multiple=True, accept='.pdf', button_label='Select',
placeholder='Your PDF here..'),
ui.input_checkbox_group('selected_files', '', []),
# TODO: Hide remove button behind panel_conditional when no file is uploaded yet
ui.input_action_button('remove_selected', label="Remove selected PDF(s)"),
ui.input_text_area('question_input_file', 'What wisdom do you seek from this file?', rows=4),
ui.input_slider('n_chunks_file', 'Number of chunks', min=1, max=5, value=3),
ui.input_action_button(id="run_process_file", label="Do Magic", class_='btn-success'),
Expand All @@ -45,7 +48,7 @@
),
ui.panel_main(
ui.panel_conditional(
"""input.run_process_file > 0 && input.question_input_file != ''""", # && input.document_input_file != null
"""input.run_process_file > 0 && input.question_input_file != ''""", # && input.document_input_files != null
ui.output_text('get_answer_file'),
),
width=8,
Expand All @@ -60,6 +63,19 @@

def server(input, output, session):

documents = reactive.Value([])

@reactive.Effect
@reactive.event(input.document_input_files)
def _():
    """
    Merge newly uploaded file(s) into the stored document list.

    An upload is skipped when a stored document already carries its name.
    """
    stored = documents.get()
    # Names present before this upload is merged in.
    known_names = [entry['name'] for entry in stored]

    # NOTE(review): the stored list is extended in place and the very same
    # object is handed back to set() -- confirm that reactive.Value.set()
    # notifies dependents when it receives the identical object.
    stored.extend(
        upload
        for upload in input.document_input_files()
        if upload['name'] not in known_names
    )
    documents.set(stored)

val = reactive.Value(3)
@reactive.Effect
@reactive.event(input.n_chunks_db)
Expand All @@ -83,11 +99,57 @@ async def get_answer_db():
answer = re.sub('\s*</s>$', '', answer)
return answer

@reactive.Effect
@reactive.event(input.document_input_files)
def _():
    """
    Refresh the file checkbox group after an upload, ticking every entry.
    """
    names = [entry['name'] for entry in documents.get()]
    ui.update_checkbox_group(
        'selected_files',
        label="Selected file(s):",
        choices=names,
        selected=names,
    )

@reactive.Effect
@reactive.event(documents.get)
def _():
    """
    Rebuild the file checkbox group on a `documents` event (e.g. after a
    deletion).

    All listed entries are left unticked, and the group label is blanked
    when no documents remain.
    """
    names = [entry['name'] for entry in documents.get()]
    heading = "Selected file(s):" if names else ""
    # Pass selected=names instead of [] to automatically re-tick the
    # remaining documents after a deletion.
    ui.update_checkbox_group('selected_files', label=heading, choices=names, selected=[])

@reactive.Effect
@reactive.event(input.remove_selected)
def _():
    """
    Drop every ticked file from the stored documents.

    Runs on the 'remove_selected' button event; unticked files are kept.
    """
    remaining = []
    for entry in documents.get():
        if entry['name'] in input.selected_files():
            # Ticked in the checkbox group -> remove it.
            continue
        remaining.append(entry)
    # To invert the behaviour (keep only the ticked files), append when the
    # name IS in input.selected_files() instead.
    documents.set(remaining)

@output()
@render.text
@reactive.event(input.run_process_file)
async def get_answer_file():
db_items = embedding_loaded_pdf(file_path=input.document_input_file()[0]['datapath'], chunk_size=200, overlap=10)
# Gather the embedding_loaded_pdf() results of every ticked document.
db_items = []

docs = documents.get()

for file in docs:
    if file['name'] in input.selected_files():
        # Use this document's own upload path. Indexing
        # input.document_input_files()[0] here would process the first file
        # of the latest upload for every ticked document, so all selected
        # files would contribute the same items.
        file['db_items'] = embedding_loaded_pdf(file_path=file['datapath'], chunk_size=200, overlap=10)
        db_items.extend(file['db_items'])

answer = pipeline_return_question_and_answer(query=input.question_input_file(),
db_items=db_items,
n_chunks=input.n_chunks_file())
Expand Down
2 changes: 2 additions & 0 deletions pipelines.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from functools import lru_cache
from filepreprocessing import pdf_get_text_chunks
from tqdm import tqdm
from embedding import get_embedding_sentence_transformer
from similarity import model_qa, cosine_similarity


@lru_cache(maxsize=64)
def embedding_loaded_pdf(file_path, chunk_size, overlap):

# FIRST WE LOAD PDF
Expand Down