diff --git a/Makefile b/Makefile index 784daed..bdeb0f7 100644 --- a/Makefile +++ b/Makefile @@ -5,3 +5,7 @@ run: .PHONY: backend backend: python supervised/backend.py + +.PHONY: summarize +summarize: + python supervised/summarize.py \ No newline at end of file diff --git a/poetry.lock b/poetry.lock index dea5b0d..624b739 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1771,6 +1771,21 @@ files = [ {file = "numpy-2.2.2.tar.gz", hash = "sha256:ed6906f61834d687738d25988ae117683705636936cc605be0bb208b23df4d8f"}, ] +[[package]] +name = "ollama" +version = "0.4.7" +description = "The official Python client for Ollama." +optional = false +python-versions = "<4.0,>=3.8" +files = [ + {file = "ollama-0.4.7-py3-none-any.whl", hash = "sha256:85505663cca67a83707be5fb3aeff0ea72e67846cea5985529d8eca4366564a1"}, + {file = "ollama-0.4.7.tar.gz", hash = "sha256:891dcbe54f55397d82d289c459de0ea897e103b86a3f1fad0fdb1895922a75ff"}, +] + +[package.dependencies] +httpx = ">=0.27,<0.29" +pydantic = ">=2.9.0,<3.0.0" + [[package]] name = "outcome" version = "1.3.0.post0" @@ -3780,4 +3795,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "4d122ffb03fa3cae4e80994683486a0e1211592259480a30dd3a884ed628acfb" +content-hash = "758de2d61f7e28a4d488aa5576f12daa43a4f831663076ad364b45b7c62e4644" diff --git a/pyproject.toml b/pyproject.toml index fe6fff6..b415d01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ scholarly = "^1.7.11" supabase = "^2.12.0" streamlit-supabase = "^0.5" streamlit-extras = "^0.5.0" +ollama = "^0.4.7" [build-system] diff --git a/supervised/prof1.jpg b/supervised/prof1.jpg deleted file mode 100644 index fa27e39..0000000 Binary files a/supervised/prof1.jpg and /dev/null differ diff --git a/supervised/prof2.jpg b/supervised/prof2.jpg deleted file mode 100644 index fa27e39..0000000 Binary files a/supervised/prof2.jpg and /dev/null differ diff --git a/supervised/summarize.py b/supervised/summarize.py new file mode 
import asyncio
from typing import List, Dict

import ollama

from supervised.cache import papers


class OllamaSummarizer:
    """Generate one-sentence ELI5 summaries of paper abstracts via a local Ollama model."""

    # Hoisted out of generate_summary: the template is constant across calls.
    _PROMPT_TEMPLATE = (
        "Please provide a one sentence summary of the following academic paper abstract. "
        "15 words or less in an ELI5 manner.\n\n{text}\n\nSummary:"
    )

    def __init__(self, model_name: str = "dolphin-mistral:latest"):
        # Name of the Ollama model used for every chat request.
        self.model_name = model_name

    async def generate_summary(self, text: str) -> str:
        """Summarize one abstract.

        Args:
            text: The abstract to summarize.

        Returns:
            The model's summary string, or "" if the request failed
            (best-effort: one bad abstract must not abort the batch).
        """
        prompt = self._PROMPT_TEMPLATE.format(text=text)
        try:
            # ollama.chat is a blocking HTTP call. asyncio.to_thread (3.9+) runs it
            # on the loop's default executor, replacing the original per-call
            # ThreadPoolExecutor whose `with`-exit did a blocking shutdown(wait=True)
            # on the event-loop thread and defeated concurrent batching.
            response = await asyncio.to_thread(
                ollama.chat,
                model=self.model_name,
                messages=[{"role": "user", "content": prompt}],
            )
            return response["message"]["content"]
        except Exception as e:
            # Deliberate broad catch: network/model errors degrade to an empty summary.
            print(f"Error generating summary: {str(e)}")
            return ""

    async def batch_summarize(self, texts: List[str]) -> List[str]:
        """Summarize all texts concurrently; results keep input order (asyncio.gather)."""
        tasks = [self.generate_summary(text) for text in texts]
        return await asyncio.gather(*tasks)

    async def summarize_papers(self, papers: List[Dict]) -> List[Dict]:
        """Return copies of *papers* with a "summary" key added to each.

        NOTE(review): the parameter `papers` shadows the module-level import of the
        same name; the name is caller-visible so it is kept, but inside this method
        only the argument is referenced. Input dicts are not mutated — each is
        shallow-copied before the "summary" key is attached.
        """
        summaries = await self.batch_summarize([paper["abstract"] for paper in papers])

        summarized_papers = []
        for paper, summary in zip(papers, summaries):
            paper_copy = paper.copy()
            paper_copy["summary"] = summary
            summarized_papers.append(paper_copy)

        return summarized_papers


async def main():
    """Summarize the cached recent papers and print each title with its summary."""
    summarizer = OllamaSummarizer(model_name="llama2:latest")
    recent_papers = papers["recent_papers"]
    summarized_papers = await summarizer.summarize_papers(recent_papers)

    for paper in summarized_papers:
        print(f"Title: {paper['title']}")
        print(f"Summary: {paper['summary']}\n")


# NOTE(review): invoke as `python -m supervised.summarize` from the repo root
# (the Makefile's `python supervised/summarize.py` puts supervised/ on sys.path,
# which breaks the absolute import `supervised.cache` — confirm and fix the target).
if __name__ == "__main__":
    asyncio.run(main())