-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathagent.py
More file actions
160 lines (127 loc) · 4.8 KB
/
agent.py
File metadata and controls
160 lines (127 loc) · 4.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
# --- Imports ---
# Standard library
import os
import asyncio
# Third-party libraries
import pandas as pd
from dotenv import load_dotenv
# LlamaIndex imports
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.readers.wikipedia import WikipediaReader
from llama_index.core.agent.workflow import FunctionAgent, AgentWorkflow
from llama_index.core.tools import FunctionTool
from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.schema import Document
from llama_index.core.node_parser import SentenceSplitter
# --- Environment Setup ---
# Load variables from a local .env file into the process environment.
load_dotenv()
# --- Constants ---
# Endpoints / API keys read from the environment; any may be None if the
# corresponding variable is unset — downstream calls will fail in that case.
DEFAULT_API_URL = os.getenv("DEFAULT_API_URL")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")  # NOTE(review): read but not used in this chunk
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")  # used by the LLM and embedding models below
HF_KEY = os.getenv("HF_KEY")  # NOTE(review): read but not used in this chunk
# --- System Prompt ---
# Strict answer-formatting contract for the agent: the model must reply with
# exactly "FINAL ANSWER: <value>" and nothing else.
SYS_PROMPT = """You are a concise assistant. Your ONLY task is to provide the exact answer to the question.
IMPORTANT RULES:
0. If you are given data on a date or between two dates you have to be very precise
1. Your response MUST start with "FINAL ANSWER: "
2. After "FINAL ANSWER: " you MUST ONLY provide:
- A single number (no words, no units, no commas)
- OR a single word/phrase (no articles, no explanations)
- OR a comma-separated list of numbers/words (no additional text)
3. DO NOT add any explanations, thoughts, or additional text
4. DO NOT use articles or abbreviations
5. DO NOT use units unless specifically requested
Example correct responses:
FINAL ANSWER: 42
FINAL ANSWER: Barcelona
FINAL ANSWER: 1,2,3,4,5
"""
# --- LLM Configuration ---
# Gemini 2.0 Flash at low temperature for terse, near-deterministic answers.
llm_model = GoogleGenAI(
    model_name="models/gemini-2.0-flash",  # NOTE(review): confirm kwarg — some GoogleGenAI versions expect `model`
    api_key=GOOGLE_API_KEY,
    temperature=0.1,
)
# --- Tool Definitions ---
# File Operations Tools
def wikipedia_embed_retrieval(topic: str) -> dict:
    """
    Retrieve the most relevant Wikipedia text chunks for a topic.

    Loads the Wikipedia page named ``topic``, splits it into sentence-based
    chunks, embeds the chunks with Gemini embeddings, and returns the three
    chunks most similar to the topic query.

    Args:
        topic: Title of the Wikipedia page to load; also used as the
            retrieval query.

    Returns:
        dict: ``{"wikipedia_chunks": <str>}`` with the relevant chunks joined
        by blank lines, or ``{"error": <str>}`` if the page is missing or any
        step fails.
    """
    try:
        loader = WikipediaReader()
        documents = loader.load_data(pages=[topic])
        if not documents:
            return {"error": "No Wikipedia article found"}
        # Use Gemini for the embeddings. Pass the model to the index directly
        # instead of mutating the global Settings object, so this tool has no
        # process-wide side effects.
        embed_model = GoogleGenAIEmbedding(
            model_name="models/embedding-001",
            api_key=GOOGLE_API_KEY,
        )
        # Sentence-aware splitting; small chunks with overlap for precision.
        text_splitter = SentenceSplitter(
            chunk_size=512,
            chunk_overlap=50,
        )
        # Index with the smaller chunks; the retriever inherits embed_model.
        index = VectorStoreIndex.from_documents(
            documents,
            transformations=[text_splitter],
            embed_model=embed_model,
        )
        retriever = index.as_retriever(similarity_top_k=3)
        nodes = retriever.retrieve(topic)
        # Combine the most relevant chunks into a single context string.
        relevant_chunks = "\n\n".join(node.text for node in nodes)
        return {"wikipedia_chunks": relevant_chunks}
    except Exception as e:
        # Surface the failure to the agent as data rather than raising.
        return {"error": str(e)}
def load_video_transcript(video_link: str) -> dict:
    """
    Fetch the transcript of a YouTube video.

    Args:
        video_link: Full URL of the YouTube video.

    Returns:
        dict: ``{"video_transcript": <str>}`` on success, or
        ``{"error": <str>}`` if the transcript cannot be retrieved.
    """
    # Fixes vs. original: the annotation said ``-> str`` but a dict was
    # returned, and on failure the error was printed and ``None`` returned
    # implicitly; errors are now reported the same way as the Wikipedia tool
    # so the agent can see and react to them.
    try:
        loader = YoutubeTranscriptReader()
        documents = loader.load_data(ytlinks=[video_link])
        return {"video_transcript": documents[0].text_resource.text}
    except Exception as e:
        return {"error": str(e)}
# --- Tool Initialization ---
# Wrap the plain functions as LlamaIndex FunctionTools; the name/description
# pairs are what the LLM sees when deciding which tool to call.
load_video_transcript_tool = FunctionTool.from_defaults(
    load_video_transcript,
    name="load_video_transcript",
    description=(
        "Given a YouTube video URL, fetch and return the full transcript text "
        "using the YouTube Transcript API."
    )
)
wiki_retriever_tool = FunctionTool.from_defaults(
    fn=wikipedia_embed_retrieval,
    name="wikipedia_embed_retrieval",
    description="Given a topic, retrieve the most relevant Wikipedia text chunks using embeddings"
)
# Search Tools
# The DuckDuckGo tool spec expands into a list of one or more search tools.
tool_search_duckduckgo = DuckDuckGoSearchToolSpec().to_tool_list()
# --- Agent Definitions ---
# Single function-calling agent that owns all research tools and is bound to
# the strict FINAL ANSWER system prompt defined above.
agent = FunctionAgent(
    name="Search Tools",
    description=(
        "Performs web searches using DuckDuckGo, Wikipedia and retrieves YouTube video "
        "transcripts to deliver comprehensive and accurate responses."
    ),
    tools=tool_search_duckduckgo+[load_video_transcript_tool,wiki_retriever_tool],
    llm=llm_model,
    system_prompt=SYS_PROMPT,
)
# --- Workflow Setup ---
# One-agent workflow; root_agent must match the FunctionAgent's `name` above.
workflow = AgentWorkflow(
    agents=[agent],
    root_agent="Search Tools",
)
# --- Main Execution ---
async def main(
    question: str = (
        "How many studio albums were published by Mercedes Sosa between "
        "2000 and 2009 (included)? You can use the latest 2022 version of "
        "english wikipedia."
    ),
) -> None:
    """Run the agent workflow on *question* and print the response.

    Generalized from the original, which hard-coded the question; the default
    preserves the original behavior when called with no arguments.

    Args:
        question: Natural-language question forwarded to the workflow.
    """
    response = await workflow.run(question)
    print(str(response))


if __name__ == "__main__":
    asyncio.run(main())