Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions my_agent/.local_env

This file was deleted.

50 changes: 45 additions & 5 deletions my_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,55 @@
"""

from google.adk.agents import llm_agent
from my_agent.tools import calculator, read_pdf, web_search, fetch_webpage, read_image

# The module-level agent instance picked up by the ADK runtime.
# (Reconstructed new-side definition: the pasted diff had interleaved the
# deleted old `model=`, `description=`, instruction and `tools=[]` lines.)
root_agent = llm_agent.Agent(
    model="gemini-2.5-pro",
    name="agent",
    description="A financial analyst AI that reads earnings reports, searches for market news, and computes financial metrics.",
    instruction=(
        "You are a senior financial analyst AI. Your job is to analyze a company's "
        "performance and deliver a clear investment recommendation.\n\n"
        "CRITICAL: You HAVE full access to the internet and PDF files through your tools. "
        "NEVER say you cannot access the web, URLs, or PDFs. ALWAYS call the appropriate tool. "
        "If a user asks for financial analysis, IMMEDIATELY start calling tools — do not hesitate.\n\n"
        "WORKFLOW — follow these steps in order:\n"
        "1. **Search first**: Use web_search to find the company's latest earnings report, press releases, and news.\n"
        "2. **Read the earnings PDF**: Look for PDF links in search results. Use read_pdf with the PDF URL to extract the data. "
        "If the user provides a URL or file path, use read_pdf or fetch_webpage on it directly.\n"
        "3. **Get market context**: Use web_search again for recent analyst opinions and market reactions.\n"
        "4. **Drill into sources**: Use fetch_webpage to read the most relevant search result pages for deeper context.\n"
        "5. **Compute metrics**: Use calculator to compute key financial ratios:\n"
        " - Revenue growth: calculator('pct_change', old_revenue, new_revenue)\n"
        " - Profit margin: calculator('margin', net_income, revenue)\n"
        " - EPS change: calculator('pct_change', old_eps, new_eps)\n"
        " - Any other relevant ratios\n"
        "6. **Synthesize**: Combine all data into a structured report.\n\n"
        "TOOLS:\n"
        "- web_search(query): Search the web. USE THIS FIRST to find earnings reports and news. Always call this — never refuse.\n"
        "- read_pdf(file_path): Extract text from a PDF. Works with local paths AND URLs (http/https). "
        "If you find a PDF URL in search results, pass it directly to read_pdf.\n"
        "- fetch_webpage(url, question): Read an HTML page or PDF URL. Use after web_search to read result pages.\n"
        "- calculator(operation, a, b): Math and financial ratios.\n"
        " Operations: +, -, *, /, **, pct_change, margin, ratio.\n"
        "- read_image(file_path, question): Analyze an image file.\n\n"
        "OUTPUT FORMAT for full company analysis:\n"
        "## Company Overview\n"
        "Brief summary from the earnings report.\n"
        "## Key Financial Metrics\n"
        "Table of computed ratios with values.\n"
        "## Market Sentiment\n"
        "Summary of recent news and analyst reactions.\n"
        "## Investment Recommendation\n"
        "BUY / HOLD / SELL with clear reasoning.\n\n"
        "RULES:\n"
        "- NEVER say 'I cannot access the web' or 'I am unable to fetch'. You CAN. Use your tools.\n"
        "- If no PDF path/URL is given, use web_search to FIND one, then use read_pdf on it.\n"
        "- Always compute ratios with calculator — never do mental math.\n"
        "- If a tool call fails, retry with a different query or URL.\n"
        "- For simple questions not related to finance, answer directly and concisely.\n"
        "- Give only the final answer for simple factual questions."
    ),
    # Tool callables imported at the top of this module.
    tools=[calculator, read_pdf, web_search, fetch_webpage, read_image],
    sub_agents=[],
)
7 changes: 6 additions & 1 deletion my_agent/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
# Public tool surface of my_agent.tools.
# (Reconstructed new-side module: the pasted diff had left the deleted old
# `__all__ = []` line interleaved with the new imports.)
from .calculator import calculator
from .read_pdf import read_pdf
from .web_search import web_search, fetch_webpage
from .read_image import read_image

# Explicit export list so `from my_agent.tools import *` stays predictable.
__all__ = ["calculator", "read_pdf", "web_search", "fetch_webpage", "read_image"]
36 changes: 34 additions & 2 deletions my_agent/tools/calculator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,35 @@
def calculator(operation: str, a: float, b: float) -> str:
    """Perform a math or financial operation on two numbers.

    Args:
        operation: One of '+', '-', '*', '/', '**', 'pct_change', 'margin', 'ratio'.
            pct_change: percentage change from a to b → ((b - a) / a) * 100
            margin: margin of a over b → (a / b) * 100 (e.g. net income / revenue)
            ratio: simple ratio a / b (e.g. debt-to-equity)
        a: First number.
        b: Second number.

    Returns:
        The result as a string; percentage operations are formatted to two
        decimals with a trailing '%'. Errors (division by zero, unknown
        operation) are returned as human-readable strings rather than
        raised, which is friendlier to the LLM caller.
    """
    # Closed-form arithmetic ops that need no validation share a dispatch table.
    ops = {
        '+': lambda: a + b,
        '-': lambda: a - b,
        '*': lambda: a * b,
        '**': lambda: a ** b,
    }
    if operation in ops:
        return str(ops[operation]())
    # '/' and 'ratio' are aliases; both must guard against a zero divisor.
    if operation in ('/', 'ratio'):
        if b == 0:
            return "Cannot divide by zero"
        return str(a / b)
    if operation == 'pct_change':
        # Percentage change is undefined when the base value is zero.
        if a == 0:
            return "Cannot compute pct_change from zero base"
        return f"{((b - a) / a) * 100:.2f}%"
    if operation == 'margin':
        if b == 0:
            return "Cannot compute margin with zero denominator"
        return f"{(a / b) * 100:.2f}%"
    return f"Unknown operation: {operation}"
31 changes: 31 additions & 0 deletions my_agent/tools/read_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import os
import pathlib

from google import genai


def read_image(file_path: str, question: str) -> str:
    """Analyze an image file and answer a question about it.

    Use this tool when a question references an image file (PNG, JPG, etc.).
    Pass the file path and the full question so the image can be analyzed.

    Args:
        file_path: Path to the image file to analyze.
        question: The question to answer about the image.

    Returns:
        A detailed analysis of the image relevant to the question.
    """
    # Authenticate with the same environment key the surrounding agent uses.
    client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
    # Upload the image first so the model can reference it in the prompt.
    uploaded_file = client.files.upload(file=pathlib.Path(file_path))
    prompt = question + " Be extremely detailed and precise in your analysis."
    result = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[uploaded_file, prompt],
    )
    return result.text
30 changes: 30 additions & 0 deletions my_agent/tools/read_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from urllib.request import Request, urlopen

import pymupdf


def read_pdf(file_path: str) -> str:
    """Read and extract all text content from a PDF file or PDF URL.

    Use this tool when a question references a PDF file, attachment, or PDF URL.
    Accepts both local file paths and http/https URLs pointing to PDF files.

    Args:
        file_path: Local path or URL to the PDF.

    Returns:
        The full text content of the PDF.
    """
    path = file_path.strip()
    if path.startswith(("http://", "https://")):
        # Some servers reject requests without a browser-like User-Agent.
        req = Request(path, headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/126.0.0.0",
        })
        with urlopen(req, timeout=20) as resp:
            data = resp.read()
        doc = pymupdf.open(stream=data, filetype="pdf")
    else:
        doc = pymupdf.open(path)
    try:
        return "\n".join(page.get_text() for page in doc)
    finally:
        # Always release the document handle, even if text extraction
        # raises (the original leaked the handle on error).
        doc.close()
203 changes: 203 additions & 0 deletions my_agent/tools/web_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
from urllib.parse import urlparse
from urllib.request import Request, urlopen

import pymupdf
from bs4 import BeautifulSoup
from ddgs import DDGS


# Common English filler words that carry no search signal; keyword filtering
# drops these before scoring lines against a user question.
_STOPWORDS = set(
    "the and for with from that this what when where which have has had "
    "into also just give name path only using received".split()
)


def web_search(query: str, max_results: int = 8) -> list[dict]:
    """Search the web using DuckDuckGo and return results.

    Use this tool when a question requires up-to-date information,
    real-world facts, or anything not in your training data.

    Args:
        query: The search query string.
        max_results: Maximum number of results to return.

    Returns:
        A list of results, each with 'title', 'url', and 'snippet'.
        On failure a single pseudo-result titled 'web_search_error'
        describes the problem so the agent can retry.
    """
    cleaned = (query or "").strip()
    if not cleaned:
        # Nothing to search for — return an empty result set.
        return []

    # Clamp the requested count into a sane 1..15 window.
    limit = max(1, min(max_results, 15))
    try:
        with DDGS() as search_client:
            hits = search_client.text(cleaned, max_results=limit)
            formatted = []
            for hit in hits:
                link = hit.get("href")
                if not link:
                    # Skip results with no usable URL.
                    continue
                formatted.append({
                    "title": hit.get("title", ""),
                    "url": link,
                    "snippet": hit.get("body", ""),
                })
            return formatted
    except Exception as exc:
        # Surface the failure as data rather than raising.
        return [
            {
                "title": "web_search_error",
                "url": "",
                "snippet": f"DuckDuckGo search failed: {exc}",
            }
        ]


def _extract_pdf_text(content: bytes, max_chars: int) -> str:
    """Extract per-page text from an in-memory PDF, capped at max_chars."""
    pdf = pymupdf.open(stream=content, filetype="pdf")
    try:
        pages: list[str] = []
        collected = 0
        for number, page in enumerate(pdf, start=1):
            chunk = f"[Page {number}]\n{page.get_text()}"
            pages.append(chunk)
            collected += len(chunk)
            # Stop reading pages once we have gathered enough text.
            if collected >= max_chars:
                break
        return "\n\n".join(pages)[:max_chars]
    finally:
        pdf.close()


def _extract_html_text(html: str, max_chars: int, fragment: str | None = None) -> str:
    """Extract readable text from an HTML document, capped at max_chars.

    If *fragment* (a URL #fragment id) is given and a matching element
    exists, text is collected from that anchor onward, stopping at the
    next h1/h2 heading; otherwise the whole page's text is returned.
    """
    soup = BeautifulSoup(html, "html.parser")
    # Remove boilerplate / non-content tags before extracting any text.
    for tag in soup(["script", "style", "nav", "footer", "header", "noscript", "form", "svg", "aside"]):
        tag.decompose()

    text = ""
    if fragment:
        anchor = soup.find(id=fragment)
        if anchor:
            parts = [f"[Anchored section: #{fragment}]", anchor.get_text(separator=" ", strip=True)]
            # NOTE(review): find_all_next() yields nested descendants as well as
            # following siblings, so per-element get_text() may duplicate text
            # that was already captured — confirm whether that matters here.
            for sibling in anchor.find_all_next():
                if sibling is anchor:
                    continue
                # A following top-level heading marks the start of the next section.
                if sibling.name in {"h1", "h2"}:
                    break
                chunk = sibling.get_text(separator=" ", strip=True)
                if chunk:
                    parts.append(chunk)
                # Stop collecting once enough raw text has been gathered.
                if sum(len(part) for part in parts) >= max_chars:
                    break
            section_text = "\n".join(part for part in parts if part)
            if section_text.strip():
                return section_text[:max_chars]

    # Fallback: whole-page text, one non-empty stripped line per source line.
    lines = [line.strip() for line in soup.get_text(separator="\n").splitlines() if line.strip()]
    text += "\n".join(lines)
    return text[:max_chars]


def _filter_relevant_text(text: str, question: str, max_chars: int) -> str:
    """Keep only the lines of *text* most relevant to *question*.

    Scores each non-empty line by how many question keywords it contains
    (words of length >= 4, minus stopwords), keeps the twelve best-scoring
    lines plus one line of context on each side, and caps the result at
    max_chars. Falls back to plain truncation when nothing matches.
    """
    punctuation = ".,:;!?()[]{}\"'`"
    keywords = {
        word.lower().strip(punctuation)
        for word in question.split()
        if len(word.strip(punctuation)) >= 4
    }
    keywords -= _STOPWORDS
    if not keywords:
        return text[:max_chars]

    lines = [candidate for candidate in text.splitlines() if candidate.strip()]
    if not lines:
        return text[:max_chars]

    # Rank lines by keyword-hit count; remember each line's position.
    ranked = []
    for position, candidate in enumerate(lines):
        lowered = candidate.lower()
        hits = sum(kw in lowered for kw in keywords)
        if hits:
            ranked.append((hits, position))

    if not ranked:
        return text[:max_chars]

    ranked.sort(reverse=True)
    keep: set[int] = set()
    for _, position in ranked[:12]:
        # Include one neighbouring line on each side for context.
        keep.update(
            neighbour
            for neighbour in (position - 1, position, position + 1)
            if 0 <= neighbour < len(lines)
        )

    return "\n".join(lines[i] for i in sorted(keep))[:max_chars]


def fetch_webpage(url: str, question: str = "", max_chars: int = 20000) -> str:
    """Fetch a webpage and return its text content.

    Use this tool to read the full content of a URL. It supports HTML pages
    and direct PDF URLs.

    Args:
        url: The URL of the webpage to fetch.
        question: Optional original user question to focus extraction.
        max_chars: Maximum amount of extracted text to return.

    Returns:
        Extracted text content of the page, or a string prefixed with
        'fetch_webpage_error:' describing the failure.
    """
    target = urlparse((url or "").strip())
    if target.scheme not in {"http", "https"}:
        return "fetch_webpage_error: URL must start with http:// or https://"

    # Keep the extraction budget within a sane 1k..50k character window.
    budget = min(max(max_chars, 1000), 50000)
    request_headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/126.0.0.0 Safari/537.36"
        ),
        "Accept": "text/html,application/pdf;q=0.9,*/*;q=0.8",
    }

    try:
        with urlopen(Request(url, headers=request_headers), timeout=15) as response:
            mime = response.headers.get("Content-Type", "").lower()
            body = response.read()

        # Route to the PDF or HTML extractor based on content type / extension.
        if "application/pdf" in mime or target.path.lower().endswith(".pdf"):
            text = _extract_pdf_text(body, budget)
        else:
            decoded = body.decode("utf-8", errors="ignore")
            text = _extract_html_text(decoded, budget, target.fragment or None)

        if question.strip():
            # Narrow the page text to the lines relevant to the question.
            text = _filter_relevant_text(text, question, budget)

        if not text.strip():
            return "fetch_webpage_error: page was fetched but no readable text was extracted"

        return text
    except Exception as exc:
        return f"fetch_webpage_error: {exc}"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ dependencies = [
"seaborn",
"colorama>=0.4.6",
"pyfiglet>=1.0.4",
"PyMuPDF",
"pymupdf",
"beautifulsoup4>=4.14.3",
"ddgs",
]
Loading