Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions my_agent/.local_env

This file was deleted.

50 changes: 45 additions & 5 deletions my_agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,55 @@
"""

from google.adk.agents import llm_agent
from my_agent.tools import calculator, read_pdf, web_search, fetch_webpage, read_image

# The module-level agent instance picked up by the ADK runtime.
# (Reconstructed new-side definition: the pasted diff had interleaved the
# deleted old `model=`, `description=`, instruction and `tools=[]` lines.)
root_agent = llm_agent.Agent(
    model="gemini-2.5-pro",
    name="agent",
    description="A financial analyst AI that reads earnings reports, searches for market news, and computes financial metrics.",
    instruction=(
        "You are a senior financial analyst AI. Your job is to analyze a company's "
        "performance and deliver a clear investment recommendation.\n\n"
        "CRITICAL: You HAVE full access to the internet and PDF files through your tools. "
        "NEVER say you cannot access the web, URLs, or PDFs. ALWAYS call the appropriate tool. "
        "If a user asks for financial analysis, IMMEDIATELY start calling tools — do not hesitate.\n\n"
        "WORKFLOW — follow these steps in order:\n"
        "1. **Search first**: Use web_search to find the company's latest earnings report, press releases, and news.\n"
        "2. **Read the earnings PDF**: Look for PDF links in search results. Use read_pdf with the PDF URL to extract the data. "
        "If the user provides a URL or file path, use read_pdf or fetch_webpage on it directly.\n"
        "3. **Get market context**: Use web_search again for recent analyst opinions and market reactions.\n"
        "4. **Drill into sources**: Use fetch_webpage to read the most relevant search result pages for deeper context.\n"
        "5. **Compute metrics**: Use calculator to compute key financial ratios:\n"
        " - Revenue growth: calculator('pct_change', old_revenue, new_revenue)\n"
        " - Profit margin: calculator('margin', net_income, revenue)\n"
        " - EPS change: calculator('pct_change', old_eps, new_eps)\n"
        " - Any other relevant ratios\n"
        "6. **Synthesize**: Combine all data into a structured report.\n\n"
        "TOOLS:\n"
        "- web_search(query): Search the web. USE THIS FIRST to find earnings reports and news. Always call this — never refuse.\n"
        "- read_pdf(file_path): Extract text from a PDF. Works with local paths AND URLs (http/https). "
        "If you find a PDF URL in search results, pass it directly to read_pdf.\n"
        "- fetch_webpage(url, question): Read an HTML page or PDF URL. Use after web_search to read result pages.\n"
        "- calculator(operation, a, b): Math and financial ratios.\n"
        " Operations: +, -, *, /, **, pct_change, margin, ratio.\n"
        "- read_image(file_path, question): Analyze an image file.\n\n"
        "OUTPUT FORMAT for full company analysis:\n"
        "## Company Overview\n"
        "Brief summary from the earnings report.\n"
        "## Key Financial Metrics\n"
        "Table of computed ratios with values.\n"
        "## Market Sentiment\n"
        "Summary of recent news and analyst reactions.\n"
        "## Investment Recommendation\n"
        "BUY / HOLD / SELL with clear reasoning.\n\n"
        "RULES:\n"
        "- NEVER say 'I cannot access the web' or 'I am unable to fetch'. You CAN. Use your tools.\n"
        "- If no PDF path/URL is given, use web_search to FIND one, then use read_pdf on it.\n"
        "- Always compute ratios with calculator — never do mental math.\n"
        "- If a tool call fails, retry with a different query or URL.\n"
        "- For simple questions not related to finance, answer directly and concisely.\n"
        "- Give only the final answer for simple factual questions."
    ),
    # Tool callables imported at the top of this module.
    tools=[calculator, read_pdf, web_search, fetch_webpage, read_image],
    sub_agents=[],
)
7 changes: 6 additions & 1 deletion my_agent/tools/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
# Public tool surface of my_agent.tools.
# (Reconstructed new-side module: the pasted diff had left the deleted old
# `__all__ = []` line interleaved with the new imports.)
from .calculator import calculator
from .read_pdf import read_pdf
from .web_search import web_search, fetch_webpage
from .read_image import read_image

# Explicit export list so `from my_agent.tools import *` stays predictable.
__all__ = ["calculator", "read_pdf", "web_search", "fetch_webpage", "read_image"]
36 changes: 34 additions & 2 deletions my_agent/tools/calculator.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,35 @@
def calculator(operation: str, a: float, b: float) -> str:
    """Perform a math or financial operation on two numbers.

    Args:
        operation: One of '+', '-', '*', '/', '**', 'pct_change', 'margin', 'ratio'.
            pct_change: percentage change from a to b → ((b - a) / a) * 100
            margin: margin of a over b → (a / b) * 100 (e.g. net income / revenue)
            ratio: simple ratio a / b (e.g. debt-to-equity)
        a: First number.
        b: Second number.

    Returns:
        The result as a string; percentage operations are formatted to two
        decimals with a trailing '%'. Errors (division by zero, unknown
        operation) are returned as human-readable strings rather than
        raised, which is friendlier to the LLM caller.
    """
    # Closed-form arithmetic ops that need no validation share a dispatch table.
    ops = {
        '+': lambda: a + b,
        '-': lambda: a - b,
        '*': lambda: a * b,
        '**': lambda: a ** b,
    }
    if operation in ops:
        return str(ops[operation]())
    # '/' and 'ratio' are aliases; both must guard against a zero divisor.
    if operation in ('/', 'ratio'):
        if b == 0:
            return "Cannot divide by zero"
        return str(a / b)
    if operation == 'pct_change':
        # Percentage change is undefined when the base value is zero.
        if a == 0:
            return "Cannot compute pct_change from zero base"
        return f"{((b - a) / a) * 100:.2f}%"
    if operation == 'margin':
        if b == 0:
            return "Cannot compute margin with zero denominator"
        return f"{(a / b) * 100:.2f}%"
    return f"Unknown operation: {operation}"
31 changes: 31 additions & 0 deletions my_agent/tools/read_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import os
import pathlib

from google import genai


def read_image(file_path: str, question: str) -> str:
    """Analyze an image file and answer a question about it.

    Use this tool when a question references an image file (PNG, JPG, etc.).
    Pass the file path and the full question so the image can be analyzed.

    Args:
        file_path: Path to the image file to analyze.
        question: The question to answer about the image.

    Returns:
        A detailed analysis of the image relevant to the question.
    """
    # Authenticate with the same environment key the surrounding agent uses.
    client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
    # Upload the image first so the model can reference it in the prompt.
    uploaded_file = client.files.upload(file=pathlib.Path(file_path))
    prompt = question + " Be extremely detailed and precise in your analysis."
    result = client.models.generate_content(
        model="gemini-2.5-flash",
        contents=[uploaded_file, prompt],
    )
    return result.text
30 changes: 30 additions & 0 deletions my_agent/tools/read_pdf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from urllib.request import Request, urlopen

import pymupdf


def read_pdf(file_path: str) -> str:
    """Read and extract all text content from a PDF file or PDF URL.

    Use this tool when a question references a PDF file, attachment, or PDF URL.
    Accepts both local file paths and http/https URLs pointing to PDF files.

    Args:
        file_path: Local path or URL to the PDF.

    Returns:
        The full text content of the PDF.
    """
    path = file_path.strip()
    if path.startswith(("http://", "https://")):
        # Some servers reject requests without a browser-like User-Agent.
        req = Request(path, headers={
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/126.0.0.0",
        })
        with urlopen(req, timeout=20) as resp:
            data = resp.read()
        doc = pymupdf.open(stream=data, filetype="pdf")
    else:
        doc = pymupdf.open(path)
    try:
        return "\n".join(page.get_text() for page in doc)
    finally:
        # Always release the document handle, even if text extraction
        # raises (the original leaked the handle on error).
        doc.close()
203 changes: 203 additions & 0 deletions my_agent/tools/web_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
from urllib.parse import urlparse
from urllib.request import Request, urlopen

import pymupdf
from bs4 import BeautifulSoup
from ddgs import DDGS


# Common English filler words that carry no search signal; keyword filtering
# drops these before scoring lines against a user question.
_STOPWORDS = set(
    "the and for with from that this what when where which have has had "
    "into also just give name path only using received".split()
)


def web_search(query: str, max_results: int = 8) -> list[dict]:
    """Search the web using DuckDuckGo and return results.

    Use this tool when a question requires up-to-date information,
    real-world facts, or anything not in your training data.

    Args:
        query: The search query string.
        max_results: Maximum number of results to return.

    Returns:
        A list of results, each with 'title', 'url', and 'snippet'.
        On failure a single pseudo-result titled 'web_search_error'
        describes the problem so the agent can retry.
    """
    cleaned = (query or "").strip()
    if not cleaned:
        # Nothing to search for — return an empty result set.
        return []

    # Clamp the requested count into a sane 1..15 window.
    limit = max(1, min(max_results, 15))
    try:
        with DDGS() as search_client:
            hits = search_client.text(cleaned, max_results=limit)
            formatted = []
            for hit in hits:
                link = hit.get("href")
                if not link:
                    # Skip results with no usable URL.
                    continue
                formatted.append({
                    "title": hit.get("title", ""),
                    "url": link,
                    "snippet": hit.get("body", ""),
                })
            return formatted
    except Exception as exc:
        # Surface the failure as data rather than raising.
        return [
            {
                "title": "web_search_error",
                "url": "",
                "snippet": f"DuckDuckGo search failed: {exc}",
            }
        ]


def _extract_pdf_text(content: bytes, max_chars: int) -> str:
    """Extract per-page text from an in-memory PDF, capped at max_chars."""
    pdf = pymupdf.open(stream=content, filetype="pdf")
    try:
        pages: list[str] = []
        collected = 0
        for number, page in enumerate(pdf, start=1):
            chunk = f"[Page {number}]\n{page.get_text()}"
            pages.append(chunk)
            collected += len(chunk)
            # Stop reading pages once we have gathered enough text.
            if collected >= max_chars:
                break
        return "\n\n".join(pages)[:max_chars]
    finally:
        pdf.close()


def _extract_html_text(html: str, max_chars: int, fragment: str | None = None) -> str:
    """Extract readable text from an HTML document, capped at max_chars.

    If *fragment* (a URL #fragment id) is given and a matching element
    exists, text is collected from that anchor onward, stopping at the
    next h1/h2 heading; otherwise the whole page's text is returned.
    """
    soup = BeautifulSoup(html, "html.parser")
    # Remove boilerplate / non-content tags before extracting any text.
    for tag in soup(["script", "style", "nav", "footer", "header", "noscript", "form", "svg", "aside"]):
        tag.decompose()

    text = ""
    if fragment:
        anchor = soup.find(id=fragment)
        if anchor:
            parts = [f"[Anchored section: #{fragment}]", anchor.get_text(separator=" ", strip=True)]
            # NOTE(review): find_all_next() yields nested descendants as well as
            # following siblings, so per-element get_text() may duplicate text
            # that was already captured — confirm whether that matters here.
            for sibling in anchor.find_all_next():
                if sibling is anchor:
                    continue
                # A following top-level heading marks the start of the next section.
                if sibling.name in {"h1", "h2"}:
                    break
                chunk = sibling.get_text(separator=" ", strip=True)
                if chunk:
                    parts.append(chunk)
                # Stop collecting once enough raw text has been gathered.
                if sum(len(part) for part in parts) >= max_chars:
                    break
            section_text = "\n".join(part for part in parts if part)
            if section_text.strip():
                return section_text[:max_chars]

    # Fallback: whole-page text, one non-empty stripped line per source line.
    lines = [line.strip() for line in soup.get_text(separator="\n").splitlines() if line.strip()]
    text += "\n".join(lines)
    return text[:max_chars]


def _filter_relevant_text(text: str, question: str, max_chars: int) -> str:
    """Keep only the lines of *text* most relevant to *question*.

    Scores each non-empty line by how many question keywords it contains
    (words of length >= 4, minus stopwords), keeps the twelve best-scoring
    lines plus one line of context on each side, and caps the result at
    max_chars. Falls back to plain truncation when nothing matches.
    """
    punctuation = ".,:;!?()[]{}\"'`"
    keywords = {
        word.lower().strip(punctuation)
        for word in question.split()
        if len(word.strip(punctuation)) >= 4
    }
    keywords -= _STOPWORDS
    if not keywords:
        return text[:max_chars]

    lines = [candidate for candidate in text.splitlines() if candidate.strip()]
    if not lines:
        return text[:max_chars]

    # Rank lines by keyword-hit count; remember each line's position.
    ranked = []
    for position, candidate in enumerate(lines):
        lowered = candidate.lower()
        hits = sum(kw in lowered for kw in keywords)
        if hits:
            ranked.append((hits, position))

    if not ranked:
        return text[:max_chars]

    ranked.sort(reverse=True)
    keep: set[int] = set()
    for _, position in ranked[:12]:
        # Include one neighbouring line on each side for context.
        keep.update(
            neighbour
            for neighbour in (position - 1, position, position + 1)
            if 0 <= neighbour < len(lines)
        )

    return "\n".join(lines[i] for i in sorted(keep))[:max_chars]


def fetch_webpage(url: str, question: str = "", max_chars: int = 20000) -> str:
    """Fetch a webpage and return its text content.

    Use this tool to read the full content of a URL. It supports HTML pages
    and direct PDF URLs.

    Args:
        url: The URL of the webpage to fetch.
        question: Optional original user question to focus extraction.
        max_chars: Maximum amount of extracted text to return.

    Returns:
        Extracted text content of the page, or a string prefixed with
        'fetch_webpage_error:' describing the failure.
    """
    target = urlparse((url or "").strip())
    if target.scheme not in {"http", "https"}:
        return "fetch_webpage_error: URL must start with http:// or https://"

    # Keep the extraction budget within a sane 1k..50k character window.
    budget = min(max(max_chars, 1000), 50000)
    request_headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/126.0.0.0 Safari/537.36"
        ),
        "Accept": "text/html,application/pdf;q=0.9,*/*;q=0.8",
    }

    try:
        with urlopen(Request(url, headers=request_headers), timeout=15) as response:
            mime = response.headers.get("Content-Type", "").lower()
            body = response.read()

        # Route to the PDF or HTML extractor based on content type / extension.
        if "application/pdf" in mime or target.path.lower().endswith(".pdf"):
            text = _extract_pdf_text(body, budget)
        else:
            decoded = body.decode("utf-8", errors="ignore")
            text = _extract_html_text(decoded, budget, target.fragment or None)

        if question.strip():
            # Narrow the page text to the lines relevant to the question.
            text = _filter_relevant_text(text, question, budget)

        if not text.strip():
            return "fetch_webpage_error: page was fetched but no readable text was extracted"

        return text
    except Exception as exc:
        return f"fetch_webpage_error: {exc}"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ dependencies = [
"seaborn",
"colorama>=0.4.6",
"pyfiglet>=1.0.4",
"PyMuPDF",
"pymupdf",
"beautifulsoup4>=4.14.3",
"ddgs",
]
Loading