From 21d37f83439e899a133cb89e30f88ca85b246456 Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Mon, 11 May 2026 18:30:00 +0800 Subject: [PATCH 1/2] Add AG2 TinyFish integration Add an AG2 TinyFishToolkit with SDK-backed tools for synchronous and asynchronous web automation, run lookup and listing, web search, page fetch, and browser session creation. Include documentation for the AG2 tool surface and focused unit coverage for schemas, execution paths, integration tagging, and error handling. --- ag2/README.md | 132 +++++ .../tools/experimental/tinyfish/__init__.py | 7 + .../tools/experimental/tinyfish/tinyfish.py | 414 +++++++++++++++ .../experimental/tinyfish/test_tinyfish.py | 477 ++++++++++++++++++ 4 files changed, 1030 insertions(+) create mode 100644 ag2/README.md create mode 100644 ag2/autogen/tools/experimental/tinyfish/__init__.py create mode 100644 ag2/autogen/tools/experimental/tinyfish/tinyfish.py create mode 100644 ag2/test/tools/experimental/tinyfish/test_tinyfish.py diff --git a/ag2/README.md b/ag2/README.md new file mode 100644 index 0000000..7f56839 --- /dev/null +++ b/ag2/README.md @@ -0,0 +1,132 @@ +# TinyFish Web Agent Tools for AG2 + +[TinyFish Web Agent](https://docs.tinyfish.ai) toolkit for [AG2](https://github.com/ag2ai/ag2) — automate any website using natural language. + +This toolkit lets AG2 agents search the web, fetch page contents, open remote browser sessions, navigate real websites, extract structured data, fill forms, click buttons, and execute multi-step workflows described in plain English. + +## Installation + +```bash +pip install ag2[tinyfish] +``` + +## Setup + +Get your API key at [agent.tinyfish.ai/api-keys](https://agent.tinyfish.ai/api-keys) and set it as an environment variable: + +```bash +export TINYFISH_API_KEY=sk-tinyfish-... 
+``` + +## Quick Start + +```python +from autogen import ConversableAgent, LLMConfig +from autogen.tools.experimental import TinyFishToolkit + +llm_config = LLMConfig(config_list=[{"model": "gpt-4o", "api_key": "..."}]) + +toolkit = TinyFishToolkit() + +assistant = ConversableAgent( + name="assistant", + llm_config=llm_config, + system_message="You can browse the web using the TinyFish tools.", +) + +user_proxy = ConversableAgent( + name="user_proxy", + human_input_mode="NEVER", + llm_config=False, +) + +# Register all tools at once +toolkit.register_for_llm(assistant) +toolkit.register_for_execution(user_proxy) + +user_proxy.initiate_chat( + assistant, + message="Extract the top 3 product names and prices from https://scrapeme.live/shop/", + max_turns=4, +) +``` + +## Tools + +The toolkit provides 7 tools: + +| Tool | Endpoint | Description | +|------|----------|-------------| +| `tinyfish_web_agent` | `POST /v1/automation/run` | Run automation synchronously, wait for result | +| `tinyfish_web_agent_async` | `POST /v1/automation/run-async` | Queue automation, return run ID immediately | +| `tinyfish_get_run` | `GET /v1/runs/{id}` | Check status and get result of a run | +| `tinyfish_list_runs` | `GET /v1/runs` | List recent runs with optional filtering | +| `tinyfish_search` | `POST /v1/search` | Search the web and return ranked results | +| `tinyfish_fetch` | `POST /v1/fetch` | Fetch readable page contents from one or more URLs | +| `tinyfish_create_browser_session` | `POST /v1/browser/sessions` | Create a remote browser session | + +### When to Use Which + +- **`tinyfish_search`** — Discover relevant URLs before deciding which pages to inspect. +- **`tinyfish_fetch`** — Extract readable content from known URLs or search results. +- **`tinyfish_web_agent`** — Quick tasks under 60 seconds. Simplest option. +- **`tinyfish_web_agent_async`** + **`tinyfish_get_run`** — Long-running tasks, batch processing, or when you want the agent to do other work while waiting. 
+- **`tinyfish_list_runs`** — Review past automations, check what's running. +- **`tinyfish_create_browser_session`** — Start a browser session when direct browser control is needed. + +## Configuration + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `api_key` | `str` | `None` | TinyFish API key. Reads from `TINYFISH_API_KEY` env var if not provided. | +| `browser_profile` | `str` | `"lite"` | `"lite"` for standard sites, `"stealth"` for bot-protected sites. | +| `proxy_country` | `str` | `None` | Route through a country: `US`, `GB`, `CA`, `DE`, `FR`, `JP`, `AU`. | + +### Stealth Mode + Proxy + +For sites with Cloudflare, CAPTCHAs, or other bot protection: + +```python +toolkit = TinyFishToolkit(browser_profile="stealth", proxy_country="US") +``` + +## Writing Good Goals + +```python +# Specific extraction +"Extract all product names, prices, and stock status. Return as JSON array." + +# Multi-step workflow +"1. Click 'Load More' 3 times 2. Extract all product cards 3. Return as JSON" + +# Form filling +"Fill the contact form with name 'John Doe' and email 'john@example.com', then click Submit" +``` + +## Example Prompts + +```text +Search for the TinyFish Web Agent docs and fetch the most relevant result. + +Extract the top 3 product names and prices from https://scrapeme.live/shop/ + +Scrape the first 5 headlines from https://news.ycombinator.com + +Go to https://books.toscrape.com and extract all books with their titles and prices +``` + +## Contributing to AG2 + +This tool is intended to be contributed to the AG2 repository at [`autogen/tools/experimental/tinyfish/`](https://github.com/ag2ai/ag2). 
# Copyright (c) 2023 - 2025, AG2ai, Inc., AG2ai open-source projects maintainers and core contributors
#
# SPDX-License-Identifier: Apache-2.0

import json
import logging
import os
from typing import TYPE_CHECKING, Annotated, Any, Optional

from autogen.doc_utils import export_module
from autogen.import_utils import optional_import_block, require_optional_import
from autogen.tools import Tool

if TYPE_CHECKING:
    # Only needed to resolve the string annotations on the register_* methods.
    from autogen import ConversableAgent

logger = logging.getLogger(__name__)

with optional_import_block():
    from tinyfish import (
        BrowserProfile,
        ProxyConfig,
        ProxyCountryCode,
        RunStatus,
        SortDirection,
        TinyFish,
    )

__all__ = ["TinyFishToolkit"]

_INTEGRATION_NAME = "ag2"

# Schemes accepted by the automation tools, as stated in their parameter docs.
_URL_SCHEMES = ("http://", "https://")
_INVALID_URL_MESSAGE = "Invalid url. Must start with http:// or https://."

# Upper bound taken from the fetch tool's own parameter description.
_MAX_FETCH_URLS = 10

_FETCH_FORMATS = frozenset({"markdown", "html", "json"})


def _ensure_integration_tag() -> None:
    """Set the default TinyFish integration tag for request attribution.

    Writes ``TF_API_INTEGRATION`` into ``os.environ`` only when it is not
    already set, so a tag chosen by the caller is preserved.
    """
    os.environ.setdefault("TF_API_INTEGRATION", _INTEGRATION_NAME)


def _json_dumps(value: Any) -> str:
    """Serialize SDK responses and plain Python values for agent-friendly output.

    Objects exposing ``model_dump`` are dumped in JSON mode first. Every value
    is then serialized with ``default=str`` so that a non-JSON-native value is
    stringified whether it appears at the top level or nested inside a dict or
    list (previously only the top-level case was handled and nested values
    raised ``TypeError``).

    Args:
        value: An SDK response object or any plain Python value.

    Returns:
        The JSON text for ``value``.
    """
    if hasattr(value, "model_dump"):
        value = value.model_dump(mode="json")
    return json.dumps(value, default=str)


@require_optional_import(["tinyfish"], "tinyfish")
@export_module("autogen.tools.experimental")
class TinyFishToolkit:
    """A toolkit providing multiple TinyFish Web Agent tools for AG2 agents.

    TinyFish Web Agent navigates real websites, extracts structured data,
    fills forms, clicks buttons, and executes multi-step workflows — all
    described in plain English. No CSS selectors or XPath needed.

    This toolkit provides seven tools:

    - **tinyfish_web_agent**: Run a web automation synchronously and return results.
    - **tinyfish_web_agent_async**: Queue an automation and return a run ID immediately.
    - **tinyfish_get_run**: Check the status and result of a run by ID.
    - **tinyfish_list_runs**: List recent automation runs with optional filtering.
    - **tinyfish_search**: Search the web and return ranked results.
    - **tinyfish_fetch**: Fetch readable page contents from one or more URLs.
    - **tinyfish_create_browser_session**: Create a remote browser session.

    Every tool returns a string. Failures inside a tool are logged and
    reported back as an error string instead of being raised, so the calling
    agent always receives a reply.

    Install the optional dependency with: ``pip install ag2[tinyfish]``

    Args:
        api_key: TinyFish API key. If not provided, reads from the
            ``TINYFISH_API_KEY`` environment variable.
        browser_profile: Browser execution mode. ``"lite"`` (default) is
            fast for standard sites. ``"stealth"`` enables anti-detection
            for sites with bot protection (Cloudflare, CAPTCHAs, etc.).
        proxy_country: Optional proxy country code (``"US"``, ``"GB"``,
            ``"CA"``, ``"DE"``, ``"FR"``, ``"JP"``, ``"AU"``). When set,
            requests are routed through the specified country.

    Raises:
        ValueError: If ``browser_profile`` or ``proxy_country`` is not a
            value accepted by the TinyFish SDK enums.

    Example:
        .. code-block:: python

            from autogen import ConversableAgent, LLMConfig
            from autogen.tools.experimental import TinyFishToolkit

            toolkit = TinyFishToolkit()

            assistant = ConversableAgent(
                name="assistant",
                llm_config=llm_config,
            )
            user_proxy = ConversableAgent(
                name="user_proxy",
                human_input_mode="NEVER",
            )

            toolkit.register_for_llm(assistant)
            toolkit.register_for_execution(user_proxy)

            user_proxy.initiate_chat(
                assistant,
                message="Extract product names and prices from https://scrapeme.live/shop/",
            )
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        browser_profile: str = "lite",
        proxy_country: Optional[str] = None,
    ) -> None:
        try:
            bp = BrowserProfile(browser_profile)
        except ValueError as err:
            # Chain the SDK error so the original cause stays in the traceback.
            raise ValueError(
                f"Invalid browser_profile '{browser_profile}'. Use 'lite' or 'stealth'."
            ) from err

        proxy_config = None
        if proxy_country:
            try:
                proxy_config = ProxyConfig(
                    enabled=True,
                    country_code=ProxyCountryCode(proxy_country),
                )
            except ValueError as err:
                raise ValueError(
                    f"Invalid proxy_country '{proxy_country}'. "
                    f"Use one of: US, GB, CA, DE, FR, JP, AU."
                ) from err

        _ensure_integration_tag()
        client = TinyFish(api_key=api_key) if api_key else TinyFish()

        def automation_kwargs(url: str, goal: str) -> dict[str, Any]:
            """Build the arguments shared by the sync and async automation calls."""
            kwargs: dict[str, Any] = {"url": url, "goal": goal, "browser_profile": bp}
            if proxy_config:
                kwargs["proxy_config"] = proxy_config
            return kwargs

        # --- Tool: Run (sync) ---

        def run_web_automation(
            url: Annotated[
                str,
                "The target website URL to automate (must start with http:// or https://).",
            ],
            goal: Annotated[
                str,
                "Natural language description of what to accomplish on the website. "
                "Be specific: e.g. 'Extract all product names and prices as JSON' or "
                "'Fill the contact form with name John Doe and email john@example.com, then click Submit'.",
            ],
        ) -> str:
            """Run a web automation synchronously and return the result."""
            try:
                if not url.startswith(_URL_SCHEMES):
                    return _INVALID_URL_MESSAGE

                result = client.agent.run(**automation_kwargs(url, goal))

                if result.status == RunStatus.COMPLETED:
                    if result.result is not None:
                        # _json_dumps also copes with SDK model objects.
                        return _json_dumps(result.result)
                    return "Automation completed successfully with no result data."

                error_msg = getattr(result, "error", None) or "Unknown error"
                return f"Automation failed: {error_msg}"
            except Exception as e:
                logger.exception("Error running web automation")
                return f"Error running web automation: {e}"

        # --- Tool: Queue (async) ---

        def queue_web_automation(
            url: Annotated[
                str,
                "The target website URL to automate (must start with http:// or https://).",
            ],
            goal: Annotated[
                str,
                "Natural language description of what to accomplish on the website.",
            ],
        ) -> str:
            """Start a web automation asynchronously and return the run ID."""
            try:
                if not url.startswith(_URL_SCHEMES):
                    return _INVALID_URL_MESSAGE

                result = client.agent.queue(**automation_kwargs(url, goal))
                run_id = result.run_id
                return f"Automation queued. run_id: {run_id}. Use tinyfish_get_run to check status and get results."
            except Exception as e:
                logger.exception("Error queuing web automation")
                return f"Error queuing web automation: {e}"

        # --- Tool: Get Run ---

        def get_run(
            run_id: Annotated[str, "The unique run ID to look up."],
        ) -> str:
            """Get the status and result of a TinyFish automation run."""
            try:
                run = client.runs.get(run_id)

                parts = [f"Status: {run.status}"]

                # Accept either attribute spelling for the live-view URL.
                streaming_url = getattr(run, "streaming_url", None) or getattr(run, "streamingUrl", None)
                if streaming_url:
                    parts.append(f"Live view: {streaming_url}")

                result = getattr(run, "result", None)
                if result is not None:
                    parts.append(f"Result: {_json_dumps(result)}")

                error = getattr(run, "error", None)
                if error:
                    error_msg = error.get("message", str(error)) if isinstance(error, dict) else str(error)
                    parts.append(f"Error: {error_msg}")

                return "\n".join(parts)
            except Exception as e:
                logger.exception("Error getting run")
                return f"Error getting run: {e}"

        # --- Tool: List Runs ---

        def list_runs(
            status: Annotated[
                Optional[str],
                "Filter by status: PENDING, RUNNING, COMPLETED, FAILED, or CANCELLED. Leave empty for all.",
            ] = None,
            limit: Annotated[
                int,
                "Maximum number of runs to return (1-100).",
            ] = 20,
        ) -> str:
            """List recent TinyFish automation runs."""
            try:
                # Clamp the requested limit into the documented 1-100 range.
                kwargs: dict[str, Any] = {"limit": max(1, min(limit, 100))}
                if status:
                    kwargs["status"] = RunStatus(status)

                response = client.runs.list(**kwargs)
                runs = getattr(response, "runs", None) or []

                if not runs:
                    return "No runs found."

                lines = []
                for run in runs:
                    run_id = getattr(run, "run_id", "?")
                    run_status = getattr(run, "status", "?")
                    run_url = getattr(run, "url", "?")
                    # A run may carry goal=None; treat it as an empty preview
                    # instead of failing the whole listing on len(None).
                    run_goal = getattr(run, "goal", None) or ""
                    goal_preview = (run_goal[:60] + "...") if len(run_goal) > 60 else run_goal
                    lines.append(f"- {run_id} | {run_status} | {run_url} | {goal_preview}")

                return f"Found {len(runs)} runs:\n" + "\n".join(lines)
            except Exception as e:
                logger.exception("Error listing runs")
                return f"Error listing runs: {e}"

        # --- Tool: Search ---

        def search_web(
            query: Annotated[str, "The web search query to run."],
            location: Annotated[
                Optional[str],
                'Optional search location, such as "United States", "Singapore", or "London".',
            ] = None,
            language: Annotated[
                Optional[str],
                'Optional language code, such as "en", "es", or "fr".',
            ] = None,
        ) -> str:
            """Search the web with TinyFish and return ranked results."""
            try:
                response = client.search.query(query, location=location, language=language)
                return _json_dumps(response)
            except Exception as e:
                logger.exception("Error searching web")
                return f"Error searching web: {e}"

        # --- Tool: Fetch ---

        def fetch_web_pages(
            urls: Annotated[list[str], "One to ten URLs to fetch and extract content from."],
            format: Annotated[
                str,
                'Output format: "markdown", "html", or "json".',
            ] = "markdown",
            links: Annotated[
                bool,
                "Whether to include extracted links in the response.",
            ] = False,
            image_links: Annotated[
                bool,
                "Whether to include extracted image links in the response.",
            ] = False,
        ) -> str:
            """Fetch readable content from web pages."""
            try:
                if format not in _FETCH_FORMATS:
                    return 'Invalid format. Use "markdown", "html", or "json".'

                # Enforce the documented bound locally so the agent gets a
                # deterministic message instead of a remote API error.
                if not 1 <= len(urls) <= _MAX_FETCH_URLS:
                    return f"Invalid urls. Provide between 1 and {_MAX_FETCH_URLS} URLs."

                response = client.fetch.get_contents(
                    urls,
                    format=format,
                    links=links,
                    image_links=image_links,
                )
                return _json_dumps(response)
            except Exception as e:
                logger.exception("Error fetching web pages")
                return f"Error fetching web pages: {e}"

        # --- Tool: Create Browser Session ---

        def create_browser_session(
            url: Annotated[
                Optional[str],
                "Optional URL to open when the remote browser session starts.",
            ] = None,
            timeout_seconds: Annotated[
                Optional[int],
                "Optional inactivity timeout in seconds.",
            ] = None,
        ) -> str:
            """Create a remote browser session and return connection details."""
            try:
                response = client.browser.sessions.create(url=url, timeout_seconds=timeout_seconds)
                return _json_dumps(response)
            except Exception as e:
                logger.exception("Error creating browser session")
                return f"Error creating browser session: {e}"

        # --- Build Tool instances ---

        self._tools = [
            Tool(
                name="tinyfish_web_agent",
                description=(
                    "Automate any website using natural language. Provide a URL and describe "
                    "what you want to accomplish — extract data, fill forms, click buttons, "
                    "navigate pages, and more. Waits for completion and returns structured "
                    "JSON results. Best for tasks that complete in under 60 seconds."
                ),
                func_or_tool=run_web_automation,
            ),
            Tool(
                name="tinyfish_web_agent_async",
                description=(
                    "Start a web automation asynchronously without waiting for completion. "
                    "Returns a run_id immediately. Use tinyfish_get_run to poll for status "
                    "and results. Best for long-running tasks or batch processing."
                ),
                func_or_tool=queue_web_automation,
            ),
            Tool(
                name="tinyfish_get_run",
                description=(
                    "Get the status and result of a TinyFish automation run by its run_id. "
                    "Returns status (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) and "
                    "result data when available. Use after tinyfish_web_agent_async."
                ),
                func_or_tool=get_run,
            ),
            Tool(
                name="tinyfish_list_runs",
                description=(
                    "List recent TinyFish automation runs. Optionally filter by status. "
                    "Returns run IDs, statuses, URLs, and goal previews."
                ),
                func_or_tool=list_runs,
            ),
            Tool(
                name="tinyfish_search",
                description=(
                    "Search the web using TinyFish and return ranked results with titles, "
                    "URLs, snippets, and metadata. Best when you need to discover relevant pages."
                ),
                func_or_tool=search_web,
            ),
            Tool(
                name="tinyfish_fetch",
                description=(
                    "Fetch and extract readable content from one or more URLs. Supports markdown, "
                    "HTML, or JSON output and optional link extraction. Best after search when you "
                    "need page contents."
                ),
                func_or_tool=fetch_web_pages,
            ),
            Tool(
                name="tinyfish_create_browser_session",
                description=(
                    "Create a remote browser session and return the session ID, CDP URL, and base URL. "
                    "Best when a workflow needs direct browser control."
                ),
                func_or_tool=create_browser_session,
            ),
        ]

    @property
    def tools(self) -> list[Tool]:
        """Return all tools in this toolkit as a new list."""
        return list(self._tools)

    def register_for_llm(self, agent: "ConversableAgent") -> None:
        """Register all tools for LLM recommendation with the given agent."""
        for tool in self._tools:
            tool.register_for_llm(agent)

    def register_for_execution(self, agent: "ConversableAgent") -> None:
        """Register all tools for execution with the given agent."""
        for tool in self._tools:
            tool.register_for_execution(agent)
SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + + +def _schema_parameters(tool): + """Return the parameters block from either AG2 schema shape.""" + schema = tool.function_schema + if "function" in schema: + return schema["function"]["parameters"] + return schema["parameters"] + + +@pytest.fixture +def mock_tinyfish(): + """Patch TinyFish client and related imports.""" + with ( + patch("autogen.tools.experimental.tinyfish.tinyfish.TinyFish") as mock_cls, + patch("autogen.tools.experimental.tinyfish.tinyfish.RunStatus") as mock_status, + patch("autogen.tools.experimental.tinyfish.tinyfish.BrowserProfile") as mock_bp, + ): + mock_client = MagicMock() + mock_cls.return_value = mock_client + mock_bp.return_value = "lite" + yield { + "cls": mock_cls, + "client": mock_client, + "RunStatus": mock_status, + } + + +class TestTinyFishToolkitInit: + """Test TinyFishToolkit initialization.""" + + def test_creates_seven_tools(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + assert len(toolkit.tools) == 7 + + def test_tool_names(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + names = [t.name for t in toolkit.tools] + assert "tinyfish_web_agent" in names + assert "tinyfish_web_agent_async" in names + assert "tinyfish_get_run" in names + assert "tinyfish_list_runs" in names + assert "tinyfish_search" in names + assert "tinyfish_fetch" in names + assert "tinyfish_create_browser_session" in names + + def test_custom_api_key(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + TinyFishToolkit(api_key="sk-test-key") + mock_tinyfish["cls"].assert_called_once_with(api_key="sk-test-key") + + def test_default_api_key_from_env(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + TinyFishToolkit() + 
mock_tinyfish["cls"].assert_called_once_with() + + def test_sets_default_integration_tag(self, mock_tinyfish, monkeypatch): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + monkeypatch.delenv("TF_API_INTEGRATION", raising=False) + + TinyFishToolkit() + + assert os.environ["TF_API_INTEGRATION"] == "ag2" + mock_tinyfish["cls"].assert_called_once_with() + + def test_preserves_existing_integration_tag(self, mock_tinyfish, monkeypatch): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + monkeypatch.setenv("TF_API_INTEGRATION", "custom-tag") + + TinyFishToolkit() + + assert os.environ["TF_API_INTEGRATION"] == "custom-tag" + mock_tinyfish["cls"].assert_called_once_with() + + def test_tools_are_callable(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + for tool in toolkit.tools: + assert callable(tool.func) + + def test_invalid_browser_profile_raises(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_tinyfish["cls"].side_effect = None + with patch( + "autogen.tools.experimental.tinyfish.tinyfish.BrowserProfile", + side_effect=ValueError("bad"), + ): + with pytest.raises(ValueError, match="Invalid browser_profile"): + TinyFishToolkit(browser_profile="invalid_profile") + + def test_invalid_proxy_country_raises(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + with patch( + "autogen.tools.experimental.tinyfish.tinyfish.ProxyCountryCode", + side_effect=ValueError("bad"), + ): + with pytest.raises(ValueError, match="Invalid proxy_country"): + TinyFishToolkit(proxy_country="ZZZ") + + +class TestTinyFishToolSchemas: + """Test tool function schemas.""" + + def test_run_tool_schema(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + run_tool = next(t for t in toolkit.tools if t.name == "tinyfish_web_agent") + params 
= _schema_parameters(run_tool) + assert "url" in params["properties"] + assert "goal" in params["properties"] + assert "url" in params["required"] + assert "goal" in params["required"] + + def test_async_tool_schema(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + async_tool = next(t for t in toolkit.tools if t.name == "tinyfish_web_agent_async") + params = _schema_parameters(async_tool) + assert "url" in params["properties"] + assert "goal" in params["properties"] + + def test_get_run_tool_schema(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + get_tool = next(t for t in toolkit.tools if t.name == "tinyfish_get_run") + params = _schema_parameters(get_tool) + assert "run_id" in params["properties"] + assert "run_id" in params["required"] + + def test_list_runs_tool_schema(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + list_tool = next(t for t in toolkit.tools if t.name == "tinyfish_list_runs") + params = _schema_parameters(list_tool) + assert "status" in params["properties"] + assert "limit" in params["properties"] + + def test_search_tool_schema(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + search_tool = next(t for t in toolkit.tools if t.name == "tinyfish_search") + params = _schema_parameters(search_tool) + assert "query" in params["properties"] + assert "location" in params["properties"] + assert "language" in params["properties"] + assert "query" in params["required"] + + def test_fetch_tool_schema(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + fetch_tool = next(t for t in toolkit.tools if t.name == "tinyfish_fetch") + params = _schema_parameters(fetch_tool) + assert "urls" in 
params["properties"] + assert "format" in params["properties"] + assert "links" in params["properties"] + assert "image_links" in params["properties"] + assert "urls" in params["required"] + + def test_create_browser_session_tool_schema(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + browser_tool = next(t for t in toolkit.tools if t.name == "tinyfish_create_browser_session") + params = _schema_parameters(browser_tool) + assert "url" in params["properties"] + assert "timeout_seconds" in params["properties"] + + +class TestRunTool: + """Test the sync run tool execution.""" + + def test_successful_run(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_result = MagicMock() + mock_result.status = mock_tinyfish["RunStatus"].COMPLETED + mock_result.result = {"title": "Example", "price": "$9.99"} + mock_tinyfish["client"].agent.run.return_value = mock_result + + toolkit = TinyFishToolkit() + run_tool = next(t for t in toolkit.tools if t.name == "tinyfish_web_agent") + output = run_tool.func(url="https://example.com", goal="Extract title and price") + + parsed = json.loads(output) + assert parsed["title"] == "Example" + assert parsed["price"] == "$9.99" + + def test_failed_run(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_result = MagicMock() + mock_result.status = MagicMock() # Not COMPLETED + mock_result.error = "Page not found" + mock_tinyfish["client"].agent.run.return_value = mock_result + + toolkit = TinyFishToolkit() + run_tool = next(t for t in toolkit.tools if t.name == "tinyfish_web_agent") + output = run_tool.func(url="https://example.com", goal="Extract data") + + assert "failed" in output.lower() + + def test_exception_returns_error_string(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_tinyfish["client"].agent.run.side_effect = 
ConnectionError("Network error") + + toolkit = TinyFishToolkit() + run_tool = next(t for t in toolkit.tools if t.name == "tinyfish_web_agent") + output = run_tool.func(url="https://example.com", goal="Extract data") + + assert "Error" in output + assert "Network error" in output + + +class TestQueueTool: + """Test the async queue tool execution.""" + + def test_successful_queue(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_result = MagicMock() + mock_result.run_id = "run_abc123" + mock_tinyfish["client"].agent.queue.return_value = mock_result + + toolkit = TinyFishToolkit() + queue_tool = next(t for t in toolkit.tools if t.name == "tinyfish_web_agent_async") + output = queue_tool.func(url="https://example.com", goal="Extract data") + + assert "run_abc123" in output + assert "tinyfish_get_run" in output + + def test_queue_exception(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_tinyfish["client"].agent.queue.side_effect = Exception("Auth failed") + + toolkit = TinyFishToolkit() + queue_tool = next(t for t in toolkit.tools if t.name == "tinyfish_web_agent_async") + output = queue_tool.func(url="https://example.com", goal="Extract data") + + assert "Error" in output + + +class TestGetRunTool: + """Test the get run tool execution.""" + + def test_completed_run(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_run = MagicMock() + mock_run.status = "COMPLETED" + mock_run.result = {"data": "test"} + mock_run.error = None + mock_tinyfish["client"].runs.get.return_value = mock_run + + toolkit = TinyFishToolkit() + get_tool = next(t for t in toolkit.tools if t.name == "tinyfish_get_run") + output = get_tool.func(run_id="run_abc123") + + assert "COMPLETED" in output + assert "test" in output + + def test_pending_run(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_run = MagicMock() + 
mock_run.status = "PENDING" + mock_run.result = None + mock_run.error = None + del mock_run.streaming_url + del mock_run.streamingUrl + mock_tinyfish["client"].runs.get.return_value = mock_run + + toolkit = TinyFishToolkit() + get_tool = next(t for t in toolkit.tools if t.name == "tinyfish_get_run") + output = get_tool.func(run_id="run_abc123") + + assert "PENDING" in output + + +class TestListRunsTool: + """Test the list runs tool execution.""" + + def test_list_with_results(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_run = MagicMock() + mock_run.run_id = "run_abc123" + mock_run.status = "COMPLETED" + mock_run.url = "https://example.com" + mock_run.goal = "Extract product data" + + mock_response = MagicMock() + mock_response.runs = [mock_run] + mock_tinyfish["client"].runs.list.return_value = mock_response + + toolkit = TinyFishToolkit() + list_tool = next(t for t in toolkit.tools if t.name == "tinyfish_list_runs") + output = list_tool.func() + + assert "run_abc123" in output + assert "COMPLETED" in output + + def test_list_empty(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_response = MagicMock() + mock_response.runs = [] + mock_tinyfish["client"].runs.list.return_value = mock_response + + toolkit = TinyFishToolkit() + list_tool = next(t for t in toolkit.tools if t.name == "tinyfish_list_runs") + output = list_tool.func() + + assert "No runs found" in output + + +class TestSearchTool: + """Test the search tool execution.""" + + def test_successful_search(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_response = SimpleNamespace( + model_dump=lambda mode="json": { + "query": "tinyfish docs", + "total_results": 1, + "results": [{"title": "TinyFish Docs", "url": "https://docs.tinyfish.ai"}], + } + ) + mock_tinyfish["client"].search.query.return_value = mock_response + + toolkit = TinyFishToolkit() + search_tool = 
next(t for t in toolkit.tools if t.name == "tinyfish_search") + output = search_tool.func(query="tinyfish docs", location="United States", language="en") + + parsed = json.loads(output) + assert parsed["query"] == "tinyfish docs" + assert parsed["results"][0]["title"] == "TinyFish Docs" + mock_tinyfish["client"].search.query.assert_called_once_with( + "tinyfish docs", + location="United States", + language="en", + ) + + def test_search_exception(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_tinyfish["client"].search.query.side_effect = Exception("Search failed") + + toolkit = TinyFishToolkit() + search_tool = next(t for t in toolkit.tools if t.name == "tinyfish_search") + output = search_tool.func(query="tinyfish docs") + + assert "Error" in output + assert "Search failed" in output + + +class TestFetchTool: + """Test the fetch tool execution.""" + + def test_successful_fetch(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_response = SimpleNamespace( + model_dump=lambda mode="json": { + "results": [{"url": "https://docs.tinyfish.ai", "content": "# TinyFish"}], + "errors": [], + } + ) + mock_tinyfish["client"].fetch.get_contents.return_value = mock_response + + toolkit = TinyFishToolkit() + fetch_tool = next(t for t in toolkit.tools if t.name == "tinyfish_fetch") + output = fetch_tool.func( + urls=["https://docs.tinyfish.ai"], + format="markdown", + links=True, + image_links=False, + ) + + parsed = json.loads(output) + assert parsed["results"][0]["content"] == "# TinyFish" + mock_tinyfish["client"].fetch.get_contents.assert_called_once_with( + ["https://docs.tinyfish.ai"], + format="markdown", + links=True, + image_links=False, + ) + + def test_invalid_fetch_format(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + fetch_tool = next(t for t in toolkit.tools if t.name == "tinyfish_fetch") + output = 
fetch_tool.func(urls=["https://docs.tinyfish.ai"], format="xml") + + assert "Invalid format" in output + mock_tinyfish["client"].fetch.get_contents.assert_not_called() + + def test_fetch_exception(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_tinyfish["client"].fetch.get_contents.side_effect = Exception("Fetch failed") + + toolkit = TinyFishToolkit() + fetch_tool = next(t for t in toolkit.tools if t.name == "tinyfish_fetch") + output = fetch_tool.func(urls=["https://docs.tinyfish.ai"]) + + assert "Error" in output + assert "Fetch failed" in output + + +class TestCreateBrowserSessionTool: + """Test the browser session tool execution.""" + + def test_successful_create_browser_session(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_response = SimpleNamespace( + model_dump=lambda mode="json": { + "session_id": "session_123", + "cdp_url": "wss://example.com/cdp", + "base_url": "https://browser.tinyfish.ai/session_123", + } + ) + mock_tinyfish["client"].browser.sessions.create.return_value = mock_response + + toolkit = TinyFishToolkit() + browser_tool = next(t for t in toolkit.tools if t.name == "tinyfish_create_browser_session") + output = browser_tool.func(url="https://docs.tinyfish.ai", timeout_seconds=300) + + parsed = json.loads(output) + assert parsed["session_id"] == "session_123" + mock_tinyfish["client"].browser.sessions.create.assert_called_once_with( + url="https://docs.tinyfish.ai", + timeout_seconds=300, + ) + + def test_create_browser_session_exception(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_tinyfish["client"].browser.sessions.create.side_effect = Exception("Browser failed") + + toolkit = TinyFishToolkit() + browser_tool = next(t for t in toolkit.tools if t.name == "tinyfish_create_browser_session") + output = browser_tool.func(url="https://docs.tinyfish.ai") + + assert "Error" in output + assert 
"Browser failed" in output From 0ca136e44f80c3d7af0668125c2f36198a249e94 Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Mon, 11 May 2026 22:50:21 +0800 Subject: [PATCH 2/2] Address AG2 CodeRabbit review findings Add deterministic URL and fetch bounds validation, handle nullable run goals, and cover the edge cases with focused tests. --- .../tools/experimental/tinyfish/tinyfish.py | 8 ++- .../experimental/tinyfish/test_tinyfish.py | 50 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/ag2/autogen/tools/experimental/tinyfish/tinyfish.py b/ag2/autogen/tools/experimental/tinyfish/tinyfish.py index e48745f..97c6928 100644 --- a/ag2/autogen/tools/experimental/tinyfish/tinyfish.py +++ b/ag2/autogen/tools/experimental/tinyfish/tinyfish.py @@ -149,6 +149,8 @@ def run_web_automation( ) -> str: """Run a web automation synchronously and return the result.""" try: + if not url.startswith(("http://", "https://")): + return "Invalid url. Must start with http:// or https://." kwargs: dict = {"url": url, "goal": goal, "browser_profile": bp} if proxy_config: kwargs["proxy_config"] = proxy_config @@ -180,6 +182,8 @@ def queue_web_automation( ) -> str: """Start a web automation asynchronously and return the run ID.""" try: + if not url.startswith(("http://", "https://")): + return "Invalid url. Must start with http:// or https://." 
kwargs: dict = {"url": url, "goal": goal, "browser_profile": bp} if proxy_config: kwargs["proxy_config"] = proxy_config @@ -249,7 +253,7 @@ def list_runs( run_id = getattr(run, "run_id", "?") run_status = getattr(run, "status", "?") run_url = getattr(run, "url", "?") - run_goal = getattr(run, "goal", "") + run_goal = str(getattr(run, "goal", "") or "") goal_preview = (run_goal[:60] + "...") if len(run_goal) > 60 else run_goal lines.append(f"- {run_id} | {run_status} | {run_url} | {goal_preview}") @@ -298,6 +302,8 @@ def fetch_web_pages( ) -> str: """Fetch readable content from web pages.""" try: + if not 1 <= len(urls) <= 10: + return "Invalid urls. Provide between 1 and 10 URLs." if format not in {"markdown", "html", "json"}: return 'Invalid format. Use "markdown", "html", or "json".' diff --git a/ag2/test/tools/experimental/tinyfish/test_tinyfish.py b/ag2/test/tools/experimental/tinyfish/test_tinyfish.py index 3adbe4a..3929b33 100644 --- a/ag2/test/tools/experimental/tinyfish/test_tinyfish.py +++ b/ag2/test/tools/experimental/tinyfish/test_tinyfish.py @@ -238,6 +238,16 @@ def test_exception_returns_error_string(self, mock_tinyfish): assert "Error" in output assert "Network error" in output + def test_invalid_url_scheme(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + run_tool = next(t for t in toolkit.tools if t.name == "tinyfish_web_agent") + output = run_tool.func(url="ftp://example.com", goal="Extract data") + + assert "Invalid url" in output + mock_tinyfish["client"].agent.run.assert_not_called() + class TestQueueTool: """Test the async queue tool execution.""" @@ -267,6 +277,16 @@ def test_queue_exception(self, mock_tinyfish): assert "Error" in output + def test_invalid_url_scheme(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + queue_tool = next(t for t in toolkit.tools if t.name == "tinyfish_web_agent_async") + 
output = queue_tool.func(url="ftp://example.com", goal="Extract data") + + assert "Invalid url" in output + mock_tinyfish["client"].agent.queue.assert_not_called() + class TestGetRunTool: """Test the get run tool execution.""" @@ -341,6 +361,26 @@ def test_list_empty(self, mock_tinyfish): assert "No runs found" in output + def test_list_handles_null_goal(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + mock_run = MagicMock() + mock_run.run_id = "run_abc123" + mock_run.status = "COMPLETED" + mock_run.url = "https://example.com" + mock_run.goal = None + + mock_response = MagicMock() + mock_response.runs = [mock_run] + mock_tinyfish["client"].runs.list.return_value = mock_response + + toolkit = TinyFishToolkit() + list_tool = next(t for t in toolkit.tools if t.name == "tinyfish_list_runs") + output = list_tool.func() + + assert "run_abc123" in output + assert "COMPLETED" in output + class TestSearchTool: """Test the search tool execution.""" @@ -425,6 +465,16 @@ def test_invalid_fetch_format(self, mock_tinyfish): assert "Invalid format" in output mock_tinyfish["client"].fetch.get_contents.assert_not_called() + def test_invalid_fetch_url_count(self, mock_tinyfish): + from autogen.tools.experimental.tinyfish import TinyFishToolkit + + toolkit = TinyFishToolkit() + fetch_tool = next(t for t in toolkit.tools if t.name == "tinyfish_fetch") + output = fetch_tool.func(urls=[]) + + assert "Invalid urls" in output + mock_tinyfish["client"].fetch.get_contents.assert_not_called() + def test_fetch_exception(self, mock_tinyfish): from autogen.tools.experimental.tinyfish import TinyFishToolkit