From f5eb995aac5dd0fc734f96275a55d98849c829f5 Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Mon, 11 May 2026 18:13:03 +0800 Subject: [PATCH 1/9] Add CrewAI TinyFish integration Adds the CrewAI TinyFish package with SDK-backed tools for web automation, queued runs, run lookup, run listing, web search, content fetch, and browser session creation. Uses tinyfish>=0.2.5, includes focused tests, and adds scoped CI. --- .github/workflows/crew-ai-ci.yml | 39 ++ crew-ai/.gitignore | 19 + crew-ai/Makefile | 15 + crew-ai/README.md | 142 ++++++ crew-ai/pyproject.toml | 27 + crew-ai/requirements-dev.txt | 2 + crew-ai/src/tinyfish_web_agent/__init__.py | 21 + crew-ai/src/tinyfish_web_agent/tool.py | 541 +++++++++++++++++++++ crew-ai/tests/test_tool.py | 227 +++++++++ 9 files changed, 1033 insertions(+) create mode 100644 .github/workflows/crew-ai-ci.yml create mode 100644 crew-ai/.gitignore create mode 100644 crew-ai/Makefile create mode 100644 crew-ai/README.md create mode 100644 crew-ai/pyproject.toml create mode 100644 crew-ai/requirements-dev.txt create mode 100644 crew-ai/src/tinyfish_web_agent/__init__.py create mode 100644 crew-ai/src/tinyfish_web_agent/tool.py create mode 100644 crew-ai/tests/test_tool.py diff --git a/.github/workflows/crew-ai-ci.yml b/.github/workflows/crew-ai-ci.yml new file mode 100644 index 0000000..db35a3b --- /dev/null +++ b/.github/workflows/crew-ai-ci.yml @@ -0,0 +1,39 @@ +name: CrewAI CI + +on: + pull_request: + branches: [main] + paths: + - ".github/workflows/crew-ai-ci.yml" + - "crew-ai/**" + push: + branches: [main] + paths: + - ".github/workflows/crew-ai-ci.yml" + - "crew-ai/**" + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + + defaults: + run: + working-directory: crew-ai + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - run: python -m pip install --upgrade pip + + - run: python -m pip install -e . -r requirements-dev.txt + + - run: make lint + + - run: make test diff --git a/crew-ai/.gitignore b/crew-ai/.gitignore new file mode 100644 index 0000000..6ca54b8 --- /dev/null +++ b/crew-ai/.gitignore @@ -0,0 +1,19 @@ +# Python-generated files +__pycache__/ +*.py[oc] +build/ +dist/ +wheels/ +*.egg-info + +# Virtual environments +.venv + +# Lockfiles +uv.lock + +# Environment variables +.env + +# Claude Code +.claude/ diff --git a/crew-ai/Makefile b/crew-ai/Makefile new file mode 100644 index 0000000..77ba14b --- /dev/null +++ b/crew-ai/Makefile @@ -0,0 +1,15 @@ +.PHONY: format lint test clean + +format: + ruff format src tests + ruff check --fix src tests + +lint: + ruff check src tests + +test: + pytest tests -v + +clean: + rm -rf build dist *.egg-info .pytest_cache .ruff_cache + find . -type d -name __pycache__ -exec rm -rf {} + diff --git a/crew-ai/README.md b/crew-ai/README.md new file mode 100644 index 0000000..5eb4692 --- /dev/null +++ b/crew-ai/README.md @@ -0,0 +1,142 @@ +# Tinyfish — CrewAI Tool + +Automate any website using natural language with [TinyFish Web Agent](https://tinyfish.ai). Extract data, fill forms, click buttons, navigate pages, and more — all described in plain English. + +## Installation + +Clone the repo and install the tool locally: + +```bash +git clone https://github.com/tinyfish-io/tinyfish-web-agent-integrations.git +cd tinyfish-web-agent-integrations/crew-ai +pip install -e . +``` + +## Setup + +Get your API key at [agent.tinyfish.ai/api-keys](https://agent.tinyfish.ai/api-keys), then set it as an environment variable: + +```bash +export TINYFISH_API_KEY="your-api-key" +``` + +Or add it to your `.env` file: + +```dotenv +TINYFISH_API_KEY=your-api-key +``` + +`tinyfish-web-agent` automatically tags TinyFish SDK requests as originating +from `crew-ai`. You do not need to set `TF_API_INTEGRATION` yourself. + +## Tools + +| Tool | Description | +|------|-------------| +| `TinyfishRun` | Run a browser automation synchronously. Best for quick tasks (<30s). | +| `TinyfishRunAsync` | Start an automation asynchronously. Returns a `run_id` immediately. | +| `TinyfishGetRun` | Check status and get results of a run by its `run_id`. | +| `TinyfishListRuns` | List recent automation runs, optionally filtered by status. | +| `TinyfishSearch` | Search the web and return structured results. | +| `TinyfishFetch` | Fetch clean content from one or more URLs. | +| `TinyfishBrowserSession` | Create a remote browser session and return connection URLs. | + +`Tinyfish` is an alias for `TinyfishRun`. + +## Usage + +### Basic — synchronous run + +```python +from crewai import Agent +from tinyfish_web_agent import TinyfishRun + +agent = Agent( + role="Web Researcher", + goal="Find and extract information from websites", + tools=[TinyfishRun()], +) +``` + +### Async workflow — start and check + +```python +from tinyfish_web_agent import TinyfishRunAsync, TinyfishGetRun + +agent = Agent( + role="Data Collector", + goal="Collect data from multiple sources efficiently", + tools=[TinyfishRunAsync(), TinyfishGetRun()], +) +``` + +### Search and fetch + +```python +from tinyfish_web_agent import TinyfishSearch, TinyfishFetch + +agent = Agent( + role="Web Researcher", + goal="Search the web and read relevant pages", + tools=[TinyfishSearch(), TinyfishFetch()], +) +``` + +### All tools at once + +```python +from tinyfish_web_agent import ( + TinyfishRun, + TinyfishRunAsync, + TinyfishGetRun, + TinyfishListRuns, + TinyfishSearch, + TinyfishFetch, + TinyfishBrowserSession, +) + +agent = Agent( + role="Web Automation Specialist", + goal="Automate any web task", + tools=[ + TinyfishRun(), + TinyfishRunAsync(), + TinyfishGetRun(), + TinyfishListRuns(), + TinyfishSearch(), + TinyfishFetch(), + TinyfishBrowserSession(), + ], +) +``` + +## Configuration + +All tools accept these optional constructor parameters: + +| Parameter | Description | +|-----------|-------------| +| `api_key` | TinyFish API key. Falls back to `TINYFISH_API_KEY` env var. | +| `proxy_country` | Route through a proxy in this country (`US`, `GB`, `CA`, `DE`, `FR`, `JP`, `AU`). | + +The package also sets `TF_API_INTEGRATION=crew-ai` internally so requests are +attributed automatically. + +```python +tool = TinyfishRun(api_key="sk-...", proxy_country="US") +``` + +## Example goals + +```text +"Extract all product names, prices, and ratings from this page" +"Fill the contact form with name 'Jane Doe' and email 'jane@example.com', then submit" +"Click 'Next Page' 3 times, extracting all listings from each page" +"Log in with the provided credentials, then extract the dashboard data" +``` + +## Support + +- [TinyFish Docs](https://docs.tinyfish.ai) +- [CrewAI Docs](https://docs.crewai.com) +- [Discord](https://discord.gg/agentql) diff --git a/crew-ai/pyproject.toml b/crew-ai/pyproject.toml new file mode 100644 index 0000000..3fd2a90 --- /dev/null +++ b/crew-ai/pyproject.toml @@ -0,0 +1,27 @@ +[project] +name = "tinyfish-web-agent" +version = "0.1.1" +description = "Power up your crews with TinyFish Web Agent" +readme = "README.md" +requires-python = ">=3.10,<3.14" +dependencies = [ + "crewai[tools]>=0.203.1", + "tinyfish>=0.2.5", +] + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.crewai] +type = "tool" + +[tool.ruff] +target-version = "py310" +line-length = 88 + +[tool.ruff.lint] +select = ["E", "F", "I", "T201", "W"] diff --git a/crew-ai/requirements-dev.txt b/crew-ai/requirements-dev.txt new file mode 100644 index 0000000..bcd205e --- /dev/null +++ b/crew-ai/requirements-dev.txt @@ -0,0 +1,2 @@ +pytest>=7.0,<9.0 +ruff>=0.4,<1.0 diff --git a/crew-ai/src/tinyfish_web_agent/__init__.py b/crew-ai/src/tinyfish_web_agent/__init__.py new file mode 100644 index 0000000..01ed426 --- /dev/null +++ b/crew-ai/src/tinyfish_web_agent/__init__.py @@ -0,0 +1,21 @@ +from .tool import ( + Tinyfish, + TinyfishBrowserSession, + TinyfishFetch, + TinyfishGetRun, + TinyfishListRuns, + TinyfishRun, + TinyfishRunAsync, + TinyfishSearch, +) + +__all__ = [ + "Tinyfish", + "TinyfishRun", + "TinyfishRunAsync", + "TinyfishGetRun", + "TinyfishListRuns", + "TinyfishSearch", + "TinyfishFetch", + "TinyfishBrowserSession", +] diff --git a/crew-ai/src/tinyfish_web_agent/tool.py b/crew-ai/src/tinyfish_web_agent/tool.py new file mode 100644 index 0000000..784845c --- /dev/null +++ b/crew-ai/src/tinyfish_web_agent/tool.py @@ -0,0 +1,541 @@ +import json +import logging +import os +from typing import Any, List, Literal, Optional, Type + +from crewai.tools import BaseTool, EnvVar +from pydantic import BaseModel, Field, field_validator +from tinyfish import ( + BrowserProfile, + ProxyConfig, + ProxyCountryCode, + RunStatus, + TinyFish, +) + +logger = logging.getLogger(__name__) + +_INTEGRATION_NAME = "crew-ai" + + +# --------------------------------------------------------------------------- +# Input schemas +# --------------------------------------------------------------------------- + + +class TinyfishRunInput(BaseModel): + """Input schema for running a browser automation.""" + + url: str = Field(..., description="Target website URL to automate.") + goal: str = Field( + ..., + description=( + "Natural language description of what to accomplish on the website." + ), + ) + browser_profile: Literal["lite", "stealth"] = Field( + default="lite", + description=( + 'Browser execution mode. "lite" (default) is fast for ' + 'standard sites. "stealth" enables anti-detection for ' + "sites with bot protection." + ), + ) + + @field_validator("url") + @classmethod + def _check_url_scheme(cls, v: str) -> str: + if not v.startswith(("http://", "https://")): + raise ValueError("URL must start with http:// or https://") + return v + + +class TinyfishGetRunInput(BaseModel): + """Input schema for getting a run by ID.""" + + run_id: str = Field(..., description="The unique run ID to look up.") + + +class TinyfishListRunsInput(BaseModel): + """Input schema for listing runs.""" + + status: Optional[ + Literal[ + "PENDING", + "RUNNING", + "COMPLETED", + "FAILED", + "CANCELLED", + ] + ] = Field(default=None, description="Filter runs by status.") + goal: Optional[str] = Field( + default=None, + description=("Search runs by goal text (case-insensitive partial match)."), + ) + limit: int = Field( + default=20, + ge=1, + le=100, + description="Maximum number of runs to return (1-100).", + ) + cursor: Optional[str] = Field( + default=None, + description=( + "Pagination cursor from a previous response to get the next page." + ), + ) + created_after: Optional[str] = Field( + default=None, + description=("Only return runs created after this ISO 8601 timestamp."), + ) + created_before: Optional[str] = Field( + default=None, + description=("Only return runs created before this ISO 8601 timestamp."), + ) + sort_direction: Optional[Literal["asc", "desc"]] = Field( + default=None, + description=('Sort order by creation date: "asc" or "desc" (default: desc).'), + ) + + +class TinyfishSearchInput(BaseModel): + """Input schema for searching the web.""" + + query: str = Field(..., description="Search query.") + location: Optional[str] = Field( + default=None, + description="Optional location to scope results, such as 'United States'.", + ) + language: Optional[str] = Field( + default=None, + description="Optional language code, such as 'en'.", + ) + + +class TinyfishFetchInput(BaseModel): + """Input schema for fetching clean page content.""" + + urls: list[str] = Field( + ..., + description="One to ten URLs to fetch and extract clean content from.", + ) + format: Literal["markdown", "html", "json"] = Field( + default="markdown", + description="Output format for extracted content.", + ) + links: bool = Field(default=False, description="Whether to include page links.") + image_links: bool = Field( + default=False, + description="Whether to include image links.", + ) + + +class TinyfishBrowserSessionInput(BaseModel): + """Input schema for creating a remote browser session.""" + + url: Optional[str] = Field( + default=None, + description="Optional URL to open when the browser session starts.", + ) + timeout_seconds: int = Field( + default=0, + ge=0, + description=( + "Optional inactivity timeout in seconds. " + "Use 0 to let TinyFish apply the plan default." + ), + ) + + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + + +def _format_status(status: Any) -> str: + """Convert a RunStatus enum (or any value) to a string.""" + if isinstance(status, RunStatus): + return status.value + return str(status) + + +def _format_error(error: Any) -> str: + """Extract a human-readable message from an error object.""" + if isinstance(error, dict): + return error.get("message", str(error)) + return str(error) + + +def _ensure_integration_tag() -> None: + """Set the default TinyFish integration tag for request attribution.""" + os.environ.setdefault("TF_API_INTEGRATION", _INTEGRATION_NAME) + + +def _dump_json(value: Any) -> str: + """Serialize SDK response models to a JSON string.""" + if hasattr(value, "model_dump_json"): + return str(value.model_dump_json()) + if hasattr(value, "model_dump"): + return json.dumps(value.model_dump()) + return json.dumps(value) + + +# --------------------------------------------------------------------------- +# Base tool +# --------------------------------------------------------------------------- + + +class _TinyfishBaseTool(BaseTool): + """Shared configuration and helpers for all Tinyfish tools.""" + + api_key: Optional[str] = None + proxy_country: Optional[str] = None + + env_vars: List[EnvVar] = [ + EnvVar( + name="TINYFISH_API_KEY", + description=( + "TinyFish Web Agent API key. " + "Get one at https://agent.tinyfish.ai/api-keys" + ), + required=True, + ), + ] + + # Cached per-instance to avoid re-creating on every call. + _client: Optional[TinyFish] = None + + def _get_client(self) -> tuple[Optional[TinyFish], Optional[str]]: + """Return (client, None) or (None, error_string).""" + if self._client is not None: + return self._client, None + _ensure_integration_tag() + key = self.api_key or os.environ.get("TINYFISH_API_KEY") + if not key: + return None, ( + "Error: TINYFISH_API_KEY is not set. " + "Set it as an environment variable or pass " + "api_key when creating the tool. " + "Get your key at " + "https://agent.tinyfish.ai/api-keys" + ) + self._client = TinyFish(api_key=key) + return self._client, None + + def _run_kwargs(self, browser_profile: str) -> dict[str, Any]: + """Common kwargs for SDK agent calls.""" + try: + bp = BrowserProfile(browser_profile) + except ValueError: + bp = BrowserProfile.LITE + kwargs: dict[str, Any] = {"browser_profile": bp} + if self.proxy_country: + kwargs["proxy_config"] = ProxyConfig( + enabled=True, + country_code=ProxyCountryCode(self.proxy_country), + ) + return kwargs + + def _safe_call( + self, fn: Any, *args: Any, **kwargs: Any + ) -> tuple[Any, Optional[str]]: + """Call fn and return (result, None) or (None, error). + + Logs the traceback at DEBUG level for diagnostics. + """ + try: + return fn(*args, **kwargs), None + except Exception as exc: + logger.debug("SDK call failed", exc_info=True) + return None, f"Error: {exc}" + + +# --------------------------------------------------------------------------- +# Tools +# --------------------------------------------------------------------------- + + +class TinyfishRun(_TinyfishBaseTool): + """Run a browser automation synchronously and return the + result. + + Best for quick tasks that complete in under 30 seconds. + """ + + name: str = "Tinyfish Web Agent" + description: str = ( + "Automate any website using natural language. " + "Provide a URL and describe what you want to " + "accomplish — extract data, fill forms, click " + "buttons, navigate pages, and more. " + "Waits for completion and returns structured " + "JSON results." + ) + args_schema: Type[BaseModel] = TinyfishRunInput + + def _run( + self, + url: str, + goal: str, + browser_profile: str = "lite", + ) -> str: + client, err = self._get_client() + if err: + return err + + result, err = self._safe_call( + client.agent.run, + url=url, + goal=goal, + **self._run_kwargs(browser_profile), + ) + if err: + return err + + if result.status == RunStatus.COMPLETED: + if result.result is not None: + return json.dumps(result.result) + return "Completed with no result data." + + error = getattr(result, "error", None) + if error: + return f"Automation failed: {_format_error(error)}" + return f"Automation ended with status: {_format_status(result.status)}" + + +class TinyfishRunAsync(_TinyfishBaseTool): + """Start a browser automation asynchronously and return + the run ID. + + Use TinyfishGetRun to poll for results. + Best for long-running tasks. + """ + + name: str = "Tinyfish Web Agent (Async)" + description: str = ( + "Start a browser automation asynchronously. " + "Returns a run_id immediately without waiting " + "for completion. " + "Use the 'Tinyfish Get Run' tool to check status " + "and get results." + ) + args_schema: Type[BaseModel] = TinyfishRunInput + + def _run( + self, + url: str, + goal: str, + browser_profile: str = "lite", + ) -> str: + client, err = self._get_client() + if err: + return err + + result, err = self._safe_call( + client.agent.queue, + url=url, + goal=goal, + **self._run_kwargs(browser_profile), + ) + if err: + return err + + return f"Automation started. run_id: {result.run_id}" + + +class TinyfishGetRun(_TinyfishBaseTool): + """Get the status and result of a TinyFish automation + run.""" + + name: str = "Tinyfish Get Run" + description: str = ( + "Check the status and get results of a TinyFish " + "automation run by its run_id. Returns status " + "(PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) " + "and result data." + ) + args_schema: Type[BaseModel] = TinyfishGetRunInput + + def _run(self, run_id: str) -> str: + client, err = self._get_client() + if err: + return err + + run, err = self._safe_call(client.runs.get, run_id) + if err: + return err + + parts = [f"Status: {_format_status(run.status)}"] + streaming_url = getattr(run, "streaming_url", None) + if streaming_url: + parts.append(f"Live view: {streaming_url}") + result = getattr(run, "result", None) + if result is not None: + parts.append(f"Result: {json.dumps(result)}") + error = getattr(run, "error", None) + if error: + parts.append(f"Error: {_format_error(error)}") + + return "\n".join(parts) + + +class TinyfishListRuns(_TinyfishBaseTool): + """List recent TinyFish automation runs.""" + + name: str = "Tinyfish List Runs" + description: str = ( + "List recent TinyFish automation runs. " + "Optionally filter by status (PENDING, RUNNING, " + "COMPLETED, FAILED, CANCELLED)." + ) + args_schema: Type[BaseModel] = TinyfishListRunsInput + + def _run( + self, + status: Optional[str] = None, + goal: Optional[str] = None, + limit: int = 20, + cursor: Optional[str] = None, + created_after: Optional[str] = None, + created_before: Optional[str] = None, + sort_direction: Optional[str] = None, + ) -> str: + client, err = self._get_client() + if err: + return err + + kwargs: dict[str, Any] = {"limit": limit} + if status: + kwargs["status"] = RunStatus(status) + if goal: + kwargs["goal"] = goal + if cursor: + kwargs["cursor"] = cursor + if created_after: + kwargs["created_after"] = created_after + if created_before: + kwargs["created_before"] = created_before + if sort_direction: + kwargs["sort_direction"] = sort_direction + + response, err = self._safe_call(client.runs.list, **kwargs) + if err: + return err + + runs = getattr(response, "runs", []) + if not runs: + return "No runs found." + + lines = [] + for run in runs: + rid = getattr(run, "run_id", "?") + st = _format_status(getattr(run, "status", "?")) + url = getattr(run, "url", "?") + lines.append(f"- {rid} | {st} | {url}") + + return f"Found {len(runs)} runs:\n" + "\n".join(lines) + + +class TinyfishSearch(_TinyfishBaseTool): + """Search the web using TinyFish.""" + + name: str = "Tinyfish Search" + description: str = ( + "Search the web using TinyFish and return structured search results. " + "Use this to discover relevant URLs or current web pages before " + "fetching content or running browser automation." + ) + args_schema: Type[BaseModel] = TinyfishSearchInput + + def _run( + self, + query: str, + location: Optional[str] = None, + language: Optional[str] = None, + ) -> str: + client, err = self._get_client() + if err: + return err + + kwargs: dict[str, Any] = {} + if location: + kwargs["location"] = location + if language: + kwargs["language"] = language + + response, err = self._safe_call( + client.search.query, + query=query, + **kwargs, + ) + if err: + return err + return _dump_json(response) + + +class TinyfishFetch(_TinyfishBaseTool): + """Fetch clean content from one or more URLs using TinyFish.""" + + name: str = "Tinyfish Fetch" + description: str = ( + "Fetch clean readable content from one or more URLs using TinyFish. " + "Use this when the URL is already known and the agent needs page text, " + "metadata, links, or image links without controlling a browser." + ) + args_schema: Type[BaseModel] = TinyfishFetchInput + + def _run( + self, + urls: list[str], + format: Literal["markdown", "html", "json"] = "markdown", + links: bool = False, + image_links: bool = False, + ) -> str: + client, err = self._get_client() + if err: + return err + + response, err = self._safe_call( + client.fetch.get_contents, + urls=urls, + format=format, + links=links, + image_links=image_links, + ) + if err: + return err + return _dump_json(response) + + +class TinyfishBrowserSession(_TinyfishBaseTool): + """Create a TinyFish remote browser session.""" + + name: str = "Tinyfish Browser Session" + description: str = ( + "Create a TinyFish remote browser session and return its session ID, " + "CDP WebSocket URL, and base URL. Use this for low-level browser " + "control from external browser automation clients." + ) + args_schema: Type[BaseModel] = TinyfishBrowserSessionInput + + def _run( + self, + url: Optional[str] = None, + timeout_seconds: int = 0, + ) -> str: + client, err = self._get_client() + if err: + return err + + response, err = self._safe_call( + client.browser.sessions.create, + url=url, + timeout_seconds=timeout_seconds or None, + ) + if err: + return err + return _dump_json(response) + + +# Backwards-compatible alias +Tinyfish = TinyfishRun diff --git a/crew-ai/tests/test_tool.py b/crew-ai/tests/test_tool.py new file mode 100644 index 0000000..2b82cf5 --- /dev/null +++ b/crew-ai/tests/test_tool.py @@ -0,0 +1,227 @@ +"""Tests for TinyFish CrewAI tools.""" + +from __future__ import annotations + +import json +import os +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from tinyfish import BrowserProfile, ProxyCountryCode, RunStatus + +from tinyfish_web_agent import ( + Tinyfish, + TinyfishBrowserSession, + TinyfishFetch, + TinyfishGetRun, + TinyfishListRuns, + TinyfishRun, + TinyfishRunAsync, + TinyfishSearch, + __all__, +) + + +def _response(payload: dict): + return SimpleNamespace(model_dump=lambda: payload) + + +def test_public_exports() -> None: + assert sorted(__all__) == sorted( + [ + "Tinyfish", + "TinyfishRun", + "TinyfishRunAsync", + "TinyfishGetRun", + "TinyfishListRuns", + "TinyfishSearch", + "TinyfishFetch", + "TinyfishBrowserSession", + ] + ) + assert Tinyfish is TinyfishRun + + +def test_get_client_requires_api_key(monkeypatch) -> None: + monkeypatch.delenv("TINYFISH_API_KEY", raising=False) + + result = TinyfishRun()._run("https://example.com", "Extract title") + + assert result.startswith("Error: TINYFISH_API_KEY is not set") + + +def test_get_client_sets_integration_tag(monkeypatch) -> None: + monkeypatch.setenv("TINYFISH_API_KEY", "sk-test") + monkeypatch.delenv("TF_API_INTEGRATION", raising=False) + + with patch("tinyfish_web_agent.tool.TinyFish") as tinyfish: + TinyfishRun()._get_client() + + assert os.environ["TF_API_INTEGRATION"] == "crew-ai" + tinyfish.assert_called_once_with(api_key="sk-test") + + +def test_run_kwargs_defaults_invalid_profile_to_lite() -> None: + kwargs = TinyfishRun()._run_kwargs("bad-profile") + + assert kwargs["browser_profile"] == BrowserProfile.LITE + + +def test_run_kwargs_adds_proxy_config() -> None: + kwargs = TinyfishRun(proxy_country="GB")._run_kwargs("stealth") + + assert kwargs["browser_profile"] == BrowserProfile.STEALTH + assert kwargs["proxy_config"].enabled is True + assert kwargs["proxy_config"].country_code == ProxyCountryCode("GB") + + +def test_run_completed_result() -> None: + client = MagicMock() + client.agent.run.return_value = SimpleNamespace( + status=RunStatus.COMPLETED, + result={"title": "Example"}, + ) + + with patch.object(TinyfishRun, "_get_client", return_value=(client, None)): + result = TinyfishRun()._run("https://example.com", "Extract title") + + assert json.loads(result) == {"title": "Example"} + client.agent.run.assert_called_once() + + +def test_run_failed_result() -> None: + client = MagicMock() + client.agent.run.return_value = SimpleNamespace( + status=RunStatus.FAILED, + result=None, + error={"message": "Failed"}, + ) + + with patch.object(TinyfishRun, "_get_client", return_value=(client, None)): + result = TinyfishRun()._run("https://example.com", "Extract title") + + assert result == "Automation failed: Failed" + + +def test_async_run() -> None: + client = MagicMock() + client.agent.queue.return_value = SimpleNamespace(run_id="run-123") + + with patch.object(TinyfishRunAsync, "_get_client", return_value=(client, None)): + result = TinyfishRunAsync()._run("https://example.com", "Extract title") + + assert result == "Automation started. run_id: run-123" + + +def test_get_run_formats_details() -> None: + client = MagicMock() + client.runs.get.return_value = SimpleNamespace( + status=RunStatus.COMPLETED, + result={"ok": True}, + error=None, + streaming_url="https://stream.example.com", + ) + + with patch.object(TinyfishGetRun, "_get_client", return_value=(client, None)): + result = TinyfishGetRun()._run("run-123") + + assert "Status: COMPLETED" in result + assert "Live view: https://stream.example.com" in result + assert 'Result: {"ok": true}' in result + + +def test_list_runs_with_status_filter() -> None: + client = MagicMock() + client.runs.list.return_value = SimpleNamespace( + runs=[ + SimpleNamespace( + run_id="run-123", + status=RunStatus.COMPLETED, + url="https://example.com", + ) + ] + ) + + with patch.object(TinyfishListRuns, "_get_client", return_value=(client, None)): + result = TinyfishListRuns()._run(status="COMPLETED", limit=5) + + assert "Found 1 runs" in result + client.runs.list.assert_called_once_with(limit=5, status=RunStatus.COMPLETED) + + +def test_search_uses_sdk_resource() -> None: + client = MagicMock() + client.search.query.return_value = _response( + { + "query": "tinyfish", + "results": [{"title": "TinyFish", "url": "https://tinyfish.ai"}], + "total_results": 1, + } + ) + + with patch.object(TinyfishSearch, "_get_client", return_value=(client, None)): + result = TinyfishSearch()._run( + "tinyfish", + location="United States", + language="en", + ) + + assert json.loads(result)["total_results"] == 1 + client.search.query.assert_called_once_with( + query="tinyfish", + location="United States", + language="en", + ) + + +def test_fetch_uses_sdk_resource() -> None: + client = MagicMock() + client.fetch.get_contents.return_value = _response( + { + "results": [{"url": "https://example.com", "text": "Example"}], + "errors": [], + } + ) + + with patch.object(TinyfishFetch, "_get_client", return_value=(client, None)): + result = TinyfishFetch()._run( + ["https://example.com"], + format="markdown", + links=True, + image_links=False, + ) + + assert json.loads(result)["results"][0]["text"] == "Example" + client.fetch.get_contents.assert_called_once_with( + urls=["https://example.com"], + format="markdown", + links=True, + image_links=False, + ) + + +def test_create_browser_session_uses_sdk_resource() -> None: + client = MagicMock() + client.browser.sessions.create.return_value = _response( + { + "session_id": "tf-session", + "cdp_url": "wss://example.test", + "base_url": "https://example.test", + } + ) + + with patch.object( + TinyfishBrowserSession, + "_get_client", + return_value=(client, None), + ): + result = TinyfishBrowserSession()._run( + url="https://example.com", + timeout_seconds=120, + ) + + assert json.loads(result)["session_id"] == "tf-session" + client.browser.sessions.create.assert_called_once_with( + url="https://example.com", + timeout_seconds=120, + ) From 679fcbc4609609e0bd6f2c3f406392e77f817c4a Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Mon, 11 May 2026 22:50:09 +0800 Subject: [PATCH 2/9] Address CrewAI CodeRabbit review findings Align Python requirements with TinyFish SDK, enforce fetch URL bounds, handle invalid proxy countries, support SDK data responses, and add CI build coverage. --- .github/workflows/crew-ai-ci.yml | 2 ++ crew-ai/pyproject.toml | 4 ++-- crew-ai/requirements-dev.txt | 1 + crew-ai/src/tinyfish_web_agent/tool.py | 25 ++++++++++++++------ crew-ai/tests/test_tool.py | 32 ++++++++++++++++++++++++++ 5 files changed, 55 insertions(+), 9 deletions(-) diff --git a/.github/workflows/crew-ai-ci.yml b/.github/workflows/crew-ai-ci.yml index db35a3b..fdc3dd1 100644 --- a/.github/workflows/crew-ai-ci.yml +++ b/.github/workflows/crew-ai-ci.yml @@ -37,3 +37,5 @@ jobs: - run: make lint - run: make test + + - run: python -m build diff --git a/crew-ai/pyproject.toml b/crew-ai/pyproject.toml index 3fd2a90..f9e2871 100644 --- a/crew-ai/pyproject.toml +++ b/crew-ai/pyproject.toml @@ -3,7 +3,7 @@ name = "tinyfish-web-agent" version = "0.1.1" description = "Power up your crews with TinyFish Web Agent" readme = "README.md" -requires-python = ">=3.10,<3.14" +requires-python = ">=3.11,<3.14" dependencies = [ "crewai[tools]>=0.203.1", "tinyfish>=0.2.5", @@ -20,7 +20,7 @@ where = ["src"] type = "tool" [tool.ruff] -target-version = "py310" +target-version = "py311" line-length = 88 [tool.ruff.lint] diff --git a/crew-ai/requirements-dev.txt b/crew-ai/requirements-dev.txt index bcd205e..35b5b0d 100644 --- a/crew-ai/requirements-dev.txt +++ b/crew-ai/requirements-dev.txt @@ -1,2 +1,3 @@ +build>=1.0,<2.0 pytest>=7.0,<9.0 ruff>=0.4,<1.0 diff --git a/crew-ai/src/tinyfish_web_agent/tool.py b/crew-ai/src/tinyfish_web_agent/tool.py index 784845c..d6b50c6 100644 --- a/crew-ai/src/tinyfish_web_agent/tool.py +++ b/crew-ai/src/tinyfish_web_agent/tool.py @@ -117,6 +117,8 @@ class TinyfishFetchInput(BaseModel): urls: list[str] = Field( ..., + min_length=1, + max_length=10, description="One to ten URLs to fetch and extract clean content from.", ) format: Literal["markdown", "html", "json"] = Field( @@ -230,10 +232,11 @@ def _run_kwargs(self, browser_profile: str) -> dict[str, Any]: bp = BrowserProfile.LITE kwargs: dict[str, Any] = {"browser_profile": bp} if self.proxy_country: - kwargs["proxy_config"] = ProxyConfig( - enabled=True, - country_code=ProxyCountryCode(self.proxy_country), - ) + try: + country = ProxyCountryCode(self.proxy_country) + except ValueError: + return {"_error": f"Invalid proxy country: {self.proxy_country!r}"} + kwargs["proxy_config"] = ProxyConfig(enabled=True, country_code=country) return kwargs def _safe_call( @@ -283,11 +286,15 @@ def _run( if err: return err + kwargs = self._run_kwargs(browser_profile) + if "_error" in kwargs: + return f"Error: {kwargs['_error']}" + result, err = self._safe_call( client.agent.run, url=url, goal=goal, - **self._run_kwargs(browser_profile), + **kwargs, ) if err: return err @@ -331,11 +338,15 @@ def _run( if err: return err + kwargs = self._run_kwargs(browser_profile) + if "_error" in kwargs: + return f"Error: {kwargs['_error']}" + result, err = self._safe_call( client.agent.queue, url=url, goal=goal, - **self._run_kwargs(browser_profile), + **kwargs, ) if err: return err @@ -422,7 +433,7 @@ def _run( if err: return err - runs = getattr(response, "runs", []) + runs = getattr(response, "data", None) or getattr(response, "runs", []) if not runs: return "No runs found." diff --git a/crew-ai/tests/test_tool.py b/crew-ai/tests/test_tool.py index 2b82cf5..032461f 100644 --- a/crew-ai/tests/test_tool.py +++ b/crew-ai/tests/test_tool.py @@ -75,6 +75,19 @@ def test_run_kwargs_adds_proxy_config() -> None: assert kwargs["proxy_config"].country_code == ProxyCountryCode("GB") +def test_run_rejects_invalid_proxy_country() -> None: + client = MagicMock() + + with patch.object(TinyfishRun, "_get_client", return_value=(client, None)): + result = TinyfishRun(proxy_country="XX")._run( + "https://example.com", + "Extract title", + ) + + assert result == "Error: Invalid proxy country: 'XX'" + client.agent.run.assert_not_called() + + def test_run_completed_result() -> None: client = MagicMock() client.agent.run.return_value = SimpleNamespace( @@ -149,6 +162,25 @@ def test_list_runs_with_status_filter() -> None: client.runs.list.assert_called_once_with(limit=5, status=RunStatus.COMPLETED) +def test_list_runs_uses_sdk_data_field() -> None: + client = MagicMock() + client.runs.list.return_value = SimpleNamespace( + data=[ + SimpleNamespace( + run_id="run-123", + status=RunStatus.COMPLETED, + url="https://example.com", + ) + ] + ) + + with patch.object(TinyfishListRuns, "_get_client", return_value=(client, None)): + result = TinyfishListRuns()._run(limit=5) + + assert "Found 1 runs" in result + assert "run-123" in result + + def test_search_uses_sdk_resource() -> None: client = MagicMock() client.search.query.return_value = _response( From 5f69c5985d55fac73865cf23e108bb19791f86ca Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Thu, 14 May 2026 20:04:36 +0800 Subject: [PATCH 3/9] Ignore sibling integration dirs and editor/system noise --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9138b99 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.DS_Store +.claude/ +google-adk/ +langchain/ +supabase/ +zapier/ From 56710dd4839652d93c4d9ee9321f740f0681c96b Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Thu, 14 May 2026 20:39:54 +0800 Subject: [PATCH 4/9] Expose env_vars and config fields per concrete tool for AMP UI --- crew-ai/pyproject.toml | 2 +- crew-ai/src/tinyfish_web_agent/tool.py | 135 ++++++++++++++++++++++--- 2 files changed, 124 insertions(+), 13 deletions(-) diff --git a/crew-ai/pyproject.toml b/crew-ai/pyproject.toml index f9e2871..7b38f72 100644 --- a/crew-ai/pyproject.toml +++ b/crew-ai/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "tinyfish-web-agent" -version = "0.1.1" +version = "0.1.2" description = "Power up your crews with TinyFish Web Agent" readme = "README.md" requires-python = ">=3.11,<3.14" diff --git a/crew-ai/src/tinyfish_web_agent/tool.py b/crew-ai/src/tinyfish_web_agent/tool.py index d6b50c6..2b46df6 100644 --- a/crew-ai/src/tinyfish_web_agent/tool.py +++ b/crew-ai/src/tinyfish_web_agent/tool.py @@ -182,28 +182,34 @@ def _dump_json(value: Any) -> str: return json.dumps(value) +_TINYFISH_ENV_VARS: List[EnvVar] = [ + EnvVar( + name="TINYFISH_API_KEY", + description=( + "TinyFish Web Agent API key. " + "Get one at https://agent.tinyfish.ai/api-keys" + ), + required=True, + ), +] + + # --------------------------------------------------------------------------- # Base tool # --------------------------------------------------------------------------- class _TinyfishBaseTool(BaseTool): - """Shared configuration and helpers for all Tinyfish tools.""" + """Shared configuration and helpers for all Tinyfish tools. + + Concrete subclasses redeclare ``api_key``, ``proxy_country``, + ``env_vars``, and ``package_dependencies`` as Pydantic Fields so the + CrewAI AMP UI surfaces them on each tool. + """ api_key: Optional[str] = None proxy_country: Optional[str] = None - env_vars: List[EnvVar] = [ - EnvVar( - name="TINYFISH_API_KEY", - description=( - "TinyFish Web Agent API key. " - "Get one at https://agent.tinyfish.ai/api-keys" - ), - required=True, - ), - ] - # Cached per-instance to avoid re-creating on every call. _client: Optional[TinyFish] = None @@ -275,6 +281,21 @@ class TinyfishRun(_TinyfishBaseTool): "JSON results." ) args_schema: Type[BaseModel] = TinyfishRunInput + package_dependencies: List[str] = ["tinyfish"] + api_key: Optional[str] = Field( + default_factory=lambda: os.getenv("TINYFISH_API_KEY"), + description="TinyFish API key (overrides env var if provided).", + json_schema_extra={"required": False}, + ) + proxy_country: Optional[str] = Field( + default=None, + description=( + "Optional ISO country code for proxy routing " + "(US, GB, CA, DE, FR, JP, AU)." + ), + json_schema_extra={"required": False}, + ) + env_vars: List[EnvVar] = _TINYFISH_ENV_VARS def _run( self, @@ -327,6 +348,21 @@ class TinyfishRunAsync(_TinyfishBaseTool): "and get results." ) args_schema: Type[BaseModel] = TinyfishRunInput + package_dependencies: List[str] = ["tinyfish"] + api_key: Optional[str] = Field( + default_factory=lambda: os.getenv("TINYFISH_API_KEY"), + description="TinyFish API key (overrides env var if provided).", + json_schema_extra={"required": False}, + ) + proxy_country: Optional[str] = Field( + default=None, + description=( + "Optional ISO country code for proxy routing " + "(US, GB, CA, DE, FR, JP, AU)." + ), + json_schema_extra={"required": False}, + ) + env_vars: List[EnvVar] = _TINYFISH_ENV_VARS def _run( self, @@ -366,6 +402,21 @@ class TinyfishGetRun(_TinyfishBaseTool): "and result data." ) args_schema: Type[BaseModel] = TinyfishGetRunInput + package_dependencies: List[str] = ["tinyfish"] + api_key: Optional[str] = Field( + default_factory=lambda: os.getenv("TINYFISH_API_KEY"), + description="TinyFish API key (overrides env var if provided).", + json_schema_extra={"required": False}, + ) + proxy_country: Optional[str] = Field( + default=None, + description=( + "Optional ISO country code for proxy routing " + "(US, GB, CA, DE, FR, JP, AU)." + ), + json_schema_extra={"required": False}, + ) + env_vars: List[EnvVar] = _TINYFISH_ENV_VARS def _run(self, run_id: str) -> str: client, err = self._get_client() @@ -400,6 +451,21 @@ class TinyfishListRuns(_TinyfishBaseTool): "COMPLETED, FAILED, CANCELLED)." ) args_schema: Type[BaseModel] = TinyfishListRunsInput + package_dependencies: List[str] = ["tinyfish"] + api_key: Optional[str] = Field( + default_factory=lambda: os.getenv("TINYFISH_API_KEY"), + description="TinyFish API key (overrides env var if provided).", + json_schema_extra={"required": False}, + ) + proxy_country: Optional[str] = Field( + default=None, + description=( + "Optional ISO country code for proxy routing " + "(US, GB, CA, DE, FR, JP, AU)." + ), + json_schema_extra={"required": False}, + ) + env_vars: List[EnvVar] = _TINYFISH_ENV_VARS def _run( self, @@ -457,6 +523,21 @@ class TinyfishSearch(_TinyfishBaseTool): "fetching content or running browser automation." ) args_schema: Type[BaseModel] = TinyfishSearchInput + package_dependencies: List[str] = ["tinyfish"] + api_key: Optional[str] = Field( + default_factory=lambda: os.getenv("TINYFISH_API_KEY"), + description="TinyFish API key (overrides env var if provided).", + json_schema_extra={"required": False}, + ) + proxy_country: Optional[str] = Field( + default=None, + description=( + "Optional ISO country code for proxy routing " + "(US, GB, CA, DE, FR, JP, AU)." + ), + json_schema_extra={"required": False}, + ) + env_vars: List[EnvVar] = _TINYFISH_ENV_VARS def _run( self, @@ -494,6 +575,21 @@ class TinyfishFetch(_TinyfishBaseTool): "metadata, links, or image links without controlling a browser." ) args_schema: Type[BaseModel] = TinyfishFetchInput + package_dependencies: List[str] = ["tinyfish"] + api_key: Optional[str] = Field( + default_factory=lambda: os.getenv("TINYFISH_API_KEY"), + description="TinyFish API key (overrides env var if provided).", + json_schema_extra={"required": False}, + ) + proxy_country: Optional[str] = Field( + default=None, + description=( + "Optional ISO country code for proxy routing " + "(US, GB, CA, DE, FR, JP, AU)." + ), + json_schema_extra={"required": False}, + ) + env_vars: List[EnvVar] = _TINYFISH_ENV_VARS def _run( self, @@ -528,6 +624,21 @@ class TinyfishBrowserSession(_TinyfishBaseTool): "control from external browser automation clients." ) args_schema: Type[BaseModel] = TinyfishBrowserSessionInput + package_dependencies: List[str] = ["tinyfish"] + api_key: Optional[str] = Field( + default_factory=lambda: os.getenv("TINYFISH_API_KEY"), + description="TinyFish API key (overrides env var if provided).", + json_schema_extra={"required": False}, + ) + proxy_country: Optional[str] = Field( + default=None, + description=( + "Optional ISO country code for proxy routing " + "(US, GB, CA, DE, FR, JP, AU)." + ), + json_schema_extra={"required": False}, + ) + env_vars: List[EnvVar] = _TINYFISH_ENV_VARS def _run( self, From f87072fa7fac89b323e711a15f852e0c4ec572fc Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Thu, 14 May 2026 22:46:35 +0800 Subject: [PATCH 5/9] Add TinyfishWebAgent alias for Crew Studio auto-import --- crew-ai/pyproject.toml | 2 +- crew-ai/src/tinyfish_web_agent/__init__.py | 2 ++ crew-ai/src/tinyfish_web_agent/tool.py | 4 ++++ crew-ai/tests/test_tool.py | 4 ++++ 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/crew-ai/pyproject.toml b/crew-ai/pyproject.toml index 7b38f72..ba8eb18 100644 --- a/crew-ai/pyproject.toml +++ b/crew-ai/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "tinyfish-web-agent" -version = "0.1.2" +version = "0.1.3" description = "Power up your crews with TinyFish Web Agent" readme = "README.md" requires-python = ">=3.11,<3.14" diff --git a/crew-ai/src/tinyfish_web_agent/__init__.py b/crew-ai/src/tinyfish_web_agent/__init__.py index 01ed426..62eee04 100644 --- a/crew-ai/src/tinyfish_web_agent/__init__.py +++ b/crew-ai/src/tinyfish_web_agent/__init__.py @@ -7,6 +7,7 @@ TinyfishRun, TinyfishRunAsync, TinyfishSearch, + TinyfishWebAgent, ) __all__ = [ @@ -18,4 +19,5 @@ "TinyfishSearch", "TinyfishFetch", "TinyfishBrowserSession", + "TinyfishWebAgent", ] diff --git a/crew-ai/src/tinyfish_web_agent/tool.py b/crew-ai/src/tinyfish_web_agent/tool.py index 2b46df6..e5fd83b 100644 --- a/crew-ai/src/tinyfish_web_agent/tool.py +++ b/crew-ai/src/tinyfish_web_agent/tool.py @@ -661,3 +661,7 @@ def _run( # Backwards-compatible alias Tinyfish = TinyfishRun + +# Crew Studio auto-imports `CamelCase(package_name)` — alias so the +# Studio-generated crew code can import this package without errors. +TinyfishWebAgent = TinyfishRun diff --git a/crew-ai/tests/test_tool.py b/crew-ai/tests/test_tool.py index 032461f..6479146 100644 --- a/crew-ai/tests/test_tool.py +++ b/crew-ai/tests/test_tool.py @@ -27,6 +27,8 @@ def _response(payload: dict): def test_public_exports() -> None: + from tinyfish_web_agent import TinyfishWebAgent + assert sorted(__all__) == sorted( [ "Tinyfish", @@ -37,9 +39,11 @@ def test_public_exports() -> None: "TinyfishSearch", "TinyfishFetch", "TinyfishBrowserSession", + "TinyfishWebAgent", ] ) assert Tinyfish is TinyfishRun + assert TinyfishWebAgent is TinyfishRun def test_get_client_requires_api_key(monkeypatch) -> None: From fdefd950ee088b2276d69175012fbc61e7f26345 Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Thu, 14 May 2026 22:47:03 +0800 Subject: [PATCH 6/9] Ignore test crew + examples scratchpad --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 9138b99..ddb1df3 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ google-adk/ langchain/ supabase/ zapier/ +test_tinyfish_crew/ +crew-ai/examples/ From 35256f9fca6c1db6a6b1d84beaf7e1b486ae598b Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Thu, 14 May 2026 22:53:48 +0800 Subject: [PATCH 7/9] Lazy-import tinyfish SDK so tool loads without it (matches Exa pattern) --- crew-ai/pyproject.toml | 2 +- crew-ai/src/tinyfish_web_agent/tool.py | 37 +++++++++++++++++++------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/crew-ai/pyproject.toml b/crew-ai/pyproject.toml index ba8eb18..8b3a05c 100644 --- a/crew-ai/pyproject.toml +++ b/crew-ai/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "tinyfish-web-agent" -version = "0.1.3" +version = "0.1.4" description = "Power up your crews with TinyFish Web Agent" readme = "README.md" requires-python = ">=3.11,<3.14" diff --git a/crew-ai/src/tinyfish_web_agent/tool.py b/crew-ai/src/tinyfish_web_agent/tool.py index e5fd83b..02b2111 100644 --- a/crew-ai/src/tinyfish_web_agent/tool.py +++ b/crew-ai/src/tinyfish_web_agent/tool.py @@ -5,13 +5,26 @@ from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field, field_validator -from tinyfish import ( - BrowserProfile, - ProxyConfig, - ProxyCountryCode, - RunStatus, - TinyFish, -) + +try: + from tinyfish import ( + BrowserProfile, + ProxyConfig, + ProxyCountryCode, + RunStatus, + TinyFish, + ) + + _TINYFISH_AVAILABLE = True + _TINYFISH_IMPORT_ERROR: Optional[str] = None +except ImportError as exc: + BrowserProfile = Any # type: ignore[misc,assignment] + ProxyConfig = Any # type: ignore[misc,assignment] + ProxyCountryCode = Any # type: ignore[misc,assignment] + RunStatus = Any # type: ignore[misc,assignment] + TinyFish = Any # type: ignore[misc,assignment] + _TINYFISH_AVAILABLE = False + _TINYFISH_IMPORT_ERROR = str(exc) logger = logging.getLogger(__name__) @@ -156,7 +169,7 @@ class TinyfishBrowserSessionInput(BaseModel): def _format_status(status: Any) -> str: """Convert a RunStatus enum (or any value) to a string.""" - if isinstance(status, RunStatus): + if _TINYFISH_AVAILABLE and isinstance(status, RunStatus): return status.value return str(status) @@ -213,8 +226,14 @@ class _TinyfishBaseTool(BaseTool): # Cached per-instance to avoid re-creating on every call. _client: Optional[TinyFish] = None - def _get_client(self) -> tuple[Optional[TinyFish], Optional[str]]: + def _get_client(self) -> tuple[Optional[Any], Optional[str]]: """Return (client, None) or (None, error_string).""" + if not _TINYFISH_AVAILABLE: + return None, ( + "Error: the 'tinyfish' Python SDK is not installed in this " + "environment. Install with `pip install tinyfish`. " + f"(Import error: {_TINYFISH_IMPORT_ERROR})" + ) if self._client is not None: return self._client, None _ensure_integration_tag() From 4e30395ad494ce0e9f6337f6ead49792fa40388f Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Fri, 15 May 2026 00:23:23 +0800 Subject: [PATCH 8/9] Refactor to single-tool, requests-based TinyfishWebAgent (0.2.0) Drops the multi-tool SDK-backed design (which failed to install in Crew Studio sandboxes) in favour of one focused tool that hits the TinyFish agent endpoint via raw requests: * One BaseTool: TinyfishWebAgent (sync run on /v1/automation/run) * Drops tinyfish SDK dep; only requires crewai[tools] and requests * Lowers requires-python to >=3.10,<3.14 (matches CrewAI's broader floor) The class name matches CamelCase(package_name), so Crew Studio's auto-import resolves it correctly and exposes a single card to users. Code-first users can also import it directly. Tests mock requests.post instead of the SDK client. --- crew-ai/pyproject.toml | 8 +- crew-ai/src/tinyfish_web_agent/__init__.py | 24 +- crew-ai/src/tinyfish_web_agent/tool.py | 700 +++------------------ crew-ai/tests/test_tool.py | 331 ++++------ 4 files changed, 202 insertions(+), 861 deletions(-) diff --git a/crew-ai/pyproject.toml b/crew-ai/pyproject.toml index 8b3a05c..d72b44c 100644 --- a/crew-ai/pyproject.toml +++ b/crew-ai/pyproject.toml @@ -1,12 +1,12 @@ [project] name = "tinyfish-web-agent" -version = "0.1.4" +version = "0.2.0" description = "Power up your crews with TinyFish Web Agent" readme = "README.md" -requires-python = ">=3.11,<3.14" +requires-python = ">=3.10,<3.14" dependencies = [ "crewai[tools]>=0.203.1", - "tinyfish>=0.2.5", + "requests>=2.28", ] [build-system] @@ -20,7 +20,7 @@ where = ["src"] type = "tool" [tool.ruff] -target-version = "py311" +target-version = "py310" line-length = 88 [tool.ruff.lint] diff --git a/crew-ai/src/tinyfish_web_agent/__init__.py b/crew-ai/src/tinyfish_web_agent/__init__.py index 62eee04..11dd944 100644 --- a/crew-ai/src/tinyfish_web_agent/__init__.py +++ b/crew-ai/src/tinyfish_web_agent/__init__.py @@ -1,23 +1,3 @@ -from .tool import ( - Tinyfish, - TinyfishBrowserSession, - TinyfishFetch, - TinyfishGetRun, - TinyfishListRuns, - TinyfishRun, - TinyfishRunAsync, - TinyfishSearch, - TinyfishWebAgent, -) +from .tool import TinyfishWebAgent -__all__ = [ - "Tinyfish", - "TinyfishRun", - "TinyfishRunAsync", - "TinyfishGetRun", - "TinyfishListRuns", - "TinyfishSearch", - "TinyfishFetch", - "TinyfishBrowserSession", - "TinyfishWebAgent", -] +__all__ = ["TinyfishWebAgent"] diff --git a/crew-ai/src/tinyfish_web_agent/tool.py b/crew-ai/src/tinyfish_web_agent/tool.py index 02b2111..f0c6974 100644 --- a/crew-ai/src/tinyfish_web_agent/tool.py +++ b/crew-ai/src/tinyfish_web_agent/tool.py @@ -1,57 +1,43 @@ +"""TinyFish CrewAI tool — synchronous browser-automation agent. + +Single-tool package. Talks to the TinyFish agent endpoint via raw HTTP +so it installs cleanly on Python 3.10+ in any Crew Studio sandbox. +""" + +from __future__ import annotations + import json -import logging import os from typing import Any, List, Literal, Optional, Type +import requests from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field, field_validator -try: - from tinyfish import ( - BrowserProfile, - ProxyConfig, - ProxyCountryCode, - RunStatus, - TinyFish, - ) - - _TINYFISH_AVAILABLE = True - _TINYFISH_IMPORT_ERROR: Optional[str] = None -except ImportError as exc: - BrowserProfile = Any # type: ignore[misc,assignment] - ProxyConfig = Any # type: ignore[misc,assignment] - ProxyCountryCode = Any # type: ignore[misc,assignment] - RunStatus = Any # type: ignore[misc,assignment] - TinyFish = Any # type: ignore[misc,assignment] - _TINYFISH_AVAILABLE = False - _TINYFISH_IMPORT_ERROR = str(exc) - -logger = logging.getLogger(__name__) - +_BASE_URL = "https://agent.tinyfish.ai" +_AGENT_PATH = "/v1/automation/run" +_TIMEOUT_SECONDS = 300 # automation runs can take minutes _INTEGRATION_NAME = "crew-ai" - -# --------------------------------------------------------------------------- -# Input schemas -# --------------------------------------------------------------------------- +_VALID_PROXY_COUNTRIES = {"US", "GB", "CA", "DE", "FR", "JP", "AU"} -class TinyfishRunInput(BaseModel): - """Input schema for running a browser automation.""" +class TinyfishWebAgentInput(BaseModel): + """Input schema for a synchronous TinyFish browser automation.""" url: str = Field(..., description="Target website URL to automate.") goal: str = Field( ..., description=( - "Natural language description of what to accomplish on the website." + "Natural language description of what to accomplish on the website " + "(e.g., 'extract product names and prices', 'fill the contact form')." ), ) browser_profile: Literal["lite", "stealth"] = Field( default="lite", description=( - 'Browser execution mode. "lite" (default) is fast for ' - 'standard sites. "stealth" enables anti-detection for ' - "sites with bot protection." + 'Browser execution mode. "lite" is fast for standard sites; ' + '"stealth" enables anti-detection for sites with bot protection.' ), ) @@ -63,139 +49,7 @@ def _check_url_scheme(cls, v: str) -> str: return v -class TinyfishGetRunInput(BaseModel): - """Input schema for getting a run by ID.""" - - run_id: str = Field(..., description="The unique run ID to look up.") - - -class TinyfishListRunsInput(BaseModel): - """Input schema for listing runs.""" - - status: Optional[ - Literal[ - "PENDING", - "RUNNING", - "COMPLETED", - "FAILED", - "CANCELLED", - ] - ] = Field(default=None, description="Filter runs by status.") - goal: Optional[str] = Field( - default=None, - description=("Search runs by goal text (case-insensitive partial match)."), - ) - limit: int = Field( - default=20, - ge=1, - le=100, - description="Maximum number of runs to return (1-100).", - ) - cursor: Optional[str] = Field( - default=None, - description=( - "Pagination cursor from a previous response to get the next page." - ), - ) - created_after: Optional[str] = Field( - default=None, - description=("Only return runs created after this ISO 8601 timestamp."), - ) - created_before: Optional[str] = Field( - default=None, - description=("Only return runs created before this ISO 8601 timestamp."), - ) - sort_direction: Optional[Literal["asc", "desc"]] = Field( - default=None, - description=('Sort order by creation date: "asc" or "desc" (default: desc).'), - ) - - -class TinyfishSearchInput(BaseModel): - """Input schema for searching the web.""" - - query: str = Field(..., description="Search query.") - location: Optional[str] = Field( - default=None, - description="Optional location to scope results, such as 'United States'.", - ) - language: Optional[str] = Field( - default=None, - description="Optional language code, such as 'en'.", - ) - - -class TinyfishFetchInput(BaseModel): - """Input schema for fetching clean page content.""" - - urls: list[str] = Field( - ..., - min_length=1, - max_length=10, - description="One to ten URLs to fetch and extract clean content from.", - ) - format: Literal["markdown", "html", "json"] = Field( - default="markdown", - description="Output format for extracted content.", - ) - links: bool = Field(default=False, description="Whether to include page links.") - image_links: bool = Field( - default=False, - description="Whether to include image links.", - ) - - -class TinyfishBrowserSessionInput(BaseModel): - """Input schema for creating a remote browser session.""" - - url: Optional[str] = Field( - default=None, - description="Optional URL to open when the browser session starts.", - ) - timeout_seconds: int = Field( - default=0, - ge=0, - description=( - "Optional inactivity timeout in seconds. " - "Use 0 to let TinyFish apply the plan default." - ), - ) - - -# --------------------------------------------------------------------------- -# Shared helpers -# --------------------------------------------------------------------------- - - -def _format_status(status: Any) -> str: - """Convert a RunStatus enum (or any value) to a string.""" - if _TINYFISH_AVAILABLE and isinstance(status, RunStatus): - return status.value - return str(status) - - -def _format_error(error: Any) -> str: - """Extract a human-readable message from an error object.""" - if isinstance(error, dict): - return error.get("message", str(error)) - return str(error) - - -def _ensure_integration_tag() -> None: - """Set the default TinyFish integration tag for request attribution.""" - os.environ.setdefault("TF_API_INTEGRATION", _INTEGRATION_NAME) - - -def _dump_json(value: Any) -> str: - """Serialize SDK response models to a JSON string.""" - if hasattr(value, "model_dump_json"): - return str(value.model_dump_json()) - if hasattr(value, "model_dump"): - return json.dumps(value.model_dump()) - return json.dumps(value) - - -_TINYFISH_ENV_VARS: List[EnvVar] = [ +_ENV_VARS: List[EnvVar] = [ EnvVar( name="TINYFISH_API_KEY", description=( @@ -207,167 +61,19 @@ def _dump_json(value: Any) -> str: ] -# --------------------------------------------------------------------------- -# Base tool -# --------------------------------------------------------------------------- - - -class _TinyfishBaseTool(BaseTool): - """Shared configuration and helpers for all Tinyfish tools. - - Concrete subclasses redeclare ``api_key``, ``proxy_country``, - ``env_vars``, and ``package_dependencies`` as Pydantic Fields so the - CrewAI AMP UI surfaces them on each tool. - """ - - api_key: Optional[str] = None - proxy_country: Optional[str] = None - - # Cached per-instance to avoid re-creating on every call. - _client: Optional[TinyFish] = None - - def _get_client(self) -> tuple[Optional[Any], Optional[str]]: - """Return (client, None) or (None, error_string).""" - if not _TINYFISH_AVAILABLE: - return None, ( - "Error: the 'tinyfish' Python SDK is not installed in this " - "environment. Install with `pip install tinyfish`. " - f"(Import error: {_TINYFISH_IMPORT_ERROR})" - ) - if self._client is not None: - return self._client, None - _ensure_integration_tag() - key = self.api_key or os.environ.get("TINYFISH_API_KEY") - if not key: - return None, ( - "Error: TINYFISH_API_KEY is not set. " - "Set it as an environment variable or pass " - "api_key when creating the tool. " - "Get your key at " - "https://agent.tinyfish.ai/api-keys" - ) - self._client = TinyFish(api_key=key) - return self._client, None - - def _run_kwargs(self, browser_profile: str) -> dict[str, Any]: - """Common kwargs for SDK agent calls.""" - try: - bp = BrowserProfile(browser_profile) - except ValueError: - bp = BrowserProfile.LITE - kwargs: dict[str, Any] = {"browser_profile": bp} - if self.proxy_country: - try: - country = ProxyCountryCode(self.proxy_country) - except ValueError: - return {"_error": f"Invalid proxy country: {self.proxy_country!r}"} - kwargs["proxy_config"] = ProxyConfig(enabled=True, country_code=country) - return kwargs - - def _safe_call( - self, fn: Any, *args: Any, **kwargs: Any - ) -> tuple[Any, Optional[str]]: - """Call fn and return (result, None) or (None, error). - - Logs the traceback at DEBUG level for diagnostics. - """ - try: - return fn(*args, **kwargs), None - except Exception as exc: - logger.debug("SDK call failed", exc_info=True) - return None, f"Error: {exc}" - - -# --------------------------------------------------------------------------- -# Tools -# --------------------------------------------------------------------------- - - -class TinyfishRun(_TinyfishBaseTool): - """Run a browser automation synchronously and return the - result. - - Best for quick tasks that complete in under 30 seconds. - """ +class TinyfishWebAgent(BaseTool): + """Run a TinyFish browser automation synchronously and return the result.""" name: str = "Tinyfish Web Agent" description: str = ( - "Automate any website using natural language. " - "Provide a URL and describe what you want to " - "accomplish — extract data, fill forms, click " - "buttons, navigate pages, and more. " - "Waits for completion and returns structured " - "JSON results." - ) - args_schema: Type[BaseModel] = TinyfishRunInput - package_dependencies: List[str] = ["tinyfish"] - api_key: Optional[str] = Field( - default_factory=lambda: os.getenv("TINYFISH_API_KEY"), - description="TinyFish API key (overrides env var if provided).", - json_schema_extra={"required": False}, - ) - proxy_country: Optional[str] = Field( - default=None, - description=( - "Optional ISO country code for proxy routing " - "(US, GB, CA, DE, FR, JP, AU)." - ), - json_schema_extra={"required": False}, - ) - env_vars: List[EnvVar] = _TINYFISH_ENV_VARS - - def _run( - self, - url: str, - goal: str, - browser_profile: str = "lite", - ) -> str: - client, err = self._get_client() - if err: - return err - - kwargs = self._run_kwargs(browser_profile) - if "_error" in kwargs: - return f"Error: {kwargs['_error']}" - - result, err = self._safe_call( - client.agent.run, - url=url, - goal=goal, - **kwargs, - ) - if err: - return err - - if result.status == RunStatus.COMPLETED: - if result.result is not None: - return json.dumps(result.result) - return "Completed with no result data." - - error = getattr(result, "error", None) - if error: - return f"Automation failed: {_format_error(error)}" - return f"Automation ended with status: {_format_status(result.status)}" - - -class TinyfishRunAsync(_TinyfishBaseTool): - """Start a browser automation asynchronously and return - the run ID. - - Use TinyfishGetRun to poll for results. - Best for long-running tasks. - """ - - name: str = "Tinyfish Web Agent (Async)" - description: str = ( - "Start a browser automation asynchronously. " - "Returns a run_id immediately without waiting " - "for completion. " - "Use the 'Tinyfish Get Run' tool to check status " - "and get results." - ) - args_schema: Type[BaseModel] = TinyfishRunInput - package_dependencies: List[str] = ["tinyfish"] + "Automate any website using natural language. Provide a URL and a " + "goal describing what to accomplish — extract data, fill forms, click " + "buttons, navigate pages, and more. Waits for completion and returns " + "the structured JSON result, including status, run_id, and any " + "extracted data." + ) + args_schema: Type[BaseModel] = TinyfishWebAgentInput + package_dependencies: List[str] = ["requests"] api_key: Optional[str] = Field( default_factory=lambda: os.getenv("TINYFISH_API_KEY"), description="TinyFish API key (overrides env var if provided).", @@ -376,12 +82,11 @@ class TinyfishRunAsync(_TinyfishBaseTool): proxy_country: Optional[str] = Field( default=None, description=( - "Optional ISO country code for proxy routing " - "(US, GB, CA, DE, FR, JP, AU)." + "ISO country code for proxy routing (US, GB, CA, DE, FR, JP, AU)." ), json_schema_extra={"required": False}, ) - env_vars: List[EnvVar] = _TINYFISH_ENV_VARS + env_vars: List[EnvVar] = _ENV_VARS def _run( self, @@ -389,298 +94,55 @@ def _run( goal: str, browser_profile: str = "lite", ) -> str: - client, err = self._get_client() - if err: - return err - - kwargs = self._run_kwargs(browser_profile) - if "_error" in kwargs: - return f"Error: {kwargs['_error']}" - - result, err = self._safe_call( - client.agent.queue, - url=url, - goal=goal, - **kwargs, - ) - if err: - return err - - return f"Automation started. run_id: {result.run_id}" - - -class TinyfishGetRun(_TinyfishBaseTool): - """Get the status and result of a TinyFish automation - run.""" - - name: str = "Tinyfish Get Run" - description: str = ( - "Check the status and get results of a TinyFish " - "automation run by its run_id. Returns status " - "(PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) " - "and result data." - ) - args_schema: Type[BaseModel] = TinyfishGetRunInput - package_dependencies: List[str] = ["tinyfish"] - api_key: Optional[str] = Field( - default_factory=lambda: os.getenv("TINYFISH_API_KEY"), - description="TinyFish API key (overrides env var if provided).", - json_schema_extra={"required": False}, - ) - proxy_country: Optional[str] = Field( - default=None, - description=( - "Optional ISO country code for proxy routing " - "(US, GB, CA, DE, FR, JP, AU)." - ), - json_schema_extra={"required": False}, - ) - env_vars: List[EnvVar] = _TINYFISH_ENV_VARS - - def _run(self, run_id: str) -> str: - client, err = self._get_client() - if err: - return err - - run, err = self._safe_call(client.runs.get, run_id) - if err: - return err - - parts = [f"Status: {_format_status(run.status)}"] - streaming_url = getattr(run, "streaming_url", None) - if streaming_url: - parts.append(f"Live view: {streaming_url}") - result = getattr(run, "result", None) - if result is not None: - parts.append(f"Result: {json.dumps(result)}") - error = getattr(run, "error", None) - if error: - parts.append(f"Error: {_format_error(error)}") - - return "\n".join(parts) - - -class TinyfishListRuns(_TinyfishBaseTool): - """List recent TinyFish automation runs.""" - - name: str = "Tinyfish List Runs" - description: str = ( - "List recent TinyFish automation runs. " - "Optionally filter by status (PENDING, RUNNING, " - "COMPLETED, FAILED, CANCELLED)." - ) - args_schema: Type[BaseModel] = TinyfishListRunsInput - package_dependencies: List[str] = ["tinyfish"] - api_key: Optional[str] = Field( - default_factory=lambda: os.getenv("TINYFISH_API_KEY"), - description="TinyFish API key (overrides env var if provided).", - json_schema_extra={"required": False}, - ) - proxy_country: Optional[str] = Field( - default=None, - description=( - "Optional ISO country code for proxy routing " - "(US, GB, CA, DE, FR, JP, AU)." - ), - json_schema_extra={"required": False}, - ) - env_vars: List[EnvVar] = _TINYFISH_ENV_VARS - - def _run( - self, - status: Optional[str] = None, - goal: Optional[str] = None, - limit: int = 20, - cursor: Optional[str] = None, - created_after: Optional[str] = None, - created_before: Optional[str] = None, - sort_direction: Optional[str] = None, - ) -> str: - client, err = self._get_client() - if err: - return err - - kwargs: dict[str, Any] = {"limit": limit} - if status: - kwargs["status"] = RunStatus(status) - if goal: - kwargs["goal"] = goal - if cursor: - kwargs["cursor"] = cursor - if created_after: - kwargs["created_after"] = created_after - if created_before: - kwargs["created_before"] = created_before - if sort_direction: - kwargs["sort_direction"] = sort_direction - - response, err = self._safe_call(client.runs.list, **kwargs) - if err: - return err - - runs = getattr(response, "data", None) or getattr(response, "runs", []) - if not runs: - return "No runs found." - - lines = [] - for run in runs: - rid = getattr(run, "run_id", "?") - st = _format_status(getattr(run, "status", "?")) - url = getattr(run, "url", "?") - lines.append(f"- {rid} | {st} | {url}") - - return f"Found {len(runs)} runs:\n" + "\n".join(lines) - - -class TinyfishSearch(_TinyfishBaseTool): - """Search the web using TinyFish.""" - - name: str = "Tinyfish Search" - description: str = ( - "Search the web using TinyFish and return structured search results. " - "Use this to discover relevant URLs or current web pages before " - "fetching content or running browser automation." - ) - args_schema: Type[BaseModel] = TinyfishSearchInput - package_dependencies: List[str] = ["tinyfish"] - api_key: Optional[str] = Field( - default_factory=lambda: os.getenv("TINYFISH_API_KEY"), - description="TinyFish API key (overrides env var if provided).", - json_schema_extra={"required": False}, - ) - proxy_country: Optional[str] = Field( - default=None, - description=( - "Optional ISO country code for proxy routing " - "(US, GB, CA, DE, FR, JP, AU)." - ), - json_schema_extra={"required": False}, - ) - env_vars: List[EnvVar] = _TINYFISH_ENV_VARS - - def _run( - self, - query: str, - location: Optional[str] = None, - language: Optional[str] = None, - ) -> str: - client, err = self._get_client() - if err: - return err - - kwargs: dict[str, Any] = {} - if location: - kwargs["location"] = location - if language: - kwargs["language"] = language - - response, err = self._safe_call( - client.search.query, - query=query, - **kwargs, - ) - if err: - return err - return _dump_json(response) - - -class TinyfishFetch(_TinyfishBaseTool): - """Fetch clean content from one or more URLs using TinyFish.""" - - name: str = "Tinyfish Fetch" - description: str = ( - "Fetch clean readable content from one or more URLs using TinyFish. " - "Use this when the URL is already known and the agent needs page text, " - "metadata, links, or image links without controlling a browser." - ) - args_schema: Type[BaseModel] = TinyfishFetchInput - package_dependencies: List[str] = ["tinyfish"] - api_key: Optional[str] = Field( - default_factory=lambda: os.getenv("TINYFISH_API_KEY"), - description="TinyFish API key (overrides env var if provided).", - json_schema_extra={"required": False}, - ) - proxy_country: Optional[str] = Field( - default=None, - description=( - "Optional ISO country code for proxy routing " - "(US, GB, CA, DE, FR, JP, AU)." - ), - json_schema_extra={"required": False}, - ) - env_vars: List[EnvVar] = _TINYFISH_ENV_VARS - - def _run( - self, - urls: list[str], - format: Literal["markdown", "html", "json"] = "markdown", - links: bool = False, - image_links: bool = False, - ) -> str: - client, err = self._get_client() - if err: - return err - - response, err = self._safe_call( - client.fetch.get_contents, - urls=urls, - format=format, - links=links, - image_links=image_links, - ) - if err: - return err - return _dump_json(response) - - -class TinyfishBrowserSession(_TinyfishBaseTool): - """Create a TinyFish remote browser session.""" - - name: str = "Tinyfish Browser Session" - description: str = ( - "Create a TinyFish remote browser session and return its session ID, " - "CDP WebSocket URL, and base URL. Use this for low-level browser " - "control from external browser automation clients." - ) - args_schema: Type[BaseModel] = TinyfishBrowserSessionInput - package_dependencies: List[str] = ["tinyfish"] - api_key: Optional[str] = Field( - default_factory=lambda: os.getenv("TINYFISH_API_KEY"), - description="TinyFish API key (overrides env var if provided).", - json_schema_extra={"required": False}, - ) - proxy_country: Optional[str] = Field( - default=None, - description=( - "Optional ISO country code for proxy routing " - "(US, GB, CA, DE, FR, JP, AU)." - ), - json_schema_extra={"required": False}, - ) - env_vars: List[EnvVar] = _TINYFISH_ENV_VARS - - def _run( - self, - url: Optional[str] = None, - timeout_seconds: int = 0, - ) -> str: - client, err = self._get_client() - if err: - return err - - response, err = self._safe_call( - client.browser.sessions.create, - url=url, - timeout_seconds=timeout_seconds or None, - ) - if err: - return err - return _dump_json(response) + key = self.api_key or os.environ.get("TINYFISH_API_KEY") + if not key: + return ( + "Error: TINYFISH_API_KEY is not set. " + "Pass api_key when instantiating the tool or set the " + "TINYFISH_API_KEY environment variable. " + "Get a key at https://agent.tinyfish.ai/api-keys" + ) + body: dict[str, Any] = { + "url": url, + "goal": goal, + "browser_profile": browser_profile, + } + if self.proxy_country: + if self.proxy_country not in _VALID_PROXY_COUNTRIES: + return f"Error: Invalid proxy country: {self.proxy_country!r}" + body["proxy_config"] = { + "enabled": True, + "country_code": self.proxy_country, + } + + headers = { + "X-API-Key": key, + "Content-Type": "application/json", + "X-TF-Integration": _INTEGRATION_NAME, + } -# Backwards-compatible alias -Tinyfish = TinyfishRun + try: + response = requests.post( + _BASE_URL + _AGENT_PATH, + headers=headers, + json=body, + timeout=_TIMEOUT_SECONDS, + ) + except requests.RequestException as exc: + return f"Error: HTTP request failed — {exc}" + + if response.status_code == 401: + return "Error: Invalid or missing API key." + if response.status_code == 403: + return "Error: Insufficient credits or no active subscription." + if response.status_code == 429: + return "Error: Rate limit exceeded — retry after a moment." + if response.status_code >= 400: + snippet = (response.text or "No body")[:200] + return f"Error: HTTP {response.status_code} — {snippet}" -# Crew Studio auto-imports `CamelCase(package_name)` — alias so the -# Studio-generated crew code can import this package without errors. -TinyfishWebAgent = TinyfishRun + try: + return json.dumps(response.json()) + except ValueError: + return response.text or "Empty response from TinyFish." diff --git a/crew-ai/tests/test_tool.py b/crew-ai/tests/test_tool.py index 6479146..a5c68b3 100644 --- a/crew-ai/tests/test_tool.py +++ b/crew-ai/tests/test_tool.py @@ -1,263 +1,162 @@ -"""Tests for TinyFish CrewAI tools.""" +"""Tests for the TinyFish web-agent CrewAI tool.""" from __future__ import annotations import json -import os -from types import SimpleNamespace +from typing import Any from unittest.mock import MagicMock, patch -from tinyfish import BrowserProfile, ProxyCountryCode, RunStatus +import pytest -from tinyfish_web_agent import ( - Tinyfish, - TinyfishBrowserSession, - TinyfishFetch, - TinyfishGetRun, - TinyfishListRuns, - TinyfishRun, - TinyfishRunAsync, - TinyfishSearch, - __all__, -) +from tinyfish_web_agent import TinyfishWebAgent, __all__ +from tinyfish_web_agent.tool import TinyfishWebAgentInput -def _response(payload: dict): - return SimpleNamespace(model_dump=lambda: payload) - - -def test_public_exports() -> None: - from tinyfish_web_agent import TinyfishWebAgent - - assert sorted(__all__) == sorted( - [ - "Tinyfish", - "TinyfishRun", - "TinyfishRunAsync", - "TinyfishGetRun", - "TinyfishListRuns", - "TinyfishSearch", - "TinyfishFetch", - "TinyfishBrowserSession", - "TinyfishWebAgent", - ] - ) - assert Tinyfish is TinyfishRun - assert TinyfishWebAgent is TinyfishRun - - -def test_get_client_requires_api_key(monkeypatch) -> None: +@pytest.fixture(autouse=True) +def _clean_env(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("TINYFISH_API_KEY", raising=False) - result = TinyfishRun()._run("https://example.com", "Extract title") - - assert result.startswith("Error: TINYFISH_API_KEY is not set") - - -def test_get_client_sets_integration_tag(monkeypatch) -> None: - monkeypatch.setenv("TINYFISH_API_KEY", "sk-test") - monkeypatch.delenv("TF_API_INTEGRATION", raising=False) - - with patch("tinyfish_web_agent.tool.TinyFish") as tinyfish: - TinyfishRun()._get_client() - assert os.environ["TF_API_INTEGRATION"] == "crew-ai" - tinyfish.assert_called_once_with(api_key="sk-test") +def _mock_response( + status_code: int = 200, + json_data: dict | None = None, + text: str | None = None, +) -> MagicMock: + resp = MagicMock() + resp.status_code = status_code + resp.json.return_value = json_data or {} + resp.text = text if text is not None else json.dumps(json_data or {}) + if json_data is None and text is None: + resp.json.side_effect = ValueError("no body") + return resp -def test_run_kwargs_defaults_invalid_profile_to_lite() -> None: - kwargs = TinyfishRun()._run_kwargs("bad-profile") +# --------------------------------------------------------------------------- +# Public exports +# --------------------------------------------------------------------------- - assert kwargs["browser_profile"] == BrowserProfile.LITE +def test_public_exports() -> None: + assert sorted(__all__) == ["TinyfishWebAgent"] -def test_run_kwargs_adds_proxy_config() -> None: - kwargs = TinyfishRun(proxy_country="GB")._run_kwargs("stealth") - - assert kwargs["browser_profile"] == BrowserProfile.STEALTH - assert kwargs["proxy_config"].enabled is True - assert kwargs["proxy_config"].country_code == ProxyCountryCode("GB") - - -def test_run_rejects_invalid_proxy_country() -> None: - client = MagicMock() - - with patch.object(TinyfishRun, "_get_client", return_value=(client, None)): - result = TinyfishRun(proxy_country="XX")._run( - "https://example.com", - "Extract title", - ) - - assert result == "Error: Invalid proxy country: 'XX'" - client.agent.run.assert_not_called() +def test_tool_metadata() -> None: + tool = TinyfishWebAgent() + assert tool.name == "Tinyfish Web Agent" + assert tool.package_dependencies == ["requests"] + assert [e.name for e in tool.env_vars] == ["TINYFISH_API_KEY"] + assert tool.env_vars[0].required is True -def test_run_completed_result() -> None: - client = MagicMock() - client.agent.run.return_value = SimpleNamespace( - status=RunStatus.COMPLETED, - result={"title": "Example"}, - ) - with patch.object(TinyfishRun, "_get_client", return_value=(client, None)): - result = TinyfishRun()._run("https://example.com", "Extract title") +# --------------------------------------------------------------------------- +# Input validation +# --------------------------------------------------------------------------- - assert json.loads(result) == {"title": "Example"} - client.agent.run.assert_called_once() +def test_input_rejects_non_http_url() -> None: + with pytest.raises(ValueError, match="http"): + TinyfishWebAgentInput(url="ftp://example.com", goal="x") -def test_run_failed_result() -> None: - client = MagicMock() - client.agent.run.return_value = SimpleNamespace( - status=RunStatus.FAILED, - result=None, - error={"message": "Failed"}, - ) - with patch.object(TinyfishRun, "_get_client", return_value=(client, None)): - result = TinyfishRun()._run("https://example.com", "Extract title") +def test_input_accepts_https() -> None: + payload = TinyfishWebAgentInput(url="https://example.com", goal="x") + assert payload.browser_profile == "lite" - assert result == "Automation failed: Failed" +# --------------------------------------------------------------------------- +# Runtime error paths +# --------------------------------------------------------------------------- -def test_async_run() -> None: - client = MagicMock() - client.agent.queue.return_value = SimpleNamespace(run_id="run-123") - with patch.object(TinyfishRunAsync, "_get_client", return_value=(client, None)): - result = TinyfishRunAsync()._run("https://example.com", "Extract title") +def test_missing_api_key_returns_error(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("TINYFISH_API_KEY", raising=False) + result = TinyfishWebAgent()._run(url="https://example.com", goal="x") + assert result.startswith("Error: TINYFISH_API_KEY is not set") - assert result == "Automation started. run_id: run-123" +def test_invalid_proxy_country_returns_error(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TINYFISH_API_KEY", "sk-test") + tool = TinyfishWebAgent(proxy_country="ZZ") + result = tool._run(url="https://example.com", goal="x") + assert result == "Error: Invalid proxy country: 'ZZ'" -def test_get_run_formats_details() -> None: - client = MagicMock() - client.runs.get.return_value = SimpleNamespace( - status=RunStatus.COMPLETED, - result={"ok": True}, - error=None, - streaming_url="https://stream.example.com", - ) - with patch.object(TinyfishGetRun, "_get_client", return_value=(client, None)): - result = TinyfishGetRun()._run("run-123") +def test_http_error_returns_message(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TINYFISH_API_KEY", "sk-test") + with patch( + "tinyfish_web_agent.tool.requests.post", + return_value=_mock_response(status_code=401), + ): + result = TinyfishWebAgent()._run(url="https://example.com", goal="x") + assert "Invalid or missing API key" in result - assert "Status: COMPLETED" in result - assert "Live view: https://stream.example.com" in result - assert 'Result: {"ok": true}' in result +def test_network_exception_returns_error(monkeypatch: pytest.MonkeyPatch) -> None: + import requests as requests_lib -def test_list_runs_with_status_filter() -> None: - client = MagicMock() - client.runs.list.return_value = SimpleNamespace( - runs=[ - SimpleNamespace( - run_id="run-123", - status=RunStatus.COMPLETED, - url="https://example.com", - ) - ] - ) + monkeypatch.setenv("TINYFISH_API_KEY", "sk-test") + with patch( + "tinyfish_web_agent.tool.requests.post", + side_effect=requests_lib.ConnectTimeout("timed out"), + ): + result = TinyfishWebAgent()._run(url="https://example.com", goal="x") + assert result.startswith("Error: HTTP request failed") - with patch.object(TinyfishListRuns, "_get_client", return_value=(client, None)): - result = TinyfishListRuns()._run(status="COMPLETED", limit=5) - assert "Found 1 runs" in result - client.runs.list.assert_called_once_with(limit=5, status=RunStatus.COMPLETED) +# --------------------------------------------------------------------------- +# Happy paths +# --------------------------------------------------------------------------- -def test_list_runs_uses_sdk_data_field() -> None: - client = MagicMock() - client.runs.list.return_value = SimpleNamespace( - data=[ - SimpleNamespace( - run_id="run-123", - status=RunStatus.COMPLETED, - url="https://example.com", - ) - ] - ) +def test_happy_path(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TINYFISH_API_KEY", "sk-test") + payload = {"status": "COMPLETED", "result": {"title": "Example"}, "run_id": "abc"} - with patch.object(TinyfishListRuns, "_get_client", return_value=(client, None)): - result = TinyfishListRuns()._run(limit=5) + with patch( + "tinyfish_web_agent.tool.requests.post", + return_value=_mock_response(status_code=200, json_data=payload), + ) as mock_post: + result = TinyfishWebAgent()._run( + url="https://example.com", + goal="Get the title", + browser_profile="lite", + ) - assert "Found 1 runs" in result - assert "run-123" in result + assert json.loads(result) == payload + sent = mock_post.call_args + assert sent.kwargs["json"] == { + "url": "https://example.com", + "goal": "Get the title", + "browser_profile": "lite", + } + assert sent.kwargs["headers"]["X-API-Key"] == "sk-test" + assert sent.kwargs["headers"]["X-TF-Integration"] == "crew-ai" -def test_search_uses_sdk_resource() -> None: - client = MagicMock() - client.search.query.return_value = _response( - { - "query": "tinyfish", - "results": [{"title": "TinyFish", "url": "https://tinyfish.ai"}], - "total_results": 1, - } - ) - - with patch.object(TinyfishSearch, "_get_client", return_value=(client, None)): - result = TinyfishSearch()._run( - "tinyfish", - location="United States", - language="en", +def test_proxy_country_included_in_body(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TINYFISH_API_KEY", "sk-test") + with patch( + "tinyfish_web_agent.tool.requests.post", + return_value=_mock_response(status_code=200, json_data={"status": "COMPLETED"}), + ) as mock_post: + TinyfishWebAgent(proxy_country="GB")._run( + url="https://example.com", + goal="x", + browser_profile="stealth", ) - assert json.loads(result)["total_results"] == 1 - client.search.query.assert_called_once_with( - query="tinyfish", - location="United States", - language="en", - ) - - -def test_fetch_uses_sdk_resource() -> None: - client = MagicMock() - client.fetch.get_contents.return_value = _response( - { - "results": [{"url": "https://example.com", "text": "Example"}], - "errors": [], - } - ) - - with patch.object(TinyfishFetch, "_get_client", return_value=(client, None)): - result = TinyfishFetch()._run( - ["https://example.com"], - format="markdown", - links=True, - image_links=False, - ) + body = mock_post.call_args.kwargs["json"] + assert body["browser_profile"] == "stealth" + assert body["proxy_config"] == {"enabled": True, "country_code": "GB"} - assert json.loads(result)["results"][0]["text"] == "Example" - client.fetch.get_contents.assert_called_once_with( - urls=["https://example.com"], - format="markdown", - links=True, - image_links=False, - ) - - -def test_create_browser_session_uses_sdk_resource() -> None: - client = MagicMock() - client.browser.sessions.create.return_value = _response( - { - "session_id": "tf-session", - "cdp_url": "wss://example.test", - "base_url": "https://example.test", - } - ) - - with patch.object( - TinyfishBrowserSession, - "_get_client", - return_value=(client, None), - ): - result = TinyfishBrowserSession()._run( - url="https://example.com", - timeout_seconds=120, - ) - assert json.loads(result)["session_id"] == "tf-session" - client.browser.sessions.create.assert_called_once_with( - url="https://example.com", - timeout_seconds=120, - ) +def test_explicit_api_key_overrides_env(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TINYFISH_API_KEY", "sk-env") + with patch( + "tinyfish_web_agent.tool.requests.post", + return_value=_mock_response(status_code=200, json_data={"status": "COMPLETED"}), + ) as mock_post: + TinyfishWebAgent(api_key="sk-explicit")._run( + url="https://example.com", goal="x" + ) + assert mock_post.call_args.kwargs["headers"]["X-API-Key"] == "sk-explicit" From dfad350bf65f2ec1da853595f7a023f6938fd0d5 Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Fri, 15 May 2026 00:43:40 +0800 Subject: [PATCH 9/9] Drop unused typing.Any import in crew-ai tests --- crew-ai/tests/test_tool.py | 1 - 1 file changed, 1 deletion(-) diff --git a/crew-ai/tests/test_tool.py b/crew-ai/tests/test_tool.py index a5c68b3..81ba075 100644 --- a/crew-ai/tests/test_tool.py +++ b/crew-ai/tests/test_tool.py @@ -3,7 +3,6 @@ from __future__ import annotations import json -from typing import Any from unittest.mock import MagicMock, patch import pytest