diff --git a/.github/workflows/crew-ai-ci.yml b/.github/workflows/crew-ai-ci.yml new file mode 100644 index 0000000..fdc3dd1 --- /dev/null +++ b/.github/workflows/crew-ai-ci.yml @@ -0,0 +1,41 @@ +name: CrewAI CI + +on: + pull_request: + branches: [main] + paths: + - ".github/workflows/crew-ai-ci.yml" + - "crew-ai/**" + push: + branches: [main] + paths: + - ".github/workflows/crew-ai-ci.yml" + - "crew-ai/**" + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + + defaults: + run: + working-directory: crew-ai + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - run: python -m pip install --upgrade pip + + - run: python -m pip install -e . -r requirements-dev.txt + + - run: make lint + + - run: make test + + - run: python -m build diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ddb1df3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +.claude/ +google-adk/ +langchain/ +supabase/ +zapier/ +test_tinyfish_crew/ +crew-ai/examples/ diff --git a/crew-ai/.gitignore b/crew-ai/.gitignore new file mode 100644 index 0000000..6ca54b8 --- /dev/null +++ b/crew-ai/.gitignore @@ -0,0 +1,19 @@ +# Python-generated files +__pycache__/ +*.py[oc] +build/ +dist/ +wheels/ +*.egg-info + +# Virtual environments +.venv + +# Lockfiles +uv.lock + +# Environment variables +.env + +# Claude Code +.claude/ diff --git a/crew-ai/Makefile b/crew-ai/Makefile new file mode 100644 index 0000000..77ba14b --- /dev/null +++ b/crew-ai/Makefile @@ -0,0 +1,15 @@ +.PHONY: format lint test clean + +format: + ruff format src tests + ruff check --fix src tests + +lint: + ruff check src tests + +test: + pytest tests -v + +clean: + rm -rf build dist *.egg-info .pytest_cache .ruff_cache + find . 
-type d -name __pycache__ -exec rm -rf {} + diff --git a/crew-ai/README.md b/crew-ai/README.md new file mode 100644 index 0000000..5eb4692 --- /dev/null +++ b/crew-ai/README.md @@ -0,0 +1,142 @@ +# Tinyfish — CrewAI Tool + +Automate any website using natural language with [TinyFish Web Agent](https://tinyfish.ai). Extract data, fill forms, click buttons, navigate pages, and more — all described in plain English. + +## Installation + +Clone the repo and install the tool locally: + +```bash +git clone https://github.com/tinyfish-io/tinyfish-web-agent-integrations.git +cd tinyfish-web-agent-integrations/crew-ai +pip install -e . +``` + +## Setup + +Get your API key at [agent.tinyfish.ai/api-keys](https://agent.tinyfish.ai/api-keys), then set it as an environment variable: + +```bash +export TINYFISH_API_KEY="your-api-key" +``` + +Or add it to your `.env` file: + +```dotenv +TINYFISH_API_KEY=your-api-key +``` + +`tinyfish-web-agent` automatically tags TinyFish SDK requests as originating +from `crew-ai`. You do not need to set `TF_API_INTEGRATION` yourself. + +## Tools + +| Tool | Description | +|------|-------------| +| `TinyfishRun` | Run a browser automation synchronously. Best for quick tasks (<30s). | +| `TinyfishRunAsync` | Start an automation asynchronously. Returns a `run_id` immediately. | +| `TinyfishGetRun` | Check status and get results of a run by its `run_id`. | +| `TinyfishListRuns` | List recent automation runs, optionally filtered by status. | +| `TinyfishSearch` | Search the web and return structured results. | +| `TinyfishFetch` | Fetch clean content from one or more URLs. | +| `TinyfishBrowserSession` | Create a remote browser session and return connection URLs. | + +`Tinyfish` is an alias for `TinyfishRun`. 
+ +## Usage + +### Basic — synchronous run + +```python +from crewai import Agent +from tinyfish_web_agent import TinyfishRun + +agent = Agent( + role="Web Researcher", + goal="Find and extract information from websites", + tools=[TinyfishRun()], +) +``` + +### Async workflow — start and check + +```python +from tinyfish_web_agent import TinyfishRunAsync, TinyfishGetRun + +agent = Agent( + role="Data Collector", + goal="Collect data from multiple sources efficiently", + tools=[TinyfishRunAsync(), TinyfishGetRun()], +) +``` + +### Search and fetch + +```python +from tinyfish_web_agent import TinyfishSearch, TinyfishFetch + +agent = Agent( + role="Web Researcher", + goal="Search the web and read relevant pages", + tools=[TinyfishSearch(), TinyfishFetch()], +) +``` + +### All tools at once + +```python +from tinyfish_web_agent import ( + TinyfishRun, + TinyfishRunAsync, + TinyfishGetRun, + TinyfishListRuns, + TinyfishSearch, + TinyfishFetch, + TinyfishBrowserSession, +) + +agent = Agent( + role="Web Automation Specialist", + goal="Automate any web task", + tools=[ + TinyfishRun(), + TinyfishRunAsync(), + TinyfishGetRun(), + TinyfishListRuns(), + TinyfishSearch(), + TinyfishFetch(), + TinyfishBrowserSession(), + ], +) +``` + +## Configuration + +All tools accept these optional constructor parameters: + +| Parameter | Description | +|-----------|-------------| +| `api_key` | TinyFish API key. Falls back to `TINYFISH_API_KEY` env var. | +| `proxy_country` | Route through a proxy in this country (`US`, `GB`, `CA`, `DE`, `FR`, `JP`, `AU`). | + +The package also sets `TF_API_INTEGRATION=crew-ai` internally so requests are +attributed automatically. 
+ +```python +tool = TinyfishRun(api_key="sk-...", proxy_country="US") +``` + +## Example goals + +```text +"Extract all product names, prices, and ratings from this page" +"Fill the contact form with name 'Jane Doe' and email 'jane@example.com', then submit" +"Click 'Next Page' 3 times, extracting all listings from each page" +"Log in with the provided credentials, then extract the dashboard data" +``` + +## Support + +- [TinyFish Docs](https://docs.tinyfish.ai) +- [CrewAI Docs](https://docs.crewai.com) +- [Discord](https://discord.gg/agentql) diff --git a/crew-ai/pyproject.toml b/crew-ai/pyproject.toml new file mode 100644 index 0000000..d72b44c --- /dev/null +++ b/crew-ai/pyproject.toml @@ -0,0 +1,27 @@ +[project] +name = "tinyfish-web-agent" +version = "0.2.0" +description = "Power up your crews with TinyFish Web Agent" +readme = "README.md" +requires-python = ">=3.10,<3.14" +dependencies = [ + "crewai[tools]>=0.203.1", + "requests>=2.28", +] + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.crewai] +type = "tool" + +[tool.ruff] +target-version = "py310" +line-length = 88 + +[tool.ruff.lint] +select = ["E", "F", "I", "T201", "W"] diff --git a/crew-ai/requirements-dev.txt b/crew-ai/requirements-dev.txt new file mode 100644 index 0000000..35b5b0d --- /dev/null +++ b/crew-ai/requirements-dev.txt @@ -0,0 +1,3 @@ +build>=1.0,<2.0 +pytest>=7.0,<9.0 +ruff>=0.4,<1.0 diff --git a/crew-ai/src/tinyfish_web_agent/__init__.py b/crew-ai/src/tinyfish_web_agent/__init__.py new file mode 100644 index 0000000..11dd944 --- /dev/null +++ b/crew-ai/src/tinyfish_web_agent/__init__.py @@ -0,0 +1,3 @@ +from .tool import TinyfishWebAgent + +__all__ = ["TinyfishWebAgent"] diff --git a/crew-ai/src/tinyfish_web_agent/tool.py b/crew-ai/src/tinyfish_web_agent/tool.py new file mode 100644 index 0000000..f0c6974 --- /dev/null +++ b/crew-ai/src/tinyfish_web_agent/tool.py @@ -0,0 +1,148 @@ 
class TinyfishWebAgentInput(BaseModel):
    """Input schema for a synchronous TinyFish browser automation.

    Fields:
        url: Absolute http(s) URL of the site to automate. Surrounding
            whitespace is removed during validation.
        goal: Natural-language description of the task to perform.
        browser_profile: Either "lite" (the default) or "stealth".
    """

    url: str = Field(..., description="Target website URL to automate.")
    goal: str = Field(
        ...,
        description=(
            "Natural language description of what to accomplish on the website "
            "(e.g., 'extract product names and prices', 'fill the contact form')."
        ),
    )
    browser_profile: Literal["lite", "stealth"] = Field(
        default="lite",
        description=(
            'Browser execution mode. "lite" is fast for standard sites; '
            '"stealth" enables anti-detection for sites with bot protection.'
        ),
    )

    @field_validator("url")
    @classmethod
    def _check_url_scheme(cls, v: str) -> str:
        """Validate that *v* is an absolute http(s) URL and return it trimmed.

        Args:
            v: The raw URL supplied by the caller.

        Returns:
            The URL with leading/trailing whitespace removed.

        Raises:
            ValueError: If the scheme is not http/https or no host is present.
        """
        # Function-scope import keeps this block self-contained; urllib is
        # part of the standard library.
        from urllib.parse import urlsplit

        # Tolerate stray whitespace around the URL instead of rejecting an
        # otherwise valid value.
        candidate = v.strip()
        parts = urlsplit(candidate)

        # urlsplit lower-cases the scheme, so "HTTPS://..." is accepted too.
        if parts.scheme not in ("http", "https"):
            raise ValueError("URL must start with http:// or https://")
        # "http://" on its own has a valid scheme but nothing to connect to.
        if not parts.netloc:
            raise ValueError("URL must include a host (e.g. https://example.com)")
        return candidate
def _run(
    self,
    url: str,
    goal: str,
    browser_profile: str = "lite",
) -> str:
    """Run one synchronous TinyFish automation and return the outcome as text.

    Expected failures (missing key, invalid proxy country, transport errors,
    HTTP error statuses) are reported as an ``"Error: ..."`` string rather
    than raised.

    Args:
        url: Target website URL to automate.
        goal: Natural-language description of the task.
        browser_profile: Browser execution mode sent to the API
            (``"lite"`` by default).

    Returns:
        The JSON-encoded response body on success, the raw response text
        when the body is not valid JSON, or an ``"Error: ..."`` message.
    """
    # Strip the key: stray whitespace (e.g. a trailing newline picked up
    # from an env or secret file) would otherwise end up in the header value.
    key = (self.api_key or os.environ.get("TINYFISH_API_KEY") or "").strip()
    if not key:
        return (
            "Error: TINYFISH_API_KEY is not set. "
            "Pass api_key when instantiating the tool or set the "
            "TINYFISH_API_KEY environment variable. "
            "Get a key at https://agent.tinyfish.ai/api-keys"
        )

    body: dict[str, Any] = {
        "url": url,
        "goal": goal,
        "browser_profile": browser_profile,
    }
    if self.proxy_country:
        # Normalise so "us" or " US " match the upper-case allow-list;
        # the error still echoes exactly what the caller supplied.
        country = self.proxy_country.strip().upper()
        if country not in _VALID_PROXY_COUNTRIES:
            return f"Error: Invalid proxy country: {self.proxy_country!r}"
        body["proxy_config"] = {
            "enabled": True,
            "country_code": country,
        }

    headers = {
        "X-API-Key": key,
        "Content-Type": "application/json",
        "X-TF-Integration": _INTEGRATION_NAME,
    }

    try:
        response = requests.post(
            _BASE_URL + _AGENT_PATH,
            headers=headers,
            json=body,
            timeout=_TIMEOUT_SECONDS,
        )
    except requests.RequestException as exc:
        return f"Error: HTTP request failed — {exc}"

    # Statuses with a dedicated, user-actionable message.
    known_errors = {
        401: "Error: Invalid or missing API key.",
        403: "Error: Insufficient credits or no active subscription.",
        429: "Error: Rate limit exceeded — retry after a moment.",
    }
    status = response.status_code
    if status in known_errors:
        return known_errors[status]
    if status >= 400:
        # Cap the echoed body at 200 characters to keep the message short.
        snippet = (response.text or "No body")[:200]
        return f"Error: HTTP {status} — {snippet}"

    try:
        payload = response.json()
    except ValueError:
        # Non-JSON success body: hand back the raw text instead of failing.
        return response.text or "Empty response from TinyFish."
    return json.dumps(payload)
@pytest.mark.parametrize(
    ("status_code", "expected"),
    [
        (401, "Invalid or missing API key"),
        (403, "Insufficient credits or no active subscription"),
        (429, "Rate limit exceeded"),
        # Any other 4xx/5xx falls through to the generic "HTTP <code>" message.
        (500, "HTTP 500"),
    ],
)
def test_http_error_returns_message(
    monkeypatch: pytest.MonkeyPatch, status_code: int, expected: str
) -> None:
    """Every HTTP error branch of ``_run`` yields a readable error string."""
    monkeypatch.setenv("TINYFISH_API_KEY", "sk-test")
    with patch(
        "tinyfish_web_agent.tool.requests.post",
        return_value=_mock_response(status_code=status_code),
    ):
        result = TinyfishWebAgent()._run(url="https://example.com", goal="x")
    assert result.startswith("Error:")
    assert expected in result
def test_explicit_api_key_overrides_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """A key passed to the constructor wins over TINYFISH_API_KEY in the env."""
    monkeypatch.setenv("TINYFISH_API_KEY", "sk-env")
    ok_response = _mock_response(status_code=200, json_data={"status": "COMPLETED"})
    tool = TinyfishWebAgent(api_key="sk-explicit")

    with patch(
        "tinyfish_web_agent.tool.requests.post", return_value=ok_response
    ) as fake_post:
        tool._run(url="https://example.com", goal="x")

    # The header must carry the explicit key, not the environment one.
    sent_headers = fake_post.call_args.kwargs["headers"]
    assert sent_headers["X-API-Key"] == "sk-explicit"