From 46fedc97252e60ede5dabdc090f5008acb0d1dd6 Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Mon, 11 May 2026 17:32:03 +0800 Subject: [PATCH 1/3] Add SDK-backed LangChain TinyFish tools Bumps the TinyFish Python SDK dependency to 0.2.5 and exposes SDK-backed LangChain tools for web search, content fetch, and browser session creation. Updates package metadata, README examples, unit coverage, and adds LangChain CI plus PyPI trusted-publishing workflows. --- .github/workflows/langchain-ci.yml | 39 ++ .github/workflows/langchain-publish.yml | 34 ++ langchain/.gitignore | 12 + langchain/LICENSE | 21 + langchain/Makefile | 24 + langchain/README.md | 166 +++++++ langchain/langchain_tinyfish/__init__.py | 33 ++ langchain/langchain_tinyfish/_api_wrapper.py | 458 ++++++++++++++++++ langchain/langchain_tinyfish/py.typed | 0 langchain/langchain_tinyfish/tool.py | 405 ++++++++++++++++ langchain/pyproject.toml | 60 +++ langchain/requirements-dev.txt | 7 + langchain/scripts/check_imports.py | 22 + langchain/tests/__init__.py | 0 langchain/tests/integration_tests/__init__.py | 0 .../tests/integration_tests/test_compile.py | 9 + .../integration_tests/test_integration.py | 59 +++ .../tests/integration_tests/test_standard.py | 26 + langchain/tests/unit_tests/__init__.py | 0 .../tests/unit_tests/test_api_wrapper.py | 440 +++++++++++++++++ langchain/tests/unit_tests/test_imports.py | 20 + langchain/tests/unit_tests/test_packaging.py | 16 + langchain/tests/unit_tests/test_standard.py | 39 ++ langchain/tests/unit_tests/test_tool.py | 404 +++++++++++++++ 24 files changed, 2294 insertions(+) create mode 100644 .github/workflows/langchain-ci.yml create mode 100644 .github/workflows/langchain-publish.yml create mode 100644 langchain/.gitignore create mode 100644 langchain/LICENSE create mode 100644 langchain/Makefile create mode 100644 langchain/README.md create mode 100644 langchain/langchain_tinyfish/__init__.py create mode 100644 langchain/langchain_tinyfish/_api_wrapper.py create mode 100644 langchain/langchain_tinyfish/py.typed create mode 100644 langchain/langchain_tinyfish/tool.py create mode 100644 langchain/pyproject.toml create mode 100644 langchain/requirements-dev.txt create mode 100644 langchain/scripts/check_imports.py create mode 100644 langchain/tests/__init__.py create mode 100644 langchain/tests/integration_tests/__init__.py create mode 100644 langchain/tests/integration_tests/test_compile.py create mode 100644 langchain/tests/integration_tests/test_integration.py create mode 100644 langchain/tests/integration_tests/test_standard.py create mode 100644 langchain/tests/unit_tests/__init__.py create mode 100644 langchain/tests/unit_tests/test_api_wrapper.py create mode 100644 langchain/tests/unit_tests/test_imports.py create mode 100644 langchain/tests/unit_tests/test_packaging.py create mode 100644 langchain/tests/unit_tests/test_standard.py create mode 100644 langchain/tests/unit_tests/test_tool.py diff --git a/.github/workflows/langchain-ci.yml b/.github/workflows/langchain-ci.yml new file mode 100644 index 0000000..d13304c --- /dev/null +++ b/.github/workflows/langchain-ci.yml @@ -0,0 +1,39 @@ +name: LangChain CI + +on: + pull_request: + branches: [main] + paths: + - ".github/workflows/langchain-ci.yml" + - "langchain/**" + push: + branches: [main] + paths: + - ".github/workflows/langchain-ci.yml" + - "langchain/**" + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + + defaults: + run: + working-directory: langchain + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - run: python -m pip install --upgrade pip + + - run: python -m pip install -e . -r requirements-dev.txt + + - run: make lint + + - run: make test diff --git a/.github/workflows/langchain-publish.yml b/.github/workflows/langchain-publish.yml new file mode 100644 index 0000000..eb38445 --- /dev/null +++ b/.github/workflows/langchain-publish.yml @@ -0,0 +1,34 @@ +name: LangChain Publish + +on: + push: + tags: + - "langchain-v*" + +permissions: + contents: read + id-token: write + +jobs: + publish: + if: startsWith(github.ref, 'refs/tags/langchain-v') + runs-on: ubuntu-latest + + defaults: + run: + working-directory: langchain + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-python@v6 + with: + python-version: "3.12" + + - run: python -m pip install --upgrade pip build + + - run: python -m build + + - uses: pypa/gh-action-pypi-publish@release/v1 + with: + packages-dir: langchain/dist diff --git a/langchain/.gitignore b/langchain/.gitignore new file mode 100644 index 0000000..3ace3ce --- /dev/null +++ b/langchain/.gitignore @@ -0,0 +1,12 @@ +*.egg-info/ +*.egg +dist/ +build/ +__pycache__/ +*.pyc +*.pyo +.env +.claude/ +.mypy_cache/ +.ruff_cache/ +.pytest_cache/ diff --git a/langchain/LICENSE b/langchain/LICENSE new file mode 100644 index 0000000..cede851 --- /dev/null +++ b/langchain/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 TinyFish + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/langchain/Makefile b/langchain/Makefile new file mode 100644 index 0000000..2b7fbbc --- /dev/null +++ b/langchain/Makefile @@ -0,0 +1,24 @@ +.PHONY: all format lint test integration_test check_imports clean + +all: format lint test + +TEST_FILE ?= tests/unit_tests/ +integration_test: TEST_FILE = tests/integration_tests/ + +format: + ruff format langchain_tinyfish tests + ruff check --fix langchain_tinyfish tests + +lint: + ruff check langchain_tinyfish tests + mypy langchain_tinyfish + +test integration_test: + pytest $(TEST_FILE) -v + +check_imports: $(shell find langchain_tinyfish -name '*.py') + python scripts/check_imports.py $^ + +clean: + rm -rf build dist *.egg-info .pytest_cache .mypy_cache .ruff_cache + find . -type d -name __pycache__ -exec rm -rf {} + diff --git a/langchain/README.md b/langchain/README.md new file mode 100644 index 0000000..3d41b3b --- /dev/null +++ b/langchain/README.md @@ -0,0 +1,166 @@ +# TinyFish Web Agent for LangChain + +![Powered by TinyFish](https://img.shields.io/badge/Powered%20by-TinyFish-blue) + +This package provides a LangChain Tool to run the TinyFish Web Agent directly within your LangChain Agents and Workflows. + +TinyFish is a platform for executing complex, goal-oriented tasks on the live web. Unlike traditional scrapers or local browser automation, TinyFish uses a fleet of remote, AI-powered web agents that can navigate complex sites, handle anti-bot protection, and return clean, structured JSON data. + +## Installation + +```bash +pip install langchain-tinyfish +``` + +## Configuration + +1. Get your TinyFish API key from [agent.tinyfish.ai/api-keys](https://agent.tinyfish.ai/api-keys). +2. Set it as an environment variable: + +```bash +export TINYFISH_API_KEY="YOUR_API_KEY" +``` + +## Quick Start + +Here is a simple example of how to use the TinyFish Web Agent to extract the current stock price of NVIDIA from Yahoo Finance. + +```python +from langchain_tinyfish import TinyFishWebAutomation + +tool = TinyFishWebAutomation() + +result = tool.invoke({ + "url": "https://finance.yahoo.com/quote/NVDA/", + "goal": "Extract the current stock price of NVIDIA", +}) + +print(result) +# Output: {"stock_price": 950.02} +``` + +### Search, Fetch, and Browser Sessions + +The package also exposes SDK-backed tools for TinyFish Search, Fetch, and Browser sessions: + +```python +from langchain_tinyfish import ( + TinyFishBrowserSession, + TinyFishFetch, + TinyFishSearch, +) + +search = TinyFishSearch() +fetch = TinyFishFetch() +browser = TinyFishBrowserSession() + +search_results = search.invoke({"query": "TinyFish Web Agent docs"}) +page_content = fetch.invoke({"urls": ["https://docs.tinyfish.ai"], "format": "markdown"}) +session = browser.invoke({"url": "https://example.com"}) +``` + +### With a LangChain Agent + +```python +from langchain_openai import ChatOpenAI +from langgraph.prebuilt import create_react_agent +from langchain_tinyfish import TinyFishFetch, TinyFishSearch, TinyFishWebAutomation + +llm = ChatOpenAI(model="gpt-4o") +tools = [ + TinyFishWebAutomation(), + TinyFishSearch(), + TinyFishFetch(), +] +agent = create_react_agent(llm, tools) + +result = agent.invoke({ + "messages": [("user", "Go to scrapeme.live/shop and extract the first 5 product names and prices")] +}) + +for message in result["messages"]: + print(message.content) +``` + +### Stealth Mode + Proxy + +For sites with bot protection (Cloudflare, CAPTCHAs, etc.): + +```python +from langchain_tinyfish import TinyFishAPIWrapper, TinyFishWebAutomation + +tool = TinyFishWebAutomation( + api_wrapper=TinyFishAPIWrapper( + browser_profile="stealth", + proxy_enabled=True, + proxy_country_code="US", # Also: GB, CA, DE, FR, JP, AU + ) +) +``` + +### Async Usage + +```python +import asyncio +from langchain_tinyfish import TinyFishWebAutomation + +async def main(): + tool = TinyFishWebAutomation() + result = await tool.ainvoke({ + "url": "https://example.com", + "goal": "Extract the page title", + }) + print(result) + +asyncio.run(main()) +``` + +## Use Cases + +- **AI Agent Enablement:** Give your AI agent the ability to perform deep research on the web. +- **Workflow Automation:** Monitor a competitor's pricing page and get a Slack notification when it changes. +- **Data Extraction:** Extract job postings, product details, or contact information into a structured format. + +## Configuration Options + +All parameters are set on the `TinyFishAPIWrapper`: + +| Parameter | Default | Description | +|-----------|---------|-------------| +| `api_key` | `$TINYFISH_API_KEY` | Your TinyFish API key | +| `browser_profile` | `"lite"` | `"lite"` (fast) or `"stealth"` (anti-detection) | +| `proxy_enabled` | `False` | Enable proxy routing | +| `proxy_country_code` | `"US"` | Proxy country: US, GB, CA, DE, FR, JP, AU | +| `timeout` | `300` | Request timeout in seconds | + +```python +from langchain_tinyfish import TinyFishAPIWrapper, TinyFishWebAutomation + +wrapper = TinyFishAPIWrapper( + api_key="sk-mino-...", + browser_profile="stealth", + timeout=600, +) +tool = TinyFishWebAutomation(api_wrapper=wrapper) +``` + +## Development + +```bash +# Install package + dev dependencies +pip install -e . +pip install -r requirements-dev.txt + +# Run unit tests +make test + +# Run linter +make lint + +# Run integration tests (requires TINYFISH_API_KEY) +make integration_test +``` + +## Support + +If you have any questions or need help, please reach out to [support@tinyfish.ai](mailto:support@tinyfish.ai) or join our [Discord community](https://discord.gg/agentql). diff --git a/langchain/langchain_tinyfish/__init__.py b/langchain/langchain_tinyfish/__init__.py new file mode 100644 index 0000000..b3dd6d2 --- /dev/null +++ b/langchain/langchain_tinyfish/__init__.py @@ -0,0 +1,33 @@ +"""LangChain integration for TinyFish Web Agent.""" + +from importlib import metadata + +from langchain_tinyfish._api_wrapper import TinyFishAPIWrapper +from langchain_tinyfish.tool import ( + TinyFishBrowserSession, + TinyFishBrowserSessionInput, + TinyFishFetch, + TinyFishFetchInput, + TinyFishInput, + TinyFishSearch, + TinyFishSearchInput, + TinyFishWebAutomation, +) + +try: + __version__: str = metadata.version(__package__ or __name__) +except metadata.PackageNotFoundError: + __version__ = "" + +__all__ = [ + "TinyFishAPIWrapper", + "TinyFishBrowserSession", + "TinyFishBrowserSessionInput", + "TinyFishFetch", + "TinyFishFetchInput", + "TinyFishInput", + "TinyFishSearch", + "TinyFishSearchInput", + "TinyFishWebAutomation", + "__version__", +] diff --git a/langchain/langchain_tinyfish/_api_wrapper.py b/langchain/langchain_tinyfish/_api_wrapper.py new file mode 100644 index 0000000..1a9cbab --- /dev/null +++ b/langchain/langchain_tinyfish/_api_wrapper.py @@ -0,0 +1,458 @@ +"""Wrapper around the TinyFish Web Agent SDK.""" + +from __future__ import annotations + +import asyncio +import json +import os +import time +from typing import Any, AsyncGenerator, Dict, Generator, Literal, Optional, cast + +from langchain_core.utils import secret_from_env +from pydantic import BaseModel, ConfigDict, Field, SecretStr +from tinyfish import ( + AsyncTinyFish, + BrowserProfile, + EventType, + ProxyConfig, + ProxyCountryCode, + RunStatus, + TinyFish, +) + +_NO_RESULT = {"status": "completed", "message": "No result data"} +_INTEGRATION_NAME = "langchain" +FetchFormat = Literal["markdown", "html", "json"] + + +class TinyFishAPIWrapper(BaseModel): + """Wrapper around TinyFish Web Agent SDK. + + Provides sync and async methods to run web automations via + the TinyFish Python SDK. + + Setup: + Set the ``TINYFISH_API_KEY`` environment variable, or pass ``api_key`` + directly. + + .. code-block:: bash + + export TINYFISH_API_KEY="sk-mino-..." + + Example: + .. code-block:: python + + from langchain_tinyfish import TinyFishAPIWrapper + + wrapper = TinyFishAPIWrapper() + result = wrapper.run( + url="https://example.com", + goal="Extract the page title", + ) + """ + + api_key: SecretStr = Field( + default_factory=secret_from_env(["TINYFISH_API_KEY"]), + ) + browser_profile: str = Field( + default="lite", + description="Browser profile: 'lite' (fast) or 'stealth' (anti-detection)", + ) + proxy_enabled: bool = Field(default=False) + proxy_country_code: str = Field( + default="US", + description="Proxy country code: US, GB, CA, DE, FR, JP, AU", + ) + timeout: int = Field( + default=300, + ge=1, + description="Request timeout in seconds", + ) + poll_interval: float = Field( + default=2.0, + gt=0, + description="Seconds between polls when using async run", + ) + + model_config = ConfigDict(extra="forbid") + + # ── helpers ────────────────────────────────────────────────────── + + @staticmethod + def _ensure_integration_tag() -> None: + """Set the default TinyFish integration tag for request attribution.""" + os.environ.setdefault("TF_API_INTEGRATION", _INTEGRATION_NAME) + + def _get_browser_profile(self) -> BrowserProfile: + """Convert string browser profile to SDK enum.""" + try: + return BrowserProfile(self.browser_profile) + except ValueError: + return cast(BrowserProfile, BrowserProfile.LITE) + + def _get_proxy_config(self) -> Optional[ProxyConfig]: + """Build SDK proxy config if enabled.""" + if not self.proxy_enabled: + return None + return ProxyConfig( + enabled=True, + country_code=ProxyCountryCode(self.proxy_country_code), + ) + + def _run_kwargs(self) -> Dict[str, Any]: + """Common kwargs for SDK agent.run/stream/queue calls.""" + kwargs: Dict[str, Any] = { + "browser_profile": self._get_browser_profile(), + } + proxy = self._get_proxy_config() + if proxy is not None: + kwargs["proxy_config"] = proxy + return kwargs + + def _make_client(self) -> TinyFish: + """Create a sync TinyFish SDK client.""" + self._ensure_integration_tag() + return TinyFish(api_key=self.api_key.get_secret_value()) + + def _make_async_client(self) -> AsyncTinyFish: + """Create an async TinyFish SDK client.""" + self._ensure_integration_tag() + return AsyncTinyFish(api_key=self.api_key.get_secret_value()) + + @staticmethod + def _normalize_event(event: Any) -> Dict[str, Any]: + """Convert an SDK stream event to a normalized dict.""" + etype = event.type + d: Dict[str, Any] = { + "type": etype.value if isinstance(etype, EventType) else str(etype), + } + status = getattr(event, "status", None) + if status is not None: + d["status"] = status.value if isinstance(status, RunStatus) else str(status) + for sdk_attr, key in [ + ("streaming_url", "streamingUrl"), + ("purpose", "purpose"), + ("result_json", "resultJson"), + ("run_id", "runId"), + ]: + val = getattr(event, sdk_attr, None) + if val is not None: + d[key] = val + return d + + @staticmethod + def _handle_run_result(run: Any) -> str: + """Process an SDK run/result object and return a JSON string. + + Handles both RunResult (from agent.run) and Run (from runs.get) + which share the same status/result/error pattern. + """ + status = getattr(run, "status", None) + if status == RunStatus.COMPLETED: + result = getattr(run, "result", None) + if result is not None: + return json.dumps(result) + return json.dumps(_NO_RESULT) + elif status == RunStatus.FAILED: + error = getattr(run, "error", None) + if error is not None: + message = ( + error.get("message", str(error)) + if isinstance(error, dict) + else str(error) + ) + else: + message = "Unknown error" + raise RuntimeError(f"TinyFish automation failed: {message}") + elif status == RunStatus.CANCELLED: + raise RuntimeError("TinyFish automation was cancelled") + else: + return json.dumps({"status": str(status), "run": str(run)}) + + @staticmethod + def _dump_json(value: Any) -> str: + """Serialize SDK response models to a JSON string.""" + if hasattr(value, "model_dump_json"): + return str(value.model_dump_json()) + if hasattr(value, "model_dump"): + return json.dumps(value.model_dump()) + return json.dumps(value) + + @staticmethod + def handle_complete_event(event: Dict[str, Any]) -> str: + """Extract result from a COMPLETE SSE event dict. + + Used by the tool layer to process the final streaming event. + """ + status = event.get("status") + if status == "COMPLETED": + data = event.get("resultJson") + if data is not None: + return json.dumps(data) + return json.dumps(_NO_RESULT) + elif status == "FAILED": + raise RuntimeError(f"TinyFish automation failed: {event}") + elif status == "CANCELLED": + raise RuntimeError("TinyFish automation was cancelled") + return json.dumps(event) + + # ── sync: run ──────────────────────────────────────────────────── + + def run(self, url: str, goal: str) -> str: + """Run a web automation task synchronously. + + Args: + url: The target URL to automate. + goal: Natural language description of what to do. + + Returns: + JSON string with the automation result. + + Raises: + RuntimeError: If the automation fails. + """ + client = self._make_client() + result = client.agent.run(url=url, goal=goal, **self._run_kwargs()) + return self._handle_run_result(result) + + # ── sync: SSE streaming ────────────────────────────────────────── + + def run_sse(self, url: str, goal: str) -> Generator[Dict[str, Any], None, None]: + """Run a web automation task with SSE streaming. + + Yields each SSE event as a dict with ``type``, and type-specific + fields like ``streamingUrl``, ``purpose``, ``status``, ``resultJson``. + + Args: + url: The target URL to automate. + goal: Natural language description of what to do. + + Yields: + Dict with the parsed event data. + """ + client = self._make_client() + with client.agent.stream(url=url, goal=goal, **self._run_kwargs()) as stream: + for event in stream: + yield self._normalize_event(event) + + # ── async: run ─────────────────────────────────────────────────── + + async def arun(self, url: str, goal: str) -> str: + """Run a web automation task asynchronously using queue + poll. + + Args: + url: The target URL to automate. + goal: Natural language description of what to do. + + Returns: + JSON string with the automation result. + + Raises: + RuntimeError: If the automation fails or is cancelled. + """ + client = self._make_async_client() + queue_result = await client.agent.queue( + url=url, goal=goal, **self._run_kwargs() + ) + + deadline = time.monotonic() + self.timeout + while True: + if time.monotonic() > deadline: + raise TimeoutError( + f"Polling timed out after {self.timeout}s " + f"for run {queue_result.run_id}" + ) + await asyncio.sleep(self.poll_interval) + run_id = queue_result.run_id + if run_id is None: + raise RuntimeError("TinyFish async run did not return a run ID") + run = await client.runs.get(run_id) + status = getattr(run, "status", None) + if status in (RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED): + return self._handle_run_result(run) + + # ── async: SSE streaming ───────────────────────────────────────── + + async def arun_sse( + self, url: str, goal: str + ) -> AsyncGenerator[Dict[str, Any], None]: + """Run a web automation task with async SSE streaming. + + Yields each event as a dict (same format as ``run_sse``). + + Args: + url: The target URL to automate. + goal: Natural language description of what to do. + + Yields: + Dict with the parsed event data. + """ + client = self._make_async_client() + async with client.agent.stream( + url=url, goal=goal, **self._run_kwargs() + ) as stream: + async for event in stream: + yield self._normalize_event(event) + + # ── run management ─────────────────────────────────────────────── + + async def get_run(self, run_id: str) -> Dict[str, Any]: + """Get details of a specific automation run. + + Args: + run_id: The run ID to look up. + + Returns: + Dict with run details including status, result, error. + """ + client = self._make_async_client() + run = await client.runs.get(run_id) + st = run.status + return { + "run_id": getattr(run, "run_id", run_id), + "status": st.value if isinstance(st, RunStatus) else str(st), + "result": getattr(run, "result", None), + "error": getattr(run, "error", None), + "streaming_url": getattr(run, "streaming_url", None), + } + + async def list_runs( + self, + *, + status: Optional[str] = None, + limit: Optional[int] = None, + ) -> Dict[str, Any]: + """List automation runs with optional filtering. + + Args: + status: Filter by status (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED). + limit: Results per page, 1-100 (default: 20). + + Returns: + Dict with run data. + """ + client = self._make_async_client() + kwargs: Dict[str, Any] = {} + if limit is not None: + kwargs["limit"] = limit + if status is not None: + kwargs["status"] = RunStatus(status) + response = await client.runs.list(**kwargs) + runs = getattr(response, "runs", []) + return { + "data": [ + { + "run_id": getattr(r, "run_id", None), + "status": ( + r.status.value + if isinstance(r.status, RunStatus) + else str(r.status) + ), + "url": getattr(r, "url", None), + "goal": getattr(r, "goal", None), + } + for r in runs + ] + } + + # -- web search -------------------------------------------------------- + + def search( + self, + query: str, + *, + location: Optional[str] = None, + language: Optional[str] = None, + ) -> str: + """Search the web using the TinyFish SDK.""" + client = self._make_client() + response = client.search.query( + query=query, + location=location, + language=language, + ) + return self._dump_json(response) + + async def asearch( + self, + query: str, + *, + location: Optional[str] = None, + language: Optional[str] = None, + ) -> str: + """Async search the web using the TinyFish SDK.""" + client = self._make_async_client() + response = await client.search.query( + query=query, + location=location, + language=language, + ) + return self._dump_json(response) + + # -- content fetch ----------------------------------------------------- + + def fetch( + self, + urls: list[str], + *, + format: FetchFormat = "markdown", + links: Optional[bool] = None, + image_links: Optional[bool] = None, + ) -> str: + """Fetch clean page content using the TinyFish SDK.""" + client = self._make_client() + response = client.fetch.get_contents( + urls=urls, + format=format, + links=links, + image_links=image_links, + ) + return self._dump_json(response) + + async def afetch( + self, + urls: list[str], + *, + format: FetchFormat = "markdown", + links: Optional[bool] = None, + image_links: Optional[bool] = None, + ) -> str: + """Async fetch clean page content using the TinyFish SDK.""" + client = self._make_async_client() + response = await client.fetch.get_contents( + urls=urls, + format=format, + links=links, + image_links=image_links, + ) + return self._dump_json(response) + + # -- browser sessions ------------------------------------------------- + + def create_browser_session( + self, + *, + url: Optional[str] = None, + timeout_seconds: Optional[int] = None, + ) -> str: + """Create a remote browser session using the TinyFish SDK.""" + client = self._make_client() + response = client.browser.sessions.create( + url=url, + timeout_seconds=timeout_seconds, + ) + return self._dump_json(response) + + async def acreate_browser_session( + self, + *, + url: Optional[str] = None, + timeout_seconds: Optional[int] = None, + ) -> str: + """Async create a remote browser session using the TinyFish SDK.""" + client = self._make_async_client() + response = await client.browser.sessions.create( + url=url, + timeout_seconds=timeout_seconds, + ) + return self._dump_json(response) diff --git a/langchain/langchain_tinyfish/py.typed b/langchain/langchain_tinyfish/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/langchain/langchain_tinyfish/tool.py b/langchain/langchain_tinyfish/tool.py new file mode 100644 index 0000000..b11951e --- /dev/null +++ b/langchain/langchain_tinyfish/tool.py @@ -0,0 +1,405 @@ +"""TinyFish Web Agent tool for LangChain.""" + +from __future__ import annotations + +import json +import logging +import os +import traceback +from typing import Any, Callable, Literal, Optional, Type, cast + +from langchain_core.callbacks import ( + AsyncCallbackManagerForToolRun, + CallbackManagerForToolRun, +) +from langchain_core.tools import BaseTool +from pydantic import BaseModel, Field + +from langchain_tinyfish._api_wrapper import TinyFishAPIWrapper + +logger = logging.getLogger(__name__) + +# Cache the LangGraph import so we don't try/except on every invocation. +_stream_writer_fn: Optional[Callable[[], Any]] = None +_stream_writer_checked = False + + +def _get_stream_writer_fn() -> Optional[Callable[[], Any]]: + """Return the ``get_stream_writer`` callable from langgraph, or None.""" + global _stream_writer_fn, _stream_writer_checked + if not _stream_writer_checked: + _stream_writer_checked = True + try: + from langgraph.config import get_stream_writer + + _stream_writer_fn = get_stream_writer + except Exception: + _stream_writer_fn = None + return _stream_writer_fn + + +class TinyFishInput(BaseModel): + """Input schema for the TinyFish Web Agent tool.""" + + url: str = Field(description="The URL of the website to automate") + goal: str = Field( + description=( + "Natural language instructions describing what to do on the page. " + "Be specific: include field names, button labels, " + "and expected output format." + ) + ) + + +class TinyFishSearchInput(BaseModel): + """Input schema for the TinyFish web search tool.""" + + query: str = Field(description="The web search query to run") + location: Optional[str] = Field( + default=None, + description="Optional location to scope results, such as 'United States'", + ) + language: Optional[str] = Field( + default=None, + description="Optional language code for results, such as 'en'", + ) + + +class TinyFishFetchInput(BaseModel): + """Input schema for the TinyFish content fetch tool.""" + + urls: list[str] = Field( + description="One to ten URLs to fetch and extract clean content from", + ) + format: Literal["markdown", "html", "json"] = Field( + default="markdown", + description="Output format for extracted content", + ) + links: Optional[bool] = Field( + default=None, + description="Whether to include extracted page links", + ) + image_links: Optional[bool] = Field( + default=None, + description="Whether to include extracted image links", + ) + + +class TinyFishBrowserSessionInput(BaseModel): + """Input schema for creating a TinyFish remote browser session.""" + + url: Optional[str] = Field( + default=None, + description="Optional target URL to open when the browser session starts", + ) + timeout_seconds: Optional[int] = Field( + default=None, + description="Optional inactivity timeout for the browser session", + ) + + +class TinyFishWebAutomation(BaseTool): + """AI-powered web automation tool using TinyFish Web Agent. + + Automates any website using natural language instructions. Can navigate + pages, fill forms, click buttons, extract structured data, and perform + multi-step workflows. + + Setup: + Install ``langchain-tinyfish`` and set the ``TINYFISH_API_KEY`` + environment variable. + + .. code-block:: bash + + pip install langchain-tinyfish + export TINYFISH_API_KEY="sk-mino-..." + + Key init args: + api_wrapper: TinyFishAPIWrapper instance with configuration options. + + Instantiate: + .. code-block:: python + + from langchain_tinyfish import TinyFishWebAutomation + + tool = TinyFishWebAutomation() + + Invoke directly: + .. code-block:: python + + result = tool.invoke({ + "url": "https://example.com/products", + "goal": "Extract all product names and prices as JSON", + }) + + Use with an agent: + .. code-block:: python + + from langchain_openai import ChatOpenAI + from langgraph.prebuilt import create_react_agent + + llm = ChatOpenAI(model="gpt-4o") + agent = create_react_agent(llm, [TinyFishWebAutomation()]) + + With stealth mode and proxy: + .. code-block:: python + + from langchain_tinyfish import TinyFishAPIWrapper, TinyFishWebAutomation + + tool = TinyFishWebAutomation( + api_wrapper=TinyFishAPIWrapper( + browser_profile="stealth", + proxy_enabled=True, + proxy_country_code="US", + ) + ) + """ + + name: str = "tinyfish_web_automation" + description: str = ( + "An AI-powered web automation tool that controls a real browser using " + "natural language. Use this to navigate websites, extract structured data, " + "fill forms, click buttons, and perform multi-step workflows on any website. " + "Input requires a URL and a goal describing what to do on the page. " + "Returns structured JSON results extracted from the page." + ) + args_schema: Type[BaseModel] = TinyFishInput + api_wrapper: TinyFishAPIWrapper = Field(default_factory=TinyFishAPIWrapper) + + def _get_stream_writer(self) -> Optional[Callable[..., Any]]: + """Try to get LangGraph's stream writer. Returns None outside LangGraph.""" + fn = _get_stream_writer_fn() + if fn is None: + return None + try: + writer = fn() + return cast(Callable[..., Any], writer) + except Exception: + return None + + @staticmethod + def _format_error(e: Exception) -> str: + """Format an exception as a structured JSON string for agent consumption.""" + payload: dict[str, Any] = { + "error": True, + "type": type(e).__name__, + "message": str(e), + } + if os.getenv("TINYFISH_DEBUG", "").lower() in ("1", "true"): + payload["traceback"] = traceback.format_exc() + return json.dumps(payload) + + @staticmethod + def _dispatch_event( + writer: Callable[..., Any], event: dict[str, Any] + ) -> Optional[dict[str, Any]]: + """Dispatch a single SSE event to the writer. Returns the event if COMPLETE.""" + etype = event.get("type") + if etype == "STREAMING_URL": + writer({"type": "streaming_url", "url": event.get("streamingUrl", "")}) + elif etype == "PROGRESS": + writer({"type": "progress", "message": event.get("purpose", "")}) + elif etype == "COMPLETE": + return event + return None + + def _run( + self, + url: str, + goal: str, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Run the tool synchronously. + + When invoked inside a LangGraph execution context, uses the SSE + streaming endpoint and emits progress events via ``get_stream_writer()``. + Falls back to the simple blocking run when no stream writer is available. + """ + try: + writer = self._get_stream_writer() + if writer is None: + return self.api_wrapper.run(url=url, goal=goal) + + result = None + for event in self.api_wrapper.run_sse(url=url, goal=goal): + complete = self._dispatch_event(writer, event) + if complete is not None: + result = complete + + if result is None: + raise RuntimeError("SSE stream ended without a COMPLETE event") + return TinyFishAPIWrapper.handle_complete_event(result) + except Exception as e: + logger.exception("TinyFish tool error during _run") + return self._format_error(e) + + async def _arun( + self, + url: str, + goal: str, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + """Run the tool asynchronously. + + When invoked inside a LangGraph execution context, uses the async SSE + streaming endpoint and emits progress events via ``get_stream_writer()``. + Falls back to async queue+poll when no stream writer is available. + """ + try: + writer = self._get_stream_writer() + if writer is None: + return await self.api_wrapper.arun(url=url, goal=goal) + + result = None + async for event in self.api_wrapper.arun_sse(url=url, goal=goal): + complete = self._dispatch_event(writer, event) + if complete is not None: + result = complete + + if result is None: + raise RuntimeError("SSE stream ended without a COMPLETE event") + return TinyFishAPIWrapper.handle_complete_event(result) + except Exception as e: + logger.exception("TinyFish tool error during _arun") + return self._format_error(e) + + +class TinyFishSearch(BaseTool): + """LangChain tool for TinyFish web search.""" + + name: str = "tinyfish_search" + description: str = ( + "Search the web using TinyFish and return structured search results. " + "Use this when an agent needs to discover relevant URLs or current " + "web pages before fetching content or running browser automation." + ) + args_schema: Type[BaseModel] = TinyFishSearchInput + api_wrapper: TinyFishAPIWrapper = Field(default_factory=TinyFishAPIWrapper) + + def _run( + self, + query: str, + location: Optional[str] = None, + language: Optional[str] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + try: + return self.api_wrapper.search( + query=query, + location=location, + language=language, + ) + except Exception as e: + logger.exception("TinyFish search tool error during _run") + return TinyFishWebAutomation._format_error(e) + + async def _arun( + self, + query: str, + location: Optional[str] = None, + language: Optional[str] = None, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + try: + return await self.api_wrapper.asearch( + query=query, + location=location, + language=language, + ) + except Exception as e: + logger.exception("TinyFish search tool error during _arun") + return TinyFishWebAutomation._format_error(e) + + +class TinyFishFetch(BaseTool): + """LangChain tool for TinyFish content fetch.""" + + name: str = "tinyfish_fetch" + description: str = ( + "Fetch clean content from one or more URLs using TinyFish. Use this " + "when an agent already knows the URL and needs readable page content, " + "metadata, links, or image links without controlling a browser." + ) + args_schema: Type[BaseModel] = TinyFishFetchInput + api_wrapper: TinyFishAPIWrapper = Field(default_factory=TinyFishAPIWrapper) + + def _run( + self, + urls: list[str], + format: Literal["markdown", "html", "json"] = "markdown", + links: Optional[bool] = None, + image_links: Optional[bool] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + try: + return self.api_wrapper.fetch( + urls=urls, + format=format, + links=links, + image_links=image_links, + ) + except Exception as e: + logger.exception("TinyFish fetch tool error during _run") + return TinyFishWebAutomation._format_error(e) + + async def _arun( + self, + urls: list[str], + format: Literal["markdown", "html", "json"] = "markdown", + links: Optional[bool] = None, + image_links: Optional[bool] = None, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + try: + return await self.api_wrapper.afetch( + urls=urls, + format=format, + links=links, + image_links=image_links, + ) + except Exception as e: + logger.exception("TinyFish fetch tool error during _arun") + return TinyFishWebAutomation._format_error(e) + + +class TinyFishBrowserSession(BaseTool): + """LangChain tool for creating TinyFish remote browser sessions.""" + + name: str = "tinyfish_browser_session" + description: str = ( + "Create a TinyFish remote browser session and return its session ID, " + "CDP WebSocket URL, and base URL. Use this for low-level browser " + "control from external browser automation clients." + ) + args_schema: Type[BaseModel] = TinyFishBrowserSessionInput + api_wrapper: TinyFishAPIWrapper = Field(default_factory=TinyFishAPIWrapper) + + def _run( + self, + url: Optional[str] = None, + timeout_seconds: Optional[int] = None, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + try: + return self.api_wrapper.create_browser_session( + url=url, + timeout_seconds=timeout_seconds, + ) + except Exception as e: + logger.exception("TinyFish browser session tool error during _run") + return TinyFishWebAutomation._format_error(e) + + async def _arun( + self, + url: Optional[str] = None, + timeout_seconds: Optional[int] = None, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + try: + return await self.api_wrapper.acreate_browser_session( + url=url, + timeout_seconds=timeout_seconds, + ) + except Exception as e: + logger.exception("TinyFish browser session tool error during _arun") + return TinyFishWebAutomation._format_error(e) diff --git a/langchain/pyproject.toml b/langchain/pyproject.toml new file mode 100644 index 0000000..2ca3891 --- /dev/null +++ b/langchain/pyproject.toml @@ -0,0 +1,60 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "langchain-tinyfish" +version = "0.1.0" +description = "LangChain integration for TinyFish Web Agent - AI-powered web automation" +readme = "README.md" +license = { text = "MIT" } +requires-python = ">=3.10,<4.0" +authors = [{ name = "TinyFish", email = "support@tinyfish.io" }] +keywords = ["langchain", "tinyfish", "web-automation", "ai-agent", "browser-automation"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Topic :: Software Development :: Libraries", + "Topic :: Internet :: WWW/HTTP :: Browsers", + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] +dependencies = [ + "langchain-core>=0.3.0,<2.0.0", + "tinyfish>=0.2.5", +] + +[project.urls] +Homepage = "https://tinyfish.ai" +Documentation = "https://docs.tinyfish.ai" +Repository = "https://github.com/tinyfish-io/tinyfish-web-agent-integrations" + +[tool.hatch.build.targets.wheel] +packages = ["langchain_tinyfish"] + +[tool.ruff] +target-version = "py310" +line-length = 88 + +[tool.ruff.lint] +select = ["E", "F", "I", "T201", "W"] + +[tool.mypy] +python_version = "3.10" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +ignore_missing_imports = true + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "--strict-markers --strict-config --durations=5" +markers = [ + "compile: mark placeholder test used to compile integration tests without running them", +] +asyncio_mode = "auto" diff --git a/langchain/requirements-dev.txt b/langchain/requirements-dev.txt new file mode 100644 index 0000000..c67fbec --- /dev/null +++ b/langchain/requirements-dev.txt @@ -0,0 +1,7 @@ +pytest>=7.0,<9.0 +pytest-asyncio>=0.21,<1.0 +responses>=0.23,<1.0 +langchain-tests>=0.3.0,<1.0.0 +ruff>=0.4,<1.0 +mypy>=1.0,<2.0 +types-requests>=2.28,<3.0 diff --git a/langchain/scripts/check_imports.py b/langchain/scripts/check_imports.py new file mode 100644 index 0000000..350d246 --- /dev/null +++ b/langchain/scripts/check_imports.py @@ -0,0 +1,22 @@ +"""Validate that all Python files in the package can be imported.""" + +import importlib.util +import sys +import traceback + + +if __name__ == "__main__": + files = sys.argv[1:] + has_failure = False + for file in files: + try: + spec = importlib.util.spec_from_file_location("x", file) + assert spec is not None and spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + except Exception: + has_failure = True + print(file) # noqa: T201 + traceback.print_exc() + print() # noqa: T201 + sys.exit(1 if has_failure else 0) diff --git a/langchain/tests/__init__.py b/langchain/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/langchain/tests/integration_tests/__init__.py b/langchain/tests/integration_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/langchain/tests/integration_tests/test_compile.py b/langchain/tests/integration_tests/test_compile.py new file mode 100644 index 0000000..7e7e71b --- /dev/null +++ b/langchain/tests/integration_tests/test_compile.py @@ -0,0 +1,9 @@ +"""Placeholder test used to compile integration tests without running them.""" + +import pytest + + +@pytest.mark.compile +def test_placeholder() -> None: + """Used for compiling integration tests without running any real tests.""" + pass diff --git a/langchain/tests/integration_tests/test_integration.py b/langchain/tests/integration_tests/test_integration.py new file mode 100644 index 0000000..2ac78dc --- /dev/null +++ b/langchain/tests/integration_tests/test_integration.py @@ -0,0 +1,59 @@ +"""Integration tests for TinyFish LangChain tool. + +These tests make real API calls and require TINYFISH_API_KEY to be set. +They are skipped automatically if the key is not available. +""" + +from __future__ import annotations + +import json +import os + +import pytest + +from langchain_tinyfish import TinyFishAPIWrapper, TinyFishWebAutomation + +skip_no_key = pytest.mark.skipif( + not os.environ.get("TINYFISH_API_KEY"), + reason="TINYFISH_API_KEY not set", +) + + +@skip_no_key +class TestAPIWrapperIntegration: + """Integration tests for TinyFishAPIWrapper.""" + + def test_sync_extraction(self) -> None: + wrapper = TinyFishAPIWrapper() + result = wrapper.run( + url="https://scrapeme.live/shop/", + goal="Extract the first 2 product names and prices. Return as JSON.", + ) + parsed = json.loads(result) + assert parsed is not None + + @pytest.mark.asyncio + async def test_async_extraction(self) -> None: + wrapper = TinyFishAPIWrapper() + result = await wrapper.arun( + url="https://scrapeme.live/shop/", + goal="Extract the first 2 product names and prices. Return as JSON.", + ) + parsed = json.loads(result) + assert parsed is not None + + +@skip_no_key +class TestToolIntegration: + """Integration tests for the TinyFishWebAutomation tool.""" + + def test_tool_invoke(self) -> None: + tool = TinyFishWebAutomation() + result = tool.invoke( + { + "url": "https://scrapeme.live/shop/", + "goal": "Extract the first product name on the page.", + } + ) + assert isinstance(result, str) + assert len(result) > 0 diff --git a/langchain/tests/integration_tests/test_standard.py b/langchain/tests/integration_tests/test_standard.py new file mode 100644 index 0000000..ab20e8b --- /dev/null +++ b/langchain/tests/integration_tests/test_standard.py @@ -0,0 +1,26 @@ +"""Standard integration tests for TinyFish tool using langchain-tests.""" + +from __future__ import annotations + +from typing import Any + +from langchain_tests.integration_tests import ToolsIntegrationTests + +from langchain_tinyfish import TinyFishWebAutomation + + +class TestTinyFishToolIntegration(ToolsIntegrationTests): + @property + def tool_constructor(self) -> type[TinyFishWebAutomation]: + return TinyFishWebAutomation + + @property + def tool_constructor_params(self) -> dict[str, Any]: + return {} + + @property + def tool_invoke_params_example(self) -> dict[str, Any]: + return { + "url": "https://scrapeme.live/shop/", + "goal": "Extract the first product name on the page", + } diff --git a/langchain/tests/unit_tests/__init__.py b/langchain/tests/unit_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/langchain/tests/unit_tests/test_api_wrapper.py b/langchain/tests/unit_tests/test_api_wrapper.py new file mode 100644 index 0000000..9d1494c --- /dev/null +++ b/langchain/tests/unit_tests/test_api_wrapper.py @@ -0,0 +1,440 @@ +"""Unit tests for TinyFishAPIWrapper.""" + +from __future__ import annotations + +import json +import os +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from pydantic import SecretStr +from tinyfish import BrowserProfile, ProxyCountryCode, RunStatus + +from langchain_tinyfish._api_wrapper import TinyFishAPIWrapper + + +def _run_result(status=RunStatus.COMPLETED, result=None, error=None): + """Create a mock run result object.""" + return SimpleNamespace(status=status, result=result, error=error) + + +def _queue_result(run_id="run-abc"): + """Create a mock queue result.""" + return SimpleNamespace(run_id=run_id) + + +class TestAPIKeyHandling: + """Tests for API key configuration.""" + + def test_explicit_api_key(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test-123")) + assert wrapper.api_key.get_secret_value() == "sk-test-123" + + def test_api_key_from_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TINYFISH_API_KEY", "sk-env-key") + wrapper = TinyFishAPIWrapper() + assert wrapper.api_key.get_secret_value() == "sk-env-key" + + def test_api_key_not_in_repr(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-secret-key")) + assert "sk-secret-key" not in repr(wrapper) + assert "sk-secret-key" not in str(wrapper) + + def test_missing_api_key_raises(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("TINYFISH_API_KEY", raising=False) + with pytest.raises(ValueError): + TinyFishAPIWrapper() + + +class TestIntegrationTag: + """Tests for automatic integration attribution.""" + + def test_make_client_sets_default_integration_tag( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.delenv("TF_API_INTEGRATION", raising=False) + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + + with patch("langchain_tinyfish._api_wrapper.TinyFish") as mock_tinyfish: + wrapper._make_client() + + assert os.environ["TF_API_INTEGRATION"] == "langchain" + mock_tinyfish.assert_called_once_with(api_key="sk-test") + + def test_make_client_preserves_existing_integration_tag( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setenv("TF_API_INTEGRATION", "custom-tag") + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + + with patch("langchain_tinyfish._api_wrapper.TinyFish"): + wrapper._make_client() + + assert os.environ["TF_API_INTEGRATION"] == "custom-tag" + + +class TestBrowserProfileAndProxy: + """Tests for SDK config helpers.""" + + def test_default_browser_profile(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + assert wrapper._get_browser_profile() == BrowserProfile.LITE + + def test_stealth_browser_profile(self) -> None: + wrapper = TinyFishAPIWrapper( + api_key=SecretStr("sk-test"), browser_profile="stealth" + ) + assert wrapper._get_browser_profile() == BrowserProfile.STEALTH + + def test_no_proxy_by_default(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + assert wrapper._get_proxy_config() is None + + def test_proxy_config(self) -> None: + wrapper = TinyFishAPIWrapper( + api_key=SecretStr("sk-test"), + proxy_enabled=True, + proxy_country_code="GB", + ) + proxy = wrapper._get_proxy_config() + assert proxy is not None + assert proxy.enabled is True + assert proxy.country_code == ProxyCountryCode("GB") + + +class TestHandleRunResult: + """Tests for _handle_run_result.""" + + def test_completed_with_result(self) -> None: + result = TinyFishAPIWrapper._handle_run_result( + _run_result(result={"title": "Example"}) + ) + assert json.loads(result) == {"title": "Example"} + + def test_completed_no_result(self) -> None: + result = TinyFishAPIWrapper._handle_run_result(_run_result(result=None)) + parsed = json.loads(result) + assert parsed["status"] == "completed" + + def test_failed_with_error_dict(self) -> None: + with pytest.raises(RuntimeError, match="Page not found"): + TinyFishAPIWrapper._handle_run_result( + _run_result( + status=RunStatus.FAILED, + error={"message": "Page not found"}, + ) + ) + + def test_failed_with_error_string(self) -> None: + with pytest.raises(RuntimeError, match="Something went wrong"): + TinyFishAPIWrapper._handle_run_result( + _run_result( + status=RunStatus.FAILED, + error="Something went wrong", + ) + ) + + def test_cancelled_raises(self) -> None: + with pytest.raises(RuntimeError, match="cancelled"): + TinyFishAPIWrapper._handle_run_result( + _run_result(status=RunStatus.CANCELLED) + ) + + +class TestRunSync: + """Tests for the synchronous run method.""" + + def test_successful_run(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + mock_client = MagicMock() + mock_client.agent.run.return_value = _run_result( + result={"products": [{"name": "Widget", "price": "$9.99"}]} + ) + + with patch.object(wrapper, "_make_client", return_value=mock_client): + result = wrapper.run("https://example.com/products", "Extract products") + + parsed = json.loads(result) + assert parsed["products"][0]["name"] == "Widget" + mock_client.agent.run.assert_called_once() + + def test_failed_run(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + mock_client = MagicMock() + mock_client.agent.run.return_value = _run_result( + status=RunStatus.FAILED, + error={"message": "Timeout"}, + ) + + with patch.object(wrapper, "_make_client", return_value=mock_client): + with pytest.raises(RuntimeError, match="Timeout"): + wrapper.run("https://example.com", "Do something") + + def test_stealth_with_proxy(self) -> None: + wrapper = TinyFishAPIWrapper( + api_key=SecretStr("sk-test"), + browser_profile="stealth", + proxy_enabled=True, + proxy_country_code="JP", + ) + mock_client = MagicMock() + mock_client.agent.run.return_value = _run_result(result={"data": "ok"}) + + with patch.object(wrapper, "_make_client", return_value=mock_client): + wrapper.run("https://example.com", "Extract") + + call_kwargs = mock_client.agent.run.call_args.kwargs + assert call_kwargs["browser_profile"] == BrowserProfile.STEALTH + assert call_kwargs["proxy_config"].enabled is True + assert call_kwargs["proxy_config"].country_code == ProxyCountryCode("JP") + + +class TestModelConfig: + """Tests for Pydantic model config.""" + + def test_extra_fields_forbidden(self) -> None: + with pytest.raises(Exception): + TinyFishAPIWrapper(api_key=SecretStr("sk-test"), unknown_field="value") + + def test_default_values(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + assert wrapper.browser_profile == "lite" + assert wrapper.proxy_enabled is False + assert wrapper.proxy_country_code == "US" + assert wrapper.timeout == 300 + assert wrapper.poll_interval == 2.0 + + +class TestArun: + """Tests for the async arun method (queue + polling).""" + + @pytest.mark.asyncio + async def test_arun_polls_until_complete(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test"), timeout=10) + mock_client = AsyncMock() + mock_client.agent.queue = AsyncMock(return_value=_queue_result("run-abc")) + + poll_pending = SimpleNamespace( + status=RunStatus.RUNNING, result=None, error=None + ) + poll_done = SimpleNamespace( + status=RunStatus.COMPLETED, result={"title": "Hello"}, error=None + ) + mock_client.runs.get = AsyncMock( + side_effect=[poll_pending, poll_pending, poll_done] + ) + + with patch.object(wrapper, "_make_async_client", return_value=mock_client): + with patch("asyncio.sleep", new_callable=AsyncMock): + result = await wrapper.arun("https://example.com", "Get title") + + assert json.loads(result) == {"title": "Hello"} + assert mock_client.runs.get.call_count == 3 + + @pytest.mark.asyncio + async def test_arun_failed_run(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test"), timeout=10) + mock_client = AsyncMock() + mock_client.agent.queue = AsyncMock(return_value=_queue_result("run-fail")) + mock_client.runs.get = AsyncMock( + return_value=SimpleNamespace( + status=RunStatus.FAILED, + result=None, + error={"message": "Element not found"}, + ) + ) + + with patch.object(wrapper, "_make_async_client", return_value=mock_client): + with patch("asyncio.sleep", new_callable=AsyncMock): + with pytest.raises(RuntimeError, match="Element not found"): + await wrapper.arun("https://example.com", "Click button") + + +class TestGetRun: + """Tests for get_run.""" + + @pytest.mark.asyncio + async def test_get_run(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + mock_client = AsyncMock() + mock_client.runs.get = AsyncMock( + return_value=SimpleNamespace( + run_id="run-123", + status=RunStatus.COMPLETED, + result={"x": 1}, + error=None, + streaming_url=None, + ) + ) + + with patch.object(wrapper, "_make_async_client", return_value=mock_client): + result = await wrapper.get_run("run-123") + + assert result["run_id"] == "run-123" + assert result["status"] == "COMPLETED" + assert result["result"] == {"x": 1} + + +class TestListRuns: + """Tests for list_runs.""" + + @pytest.mark.asyncio + async def test_list_runs_with_filters(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + mock_client = AsyncMock() + mock_run = SimpleNamespace( + run_id="r1", status=RunStatus.COMPLETED, url="https://a.com", goal="Get A" + ) + mock_client.runs.list = AsyncMock(return_value=SimpleNamespace(runs=[mock_run])) + + with patch.object(wrapper, "_make_async_client", return_value=mock_client): + result = await wrapper.list_runs(status="COMPLETED", limit=10) + + assert len(result["data"]) == 1 + assert result["data"][0]["run_id"] == "r1" + mock_client.runs.list.assert_called_once_with( + limit=10, status=RunStatus("COMPLETED") + ) + + +class TestSearchFetchAndBrowser: + """Tests for SDK-backed search, fetch, and browser helpers.""" + + def test_search_uses_sdk_resource(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + mock_client = MagicMock() + mock_client.search.query.return_value = SimpleNamespace( + model_dump=lambda: { + "query": "tinyfish", + "results": [{"title": "TinyFish", "url": "https://tinyfish.ai"}], + "total_results": 1, + } + ) + + with patch.object(wrapper, "_make_client", return_value=mock_client): + result = wrapper.search("tinyfish", location="United States", language="en") + + assert json.loads(result)["total_results"] == 1 + mock_client.search.query.assert_called_once_with( + query="tinyfish", + location="United States", + language="en", + ) + + @pytest.mark.asyncio + async def test_asearch_uses_sdk_resource(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + mock_client = AsyncMock() + mock_client.search.query = AsyncMock( + return_value=SimpleNamespace( + model_dump=lambda: { + "query": "tinyfish", + "results": [], + "total_results": 0, + } + ) + ) + + with patch.object(wrapper, "_make_async_client", return_value=mock_client): + result = await wrapper.asearch("tinyfish") + + assert json.loads(result)["query"] == "tinyfish" + mock_client.search.query.assert_called_once_with( + query="tinyfish", + location=None, + language=None, + ) + + def test_fetch_uses_sdk_resource(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + mock_client = MagicMock() + mock_client.fetch.get_contents.return_value = SimpleNamespace( + model_dump=lambda: { + "results": [{"url": "https://example.com", "text": "Example"}], + "errors": [], + } + ) + + with patch.object(wrapper, "_make_client", return_value=mock_client): + result = wrapper.fetch( + ["https://example.com"], + format="markdown", + links=True, + image_links=False, + ) + + assert json.loads(result)["results"][0]["text"] == "Example" + mock_client.fetch.get_contents.assert_called_once_with( + urls=["https://example.com"], + format="markdown", + links=True, + image_links=False, + ) + + @pytest.mark.asyncio + async def test_afetch_uses_sdk_resource(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + mock_client = AsyncMock() + mock_client.fetch.get_contents = AsyncMock( + return_value=SimpleNamespace( + model_dump=lambda: {"results": [], "errors": []} + ) + ) + + with patch.object(wrapper, "_make_async_client", return_value=mock_client): + result = await wrapper.afetch(["https://example.com"]) + + assert json.loads(result) == {"results": [], "errors": []} + mock_client.fetch.get_contents.assert_called_once_with( + urls=["https://example.com"], + format="markdown", + links=None, + image_links=None, + ) + + def test_create_browser_session_uses_sdk_resource(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + mock_client = MagicMock() + mock_client.browser.sessions.create.return_value = SimpleNamespace( + model_dump=lambda: { + "session_id": "tf-session", + "cdp_url": "wss://example.test", + "base_url": "https://example.test", + } + ) + + with patch.object(wrapper, "_make_client", return_value=mock_client): + result = wrapper.create_browser_session( + url="https://example.com", + timeout_seconds=120, + ) + + assert json.loads(result)["session_id"] == "tf-session" + mock_client.browser.sessions.create.assert_called_once_with( + url="https://example.com", + timeout_seconds=120, + ) + + @pytest.mark.asyncio + async def test_acreate_browser_session_uses_sdk_resource(self) -> None: + wrapper = TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + mock_client = AsyncMock() + mock_client.browser.sessions.create = AsyncMock( + return_value=SimpleNamespace( + model_dump=lambda: { + "session_id": "tf-session", + "cdp_url": "wss://example.test", + "base_url": "https://example.test", + } + ) + ) + + with patch.object(wrapper, "_make_async_client", return_value=mock_client): + result = await wrapper.acreate_browser_session() + + assert json.loads(result)["base_url"] == "https://example.test" + mock_client.browser.sessions.create.assert_called_once_with( + url=None, + timeout_seconds=None, + ) diff --git a/langchain/tests/unit_tests/test_imports.py b/langchain/tests/unit_tests/test_imports.py new file mode 100644 index 0000000..a76352e --- /dev/null +++ b/langchain/tests/unit_tests/test_imports.py @@ -0,0 +1,20 @@ +"""Test that all expected symbols are exported from the package.""" + +from langchain_tinyfish import __all__ + +EXPECTED_ALL = [ + "TinyFishAPIWrapper", + "TinyFishBrowserSession", + "TinyFishBrowserSessionInput", + "TinyFishFetch", + "TinyFishFetchInput", + "TinyFishInput", + "TinyFishSearch", + "TinyFishSearchInput", + "TinyFishWebAutomation", + "__version__", +] + + +def test_all_imports() -> None: + assert sorted(EXPECTED_ALL) == sorted(__all__) diff --git a/langchain/tests/unit_tests/test_packaging.py b/langchain/tests/unit_tests/test_packaging.py new file mode 100644 index 0000000..8361f3b --- /dev/null +++ b/langchain/tests/unit_tests/test_packaging.py @@ -0,0 +1,16 @@ +"""Packaging metadata tests.""" + +from __future__ import annotations + +from pathlib import Path + +import tomllib + + +def test_tinyfish_dependency_floor() -> None: + pyproject_path = Path(__file__).resolve().parents[2] / "pyproject.toml" + pyproject = tomllib.loads(pyproject_path.read_text()) + + dependencies = pyproject["project"]["dependencies"] + + assert "tinyfish>=0.2.5" in dependencies diff --git a/langchain/tests/unit_tests/test_standard.py b/langchain/tests/unit_tests/test_standard.py new file mode 100644 index 0000000..97882fb --- /dev/null +++ b/langchain/tests/unit_tests/test_standard.py @@ -0,0 +1,39 @@ +"""Standard unit tests for TinyFish tool using langchain-tests.""" + +from __future__ import annotations + +from typing import Any + +from langchain_tests.unit_tests import ToolsUnitTests +from pydantic import SecretStr + +from langchain_tinyfish import TinyFishAPIWrapper, TinyFishWebAutomation + + +class TestTinyFishToolStandard(ToolsUnitTests): + @property + def tool_constructor(self) -> type[TinyFishWebAutomation]: + return TinyFishWebAutomation + + @property + def tool_constructor_params(self) -> dict[str, Any]: + return { + "api_wrapper": TinyFishAPIWrapper(api_key=SecretStr("sk-fake-key")), + } + + @property + def tool_invoke_params_example(self) -> dict[str, Any]: + return { + "url": "https://example.com", + "goal": "Extract the page title", + } + + @property + def init_from_env_params( + self, + ) -> tuple[dict[str, str], dict[str, Any], dict[str, Any]]: + return ( + {"TINYFISH_API_KEY": "sk-test-from-env"}, + {}, + {}, + ) diff --git a/langchain/tests/unit_tests/test_tool.py b/langchain/tests/unit_tests/test_tool.py new file mode 100644 index 0000000..fad8e26 --- /dev/null +++ b/langchain/tests/unit_tests/test_tool.py @@ -0,0 +1,404 @@ +"""Unit tests for TinyFishWebAutomation tool.""" + +from __future__ import annotations + +import json +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from pydantic import SecretStr + +from langchain_tinyfish import ( + TinyFishAPIWrapper, + TinyFishBrowserSession, + TinyFishBrowserSessionInput, + TinyFishFetch, + TinyFishFetchInput, + TinyFishInput, + TinyFishSearch, + TinyFishSearchInput, + TinyFishWebAutomation, +) + + +def _make_tool(**wrapper_kwargs): + """Create a tool with an explicit API key wrapper.""" + wrapper_kwargs.setdefault("api_key", SecretStr("sk-test")) + return TinyFishWebAutomation(api_wrapper=TinyFishAPIWrapper(**wrapper_kwargs)) + + +class TestToolMetadata: + """Tests for tool class attributes.""" + + def test_name(self) -> None: + assert _make_tool().name == "tinyfish_web_automation" + + def test_description(self) -> None: + desc = _make_tool().description.lower() + assert "web automation" in desc + assert "natural language" in desc + + def test_args_schema(self) -> None: + assert _make_tool().args_schema is TinyFishInput + + def test_args_schema_fields(self) -> None: + schema = TinyFishInput.model_json_schema() + assert "url" in schema["properties"] + assert "goal" in schema["properties"] + assert set(schema["required"]) == {"url", "goal"} + + +class TestToolRunNoWriter: + """Tests for _run/_arun when no stream writer is available (direct invoke).""" + + def test_run_falls_back_to_sync(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.run.return_value = '{"title": "Example"}' + + tool = TinyFishWebAutomation(api_wrapper=mock_wrapper) + with patch.object(tool, "_get_stream_writer", return_value=None): + result = tool._run(url="https://example.com", goal="Extract title") + + mock_wrapper.run.assert_called_once_with( + url="https://example.com", goal="Extract title" + ) + assert result == '{"title": "Example"}' + + def test_run_catches_exceptions(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.run.side_effect = RuntimeError("API error") + + tool = TinyFishWebAutomation(api_wrapper=mock_wrapper) + with patch.object(tool, "_get_stream_writer", return_value=None): + result = tool._run(url="https://example.com", goal="Do something") + + parsed = json.loads(result) + assert parsed["error"] is True + assert parsed["type"] == "RuntimeError" + assert parsed["message"] == "API error" + assert "traceback" not in parsed + + @pytest.mark.asyncio + async def test_arun_falls_back_to_async(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.arun = AsyncMock(return_value='{"data": "async result"}') + + tool = TinyFishWebAutomation(api_wrapper=mock_wrapper) + with patch.object(tool, "_get_stream_writer", return_value=None): + result = await tool._arun(url="https://example.com", goal="Extract data") + + mock_wrapper.arun.assert_called_once_with( + url="https://example.com", goal="Extract data" + ) + assert result == '{"data": "async result"}' + + @pytest.mark.asyncio + async def test_arun_catches_exceptions(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.arun = AsyncMock(side_effect=RuntimeError("async error")) + + tool = TinyFishWebAutomation(api_wrapper=mock_wrapper) + with patch.object(tool, "_get_stream_writer", return_value=None): + result = await tool._arun(url="https://example.com", goal="Do something") + + parsed = json.loads(result) + assert parsed["error"] is True + assert parsed["type"] == "RuntimeError" + assert parsed["message"] == "async error" + + +def _make_sse_events( + run_id="run-1", + streaming_url="https://stream.example.com/run-1", + progress_messages=None, + result_json=None, +): + """Build a standard SSE event sequence for testing.""" + events = [ + {"type": "STARTED", "runId": run_id}, + {"type": "STREAMING_URL", "runId": run_id, "streamingUrl": streaming_url}, + ] + for msg in progress_messages or ["Working"]: + events.append({"type": "PROGRESS", "runId": run_id, "purpose": msg}) + events.append( + { + "type": "COMPLETE", + "runId": run_id, + "status": "COMPLETED", + "resultJson": result_json or {"ok": True}, + } + ) + return events + + +class TestToolRunWithWriter: + """Tests for _run/_arun when a stream writer IS available (LangGraph).""" + + def test_run_uses_sse_and_emits_events(self) -> None: + sse_events = _make_sse_events( + progress_messages=["Visiting the page", "Extracting data"], + result_json={"items": [1, 2]}, + ) + + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.run_sse.return_value = iter(sse_events) + + writer = MagicMock() + tool = TinyFishWebAutomation(api_wrapper=mock_wrapper) + with patch.object(tool, "_get_stream_writer", return_value=writer): + result = tool._run(url="https://example.com", goal="Get items") + + mock_wrapper.run_sse.assert_called_once_with( + url="https://example.com", goal="Get items" + ) + mock_wrapper.run.assert_not_called() + + assert writer.call_count == 3 # streaming_url + 2 progress + calls = [c.args[0] for c in writer.call_args_list] + assert calls[0] == { + "type": "streaming_url", + "url": "https://stream.example.com/run-1", + } + assert calls[1] == {"type": "progress", "message": "Visiting the page"} + assert calls[2] == {"type": "progress", "message": "Extracting data"} + + assert json.loads(result) == {"items": [1, 2]} + + def test_run_sse_error_is_caught(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.run_sse.side_effect = ConnectionError("Network error") + + writer = MagicMock() + tool = TinyFishWebAutomation(api_wrapper=mock_wrapper) + with patch.object(tool, "_get_stream_writer", return_value=writer): + result = tool._run(url="https://example.com", goal="Test") + + parsed = json.loads(result) + assert parsed["error"] is True + assert parsed["type"] == "ConnectionError" + assert parsed["message"] == "Network error" + + def test_run_sse_no_complete_event(self) -> None: + """SSE stream that ends without a COMPLETE event returns an error.""" + sse_events = [ + {"type": "STARTED", "runId": "run-x"}, + {"type": "PROGRESS", "runId": "run-x", "purpose": "Loading"}, + ] + + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.run_sse.return_value = iter(sse_events) + + writer = MagicMock() + tool = TinyFishWebAutomation(api_wrapper=mock_wrapper) + with patch.object(tool, "_get_stream_writer", return_value=writer): + result = tool._run(url="https://example.com", goal="Test") + + parsed = json.loads(result) + assert parsed["error"] is True + assert parsed["type"] == "RuntimeError" + assert "COMPLETE" in parsed["message"] + + @pytest.mark.asyncio + async def test_arun_uses_sse_and_emits_events(self) -> None: + sse_events = _make_sse_events( + run_id="run-2", + streaming_url="https://stream.example.com/run-2", + progress_messages=["Filling form"], + result_json={"ok": True}, + ) + + async def mock_arun_sse(url: str, goal: str): + for event in sse_events: + yield event + + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.arun_sse = mock_arun_sse + + writer = MagicMock() + tool = TinyFishWebAutomation(api_wrapper=mock_wrapper) + with patch.object(tool, "_get_stream_writer", return_value=writer): + result = await tool._arun(url="https://example.com", goal="Fill form") + + mock_wrapper.arun.assert_not_called() + + assert writer.call_count == 2 # streaming_url + 1 progress + calls = [c.args[0] for c in writer.call_args_list] + assert calls[0] == { + "type": "streaming_url", + "url": "https://stream.example.com/run-2", + } + assert calls[1] == {"type": "progress", "message": "Filling form"} + + assert json.loads(result) == {"ok": True} + + +class TestToolInvoke: + """Tests for the public invoke interface.""" + + def test_invoke_with_dict(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.run.return_value = '{"products": []}' + + tool = TinyFishWebAutomation(api_wrapper=mock_wrapper) + with patch.object(tool, "_get_stream_writer", return_value=None): + result = tool.invoke( + {"url": "https://example.com/shop", "goal": "Get products"} + ) + + mock_wrapper.run.assert_called_once_with( + url="https://example.com/shop", goal="Get products" + ) + assert result == '{"products": []}' + + +class TestToolInstantiation: + """Tests for different ways to create the tool.""" + + def test_with_explicit_wrapper(self) -> None: + tool = _make_tool(browser_profile="stealth", proxy_enabled=True) + assert tool.api_wrapper.browser_profile == "stealth" + assert tool.api_wrapper.proxy_enabled is True + + +class TestAdditionalTools: + """Tests for SDK-backed search, fetch, and browser-session tools.""" + + def test_search_tool_metadata(self) -> None: + tool = TinyFishSearch( + api_wrapper=TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + ) + + assert tool.name == "tinyfish_search" + assert tool.args_schema is TinyFishSearchInput + + def test_search_tool_run(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.search.return_value = '{"results": []}' + tool = TinyFishSearch(api_wrapper=mock_wrapper) + + result = tool._run(query="tinyfish", location="United States", language="en") + + assert result == '{"results": []}' + mock_wrapper.search.assert_called_once_with( + query="tinyfish", + location="United States", + language="en", + ) + + @pytest.mark.asyncio + async def test_search_tool_arun(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.asearch = AsyncMock(return_value='{"results": []}') + tool = TinyFishSearch(api_wrapper=mock_wrapper) + + result = await tool._arun(query="tinyfish") + + assert result == '{"results": []}' + mock_wrapper.asearch.assert_called_once_with( + query="tinyfish", + location=None, + language=None, + ) + + def test_fetch_tool_metadata(self) -> None: + tool = TinyFishFetch( + api_wrapper=TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + ) + + assert tool.name == "tinyfish_fetch" + assert tool.args_schema is TinyFishFetchInput + + def test_fetch_tool_run(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.fetch.return_value = '{"results": []}' + tool = TinyFishFetch(api_wrapper=mock_wrapper) + + result = tool._run( + urls=["https://example.com"], + format="markdown", + links=True, + image_links=False, + ) + + assert result == '{"results": []}' + mock_wrapper.fetch.assert_called_once_with( + urls=["https://example.com"], + format="markdown", + links=True, + image_links=False, + ) + + @pytest.mark.asyncio + async def test_fetch_tool_arun(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.afetch = AsyncMock(return_value='{"results": []}') + tool = TinyFishFetch(api_wrapper=mock_wrapper) + + result = await tool._arun(urls=["https://example.com"]) + + assert result == '{"results": []}' + mock_wrapper.afetch.assert_called_once_with( + urls=["https://example.com"], + format="markdown", + links=None, + image_links=None, + ) + + def test_browser_session_tool_metadata(self) -> None: + tool = TinyFishBrowserSession( + api_wrapper=TinyFishAPIWrapper(api_key=SecretStr("sk-test")) + ) + + assert tool.name == "tinyfish_browser_session" + assert tool.args_schema is TinyFishBrowserSessionInput + + def test_browser_session_tool_run(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.create_browser_session.return_value = '{"session_id": "tf-1"}' + tool = TinyFishBrowserSession(api_wrapper=mock_wrapper) + + result = tool._run(url="https://example.com", timeout_seconds=120) + + assert result == '{"session_id": "tf-1"}' + mock_wrapper.create_browser_session.assert_called_once_with( + url="https://example.com", + timeout_seconds=120, + ) + + @pytest.mark.asyncio + async def test_browser_session_tool_arun(self) -> None: + mock_wrapper = MagicMock(spec=TinyFishAPIWrapper) + mock_wrapper.acreate_browser_session = AsyncMock( + return_value='{"session_id": "tf-1"}' + ) + tool = TinyFishBrowserSession(api_wrapper=mock_wrapper) + + result = await tool._arun() + + assert result == '{"session_id": "tf-1"}' + mock_wrapper.acreate_browser_session.assert_called_once_with( + url=None, + timeout_seconds=None, + ) + + def test_with_env_key(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("TINYFISH_API_KEY", "sk-env-test") + tool = TinyFishWebAutomation() + assert tool.api_wrapper.api_key.get_secret_value() == "sk-env-test" + + +class TestGetStreamWriter: + """Tests for _get_stream_writer helper.""" + + def test_returns_none_without_langgraph(self) -> None: + tool = _make_tool() + with ( + patch("langchain_tinyfish.tool._stream_writer_checked", False), + patch("langchain_tinyfish.tool._stream_writer_fn", None), + patch.dict("sys.modules", {"langgraph": None, "langgraph.config": None}), + ): + assert tool._get_stream_writer() is None + + def test_returns_none_when_not_in_context(self) -> None: + tool = _make_tool() + assert tool._get_stream_writer() is None From b33beccab1b63a2fc4590db49476bb0afeca2680 Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Mon, 11 May 2026 22:49:43 +0800 Subject: [PATCH 2/3] Address LangChain CodeRabbit review findings Stop SSE consumption after terminal events, validate fetch URL counts, skip integration tests without credentials, and add CI build coverage. --- .github/workflows/langchain-ci.yml | 2 ++ langchain/langchain_tinyfish/tool.py | 6 ++++++ langchain/tests/integration_tests/test_standard.py | 8 ++++++++ langchain/tests/unit_tests/test_packaging.py | 7 ++++++- 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/.github/workflows/langchain-ci.yml b/.github/workflows/langchain-ci.yml index d13304c..05041c2 100644 --- a/.github/workflows/langchain-ci.yml +++ b/.github/workflows/langchain-ci.yml @@ -37,3 +37,5 @@ jobs: - run: make lint - run: make test + + - run: python -m build diff --git a/langchain/langchain_tinyfish/tool.py b/langchain/langchain_tinyfish/tool.py index b11951e..da7c462 100644 --- a/langchain/langchain_tinyfish/tool.py +++ b/langchain/langchain_tinyfish/tool.py @@ -225,6 +225,7 @@ def _run( complete = self._dispatch_event(writer, event) if complete is not None: result = complete + break if result is None: raise RuntimeError("SSE stream ended without a COMPLETE event") @@ -255,6 +256,7 @@ async def _arun( complete = self._dispatch_event(writer, event) if complete is not None: result = complete + break if result is None: raise RuntimeError("SSE stream ended without a COMPLETE event") @@ -332,6 +334,8 @@ def _run( run_manager: Optional[CallbackManagerForToolRun] = None, ) -> str: try: + if not 1 <= len(urls) <= 10: + raise ValueError("urls must contain between 1 and 10 items") return self.api_wrapper.fetch( urls=urls, format=format, @@ -351,6 +355,8 @@ async def _arun( run_manager: Optional[AsyncCallbackManagerForToolRun] = None, ) -> str: try: + if not 1 <= len(urls) <= 10: + raise ValueError("urls must contain between 1 and 10 items") return await self.api_wrapper.afetch( urls=urls, format=format, diff --git a/langchain/tests/integration_tests/test_standard.py b/langchain/tests/integration_tests/test_standard.py index ab20e8b..6debcd0 100644 --- a/langchain/tests/integration_tests/test_standard.py +++ b/langchain/tests/integration_tests/test_standard.py @@ -2,13 +2,21 @@ from __future__ import annotations +import os from typing import Any +import pytest from langchain_tests.integration_tests import ToolsIntegrationTests from langchain_tinyfish import TinyFishWebAutomation +skip_no_key = pytest.mark.skipif( + not os.environ.get("TINYFISH_API_KEY"), + reason="TINYFISH_API_KEY not set", +) + +@skip_no_key class TestTinyFishToolIntegration(ToolsIntegrationTests): @property def tool_constructor(self) -> type[TinyFishWebAutomation]: diff --git a/langchain/tests/unit_tests/test_packaging.py b/langchain/tests/unit_tests/test_packaging.py index 8361f3b..71deeca 100644 --- a/langchain/tests/unit_tests/test_packaging.py +++ b/langchain/tests/unit_tests/test_packaging.py @@ -13,4 +13,9 @@ def test_tinyfish_dependency_floor() -> None: dependencies = pyproject["project"]["dependencies"] - assert "tinyfish>=0.2.5" in dependencies + tinyfish_dep = next( + (dep for dep in dependencies if dep.startswith("tinyfish")), + None, + ) + assert tinyfish_dep is not None + assert ">=0.2.5" in tinyfish_dep From ffd253a988174a87b31e909f029daa0ca7d68c10 Mon Sep 17 00:00:00 2001 From: Pranav Janakiraman Date: Mon, 11 May 2026 22:55:36 +0800 Subject: [PATCH 3/3] Install build in LangChain CI Add the build package to LangChain dev requirements so the CI packaging smoke test can run python -m build. --- langchain/requirements-dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/langchain/requirements-dev.txt b/langchain/requirements-dev.txt index c67fbec..dcb900a 100644 --- a/langchain/requirements-dev.txt +++ b/langchain/requirements-dev.txt @@ -1,3 +1,4 @@ +build>=1.0,<2.0 pytest>=7.0,<9.0 pytest-asyncio>=0.21,<1.0 responses>=0.23,<1.0