From 37f6c4aacef9a5af58158981eaea1801a29ba952 Mon Sep 17 00:00:00 2001 From: ntohidi Date: Tue, 5 May 2026 08:41:38 +0200 Subject: [PATCH] fix: arun() returns CrawlResult directly instead of CrawlResultContainer (#1898) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CrawlResultContainer proxies attributes via __getattr__ which is invisible to type checkers. Since arun() always returns a single result, return CrawlResult directly so Pylance/Pyright can see .markdown, .html, etc. Deep crawl streaming (via DeepCrawlDecorator) still returns an AsyncGenerator — that path is unaffected. --- crawl4ai/async_webcrawler.py | 20 ++++++++++---------- tests/test_type_annotations.py | 28 +++++++--------------------- 2 files changed, 17 insertions(+), 31 deletions(-) diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index f81096c07..7c22d3891 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -210,7 +210,7 @@ async def arun( url: str, config: CrawlerRunConfig = None, **kwargs, - ) -> CrawlResultContainer: + ) -> CrawlResult: """ Runs the crawler for a single source: URL (web, local file, or raw HTML). @@ -237,9 +237,11 @@ async def arun( [other parameters maintained for backwards compatibility] Returns: - CrawlResultContainer: A single-result container that proxies - attribute access to the underlying CrawlResult for backwards - compatibility (e.g. result.markdown, result.html). + CrawlResult: The result of crawling and processing the URL. + + Note: + When deep_crawl_strategy is set with stream=True, the return type + is overridden by DeepCrawlDecorator to AsyncGenerator[CrawlResult, None]. """ # Auto-start if not ready if not self.ready: @@ -657,7 +659,7 @@ async def arun( if cache_context.should_write() and not bool(cached_result): await async_db_manager.acache_url(crawl_result) - return CrawlResultContainer(crawl_result) + return crawl_result else: self.logger.url_status( @@ -672,7 +674,7 @@ async def arun( # For raw: URLs, don't fall back to the raw HTML string as redirected_url is_raw_url = url.startswith("raw:") or url.startswith("raw://") cached_result.redirected_url = cached_result.redirected_url or (None if is_raw_url else url) - return CrawlResultContainer(cached_result) + return cached_result except Exception as e: error_context = get_error_context(sys.exc_info()) @@ -690,10 +692,8 @@ async def arun( tag="ERROR", ) - return CrawlResultContainer( - CrawlResult( - url=url, html="", success=False, error_message=error_message - ) + return CrawlResult( + url=url, html="", success=False, error_message=error_message ) async def aprocess_html( diff --git a/tests/test_type_annotations.py b/tests/test_type_annotations.py index 4554e322a..2f6970f4e 100644 --- a/tests/test_type_annotations.py +++ b/tests/test_type_annotations.py @@ -119,35 +119,21 @@ class TestAnnotationConsistency: def test_arun_actually_returns_what_annotation_says(self): """ - Whatever arun()'s return annotation says, verify that - CrawlResultContainer is compatible with it. + arun() should be annotated as returning CrawlResult directly. """ from crawl4ai import AsyncWebCrawler - from crawl4ai.models import CrawlResultContainer + from crawl4ai.models import CrawlResult hints = typing.get_type_hints(AsyncWebCrawler.arun) ret = hints.get("return") if ret is None: pytest.skip("No return annotation to check") - # Get all types in the annotation - origin = getattr(ret, "__origin__", None) - if origin is Union: - allowed_types = [ - getattr(arg, "__origin__", arg) for arg in ret.__args__ - ] - else: - allowed_types = [getattr(ret, "__origin__", ret)] - - # CrawlResultContainer should be in the allowed types - is_compatible = any( - t is CrawlResultContainer - or (isinstance(t, type) and issubclass(CrawlResultContainer, t)) - for t in allowed_types - ) - assert is_compatible, ( - f"arun() returns CrawlResultContainer at runtime, but annotation {ret} " - f"doesn't include it. Allowed types: {allowed_types}" + # Get the actual type (unwrap generics if needed) + actual_type = getattr(ret, "__origin__", ret) + + assert actual_type is CrawlResult, ( + f"arun() should return CrawlResult, but annotation is {ret}" ) def test_config_classes_init_params_match_attributes(self):