Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions crawl4ai/async_webcrawler.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -210,7 +210,7 @@ async def arun(
url: str,
config: CrawlerRunConfig = None,
**kwargs,
) -> CrawlResultContainer:
) -> CrawlResult:
"""
Runs the crawler for a single source: URL (web, local file, or raw HTML).

Expand All @@ -237,9 +237,11 @@ async def arun(
[other parameters maintained for backwards compatibility]

Returns:
CrawlResultContainer: A single-result container that proxies
attribute access to the underlying CrawlResult for backwards
compatibility (e.g. result.markdown, result.html).
CrawlResult: The result of crawling and processing the URL.

Note:
When deep_crawl_strategy is set with stream=True, the return type
is overridden by DeepCrawlDecorator to AsyncGenerator[CrawlResult, None].
"""
# Auto-start if not ready
if not self.ready:
Expand Down Expand Up @@ -657,7 +659,7 @@ async def arun(
if cache_context.should_write() and not bool(cached_result):
await async_db_manager.acache_url(crawl_result)

return CrawlResultContainer(crawl_result)
return crawl_result

else:
self.logger.url_status(
Expand All @@ -672,7 +674,7 @@ async def arun(
# For raw: URLs, don't fall back to the raw HTML string as redirected_url
is_raw_url = url.startswith("raw:") or url.startswith("raw://")
cached_result.redirected_url = cached_result.redirected_url or (None if is_raw_url else url)
return CrawlResultContainer(cached_result)
return cached_result

except Exception as e:
error_context = get_error_context(sys.exc_info())
Expand All @@ -690,10 +692,8 @@ async def arun(
tag="ERROR",
)

return CrawlResultContainer(
CrawlResult(
url=url, html="", success=False, error_message=error_message
)
return CrawlResult(
url=url, html="", success=False, error_message=error_message
)

async def aprocess_html(
Expand Down
28 changes: 7 additions & 21 deletions tests/test_type_annotations.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -119,35 +119,21 @@ class TestAnnotationConsistency:

def test_arun_actually_returns_what_annotation_says(self):
"""
Whatever arun()'s return annotation says, verify that
CrawlResultContainer is compatible with it.
arun() should be annotated as returning CrawlResult directly.
"""
from crawl4ai import AsyncWebCrawler
from crawl4ai.models import CrawlResultContainer
from crawl4ai.models import CrawlResult

hints = typing.get_type_hints(AsyncWebCrawler.arun)
ret = hints.get("return")
if ret is None:
pytest.skip("No return annotation to check")

# Get all types in the annotation
origin = getattr(ret, "__origin__", None)
if origin is Union:
allowed_types = [
getattr(arg, "__origin__", arg) for arg in ret.__args__
]
else:
allowed_types = [getattr(ret, "__origin__", ret)]

# CrawlResultContainer should be in the allowed types
is_compatible = any(
t is CrawlResultContainer
or (isinstance(t, type) and issubclass(CrawlResultContainer, t))
for t in allowed_types
)
assert is_compatible, (
f"arun() returns CrawlResultContainer at runtime, but annotation {ret} "
f"doesn't include it. Allowed types: {allowed_types}"
# Get the actual type (unwrap generics if needed)
actual_type = getattr(ret, "__origin__", ret)

assert actual_type is CrawlResult, (
f"arun() should return CrawlResult, but annotation is {ret}"
)

def test_config_classes_init_params_match_attributes(self):
Expand Down