diff --git a/README.md b/README.md index 71d86d5..536411c 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ from instaparser import InstaparserClient client = InstaparserClient(api_key="your-api-key") # Parse an article from a URL -article = client.Article(url="https://example.com/article") +article = client.article(url="https://example.com/article") # Access article properties print(article.title) @@ -46,26 +46,26 @@ from instaparser import InstaparserClient client = InstaparserClient(api_key="your-api-key") # Parse from URL (HTML output) -article = client.Article(url="https://example.com/article") +article = client.article(url="https://example.com/article") print(article.html) # HTML content print(article.body) # Same as html when output='html' # Parse from URL (text output) -article = client.Article(url="https://example.com/article", output="text") +article = client.article(url="https://example.com/article", output="text") print(article.text) # Plain text content print(article.body) # Same as text when output='text' # Parse from URL (markdown output) -article = client.Article(url="https://example.com/article", output="markdown") +article = client.article(url="https://example.com/article", output="markdown") print(article.markdown) # Markdown content print(article.body) # Same as markdown when output='markdown' # Parse from HTML content html_content = "
Content
" -article = client.Article(url="https://example.com/article", content=html_content) +article = client.article(url="https://example.com/article", content=html_content) # Disable cache -article = client.Article(url="https://example.com/article", use_cache=False) +article = client.article(url="https://example.com/article", use_cache=False) ``` ### Article Properties @@ -73,7 +73,7 @@ article = client.Article(url="https://example.com/article", use_cache=False) The `Article` object provides access to all parsed metadata: ```python -article = client.Article(url="https://example.com/article") +article = client.article(url="https://example.com/article") # Basic properties article.url # Canonical URL @@ -103,7 +103,7 @@ Generate AI-powered summaries: ```python # Generate summary -summary = client.Summary(url="https://example.com/article") +summary = client.summary(url="https://example.com/article") print(summary.overview) # Concise summary print(summary.key_sentences) # List of key sentences @@ -112,7 +112,7 @@ print(summary.key_sentences) # List of key sentences def on_stream_line(line): print(f"Streaming: {line}") -summary = client.Summary( +summary = client.summary( url="https://example.com/article", stream_callback=on_stream_line ) @@ -124,19 +124,19 @@ Parse PDFs from URLs or files. The PDF class inherits from Article, so it has al ```python # Parse PDF from URL -pdf = client.PDF(url="https://example.com/document.pdf") +pdf = client.pdf(url="https://example.com/document.pdf") # Parse PDF from file with open('document.pdf', 'rb') as f: - pdf = client.PDF(file=f) + pdf = client.pdf(file=f) # Parse PDF with text output -pdf = client.PDF(url="https://example.com/document.pdf", output="text") +pdf = client.pdf(url="https://example.com/document.pdf", output="text") print(pdf.text) print(pdf.body) # Same as text when output='text' # Parse PDF with markdown output -pdf = client.PDF(url="https://example.com/document.pdf", output="markdown") +pdf = client.pdf(url="https://example.com/document.pdf", output="markdown") print(pdf.markdown) print(pdf.body) # Same as markdown when output='markdown' @@ -162,7 +162,7 @@ from instaparser import ( client = InstaparserClient(api_key="your-api-key") try: - article = client.Article(url="https://example.com/article") + article = client.article(url="https://example.com/article") except InstaparserAuthenticationError: print("Invalid API key") except InstaparserRateLimitError: @@ -185,7 +185,7 @@ Initialize the client. - `api_key`: Your Instaparser API key -#### `Article(url: str, content: Optional[str] = None, output: str = 'html', use_cache: bool = True) -> Article` +#### `article(url: str, content: Optional[str] = None, output: str = 'html', use_cache: bool = True) -> Article` Parse an article from a URL or HTML content. @@ -196,7 +196,7 @@ Parse an article from a URL or HTML content. Returns: `Article` object -#### `Summary(url: str, content: Optional[str] = None, use_cache: bool = True, stream_callback: Optional[Callable[[str], None]] = None) -> Summary` +#### `summary(url: str, content: Optional[str] = None, use_cache: bool = True, stream_callback: Optional[Callable[[str], None]] = None) -> Summary` Generate a summary of an article. @@ -207,7 +207,7 @@ Generate a summary of an article. Returns: `Summary` object with `key_sentences` and `overview` attributes -#### `PDF(url: Optional[str] = None, file: Optional[Union[BinaryIO, bytes]] = None, output: str = 'html', use_cache: bool = True) -> PDF` +#### `pdf(url: Optional[str] = None, file: Optional[Union[BinaryIO, bytes]] = None, output: str = 'html', use_cache: bool = True) -> PDF` Parse a PDF from a URL or file. diff --git a/example.py b/example.py index 00e4bf4..ffd8d60 100644 --- a/example.py +++ b/example.py @@ -30,7 +30,7 @@ def get_api_key(args: argparse.Namespace) -> str: def cmd_article(client: InstaparserClient, args: argparse.Namespace) -> None: """Call Article API and print result.""" - article = client.Article( + article = client.article( url=args.url, output=args.output, use_cache=not args.no_cache, @@ -53,7 +53,7 @@ def on_stream(line: str) -> None: if args.stream: print(f" {line}") - summary = client.Summary( + summary = client.summary( url=args.url, use_cache=not args.no_cache, stream_callback=on_stream if args.stream else None, @@ -68,7 +68,7 @@ def cmd_pdf(client: InstaparserClient, args: argparse.Namespace) -> None: """Call PDF API and print result.""" if args.file: with open(args.file, "rb") as f: - pdf = client.PDF( + pdf = client.pdf( file=f, url=args.url or None, output=args.output, @@ -78,7 +78,7 @@ def cmd_pdf(client: InstaparserClient, args: argparse.Namespace) -> None: if not args.url: print("Error: --url or --file required for pdf.", file=sys.stderr) sys.exit(1) - pdf = client.PDF( + pdf = client.pdf( url=args.url, output=args.output, use_cache=not args.no_cache, diff --git a/instaparser/client.py b/instaparser/client.py index 9d6dbcf..ab7c057 100644 --- a/instaparser/client.py +++ b/instaparser/client.py @@ -4,6 +4,7 @@ import json import uuid +import warnings from collections.abc import Callable from http.client import HTTPResponse from typing import Any, BinaryIO, NoReturn @@ -101,7 +102,7 @@ class InstaparserClient: Example: >>> client = InstaparserClient(api_key="your-api-key") - >>> article = client.Article(url="https://example.com/article") + >>> article = client.article(url="https://example.com/article") >>> print(article.body) """ @@ -186,7 +187,7 @@ def _read_json(self, response: HTTPResponse) -> dict[str, Any]: pass return {"raw": body} - def Article(self, url: str, content: str | None = None, output: str = "html", use_cache: bool = True) -> Article: + def article(self, url: str, content: str | None = None, output: str = "html", use_cache: bool = True) -> Article: """ Parse an article from a URL or HTML content. @@ -200,7 +201,7 @@ def Article(self, url: str, content: str | None = None, output: str = "html", us Article object with parsed content Example: - >>> article = client.Article(url="https://example.com/article") + >>> article = client.article(url="https://example.com/article") >>> print(article.title) >>> print(article.body) """ @@ -239,7 +240,7 @@ def Article(self, url: str, content: str | None = None, output: str = "html", us markdown=data.get("markdown"), ) - def Summary( + def summary( self, url: str, content: str | None = None, @@ -260,14 +261,14 @@ def Summary( Summary object with key_sentences and overview attributes Example: - >>> summary = client.Summary(url="https://example.com/article") + >>> summary = client.summary(url="https://example.com/article") >>> print(summary.overview) >>> print(summary.key_sentences) >>> # With streaming callback >>> def on_line(line): ... print(f"Received: {line}") - >>> summary = client.Summary(url="https://example.com/article", stream_callback=on_line) + >>> summary = client.summary(url="https://example.com/article", stream_callback=on_line) """ payload: dict[str, Any] = { "url": url, @@ -306,7 +307,7 @@ def Summary( data = self._read_json(response) return Summary(key_sentences=data.get("key_sentences", []), overview=data.get("overview", "")) - def PDF( + def pdf( self, url: str | None = None, file: BinaryIO | bytes | None = None, @@ -327,11 +328,11 @@ def PDF( Example: >>> # Parse PDF from URL - >>> pdf = client.PDF(url="https://example.com/document.pdf") + >>> pdf = client.pdf(url="https://example.com/document.pdf") >>> # Parse PDF from file >>> with open('document.pdf', 'rb') as f: - ... pdf = client.PDF(file=f) + ... pdf = client.pdf(file=f) """ if output not in ("html", "text", "markdown"): raise InstaparserValidationError("output must be 'html', 'text', or 'markdown'") @@ -382,3 +383,32 @@ def PDF( text=result.get("text"), markdown=result.get("markdown"), ) + + # Deprecated aliases for backwards compatibility: + + def Article(self, *args: Any, **kwargs: Any) -> Article: # noqa: N802 + """Deprecated: use client.article() instead.""" + warnings.warn( + "client.Article() is deprecated, use client.article() instead", + DeprecationWarning, + stacklevel=2, + ) + return self.article(*args, **kwargs) + + def PDF(self, *args: Any, **kwargs: Any) -> PDF: # noqa: N802 + """Deprecated: use client.pdf() instead.""" + warnings.warn( + "client.PDF() is deprecated, use client.pdf() instead", + DeprecationWarning, + stacklevel=2, + ) + return self.pdf(*args, **kwargs) + + def Summary(self, *args: Any, **kwargs: Any) -> Summary: # noqa: N802 + """Deprecated: use client.summary() instead.""" + warnings.warn( + "client.Summary() is deprecated, use client.summary() instead", + DeprecationWarning, + stacklevel=2, + ) + return self.summary(*args, **kwargs) diff --git a/pyproject.toml b/pyproject.toml index 1773397..5977377 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,4 +77,3 @@ line-length = 120 [tool.ruff.lint] select = ["E", "F", "I", "N", "W", "UP"] -ignore = ["N802"] diff --git a/tests/test_client.py b/tests/test_client.py index 6dd1307..e24297f 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -134,7 +134,7 @@ class TestArticle: def test_basic_parse(self, client, mock_request): mock_request.return_value = make_response(json_data=ARTICLE_DATA) - article = client.Article(url="https://example.com/article") + article = client.article(url="https://example.com/article") assert article.title == "Test Article Title" assert article.url == "https://example.com/article" @@ -151,35 +151,35 @@ def test_basic_parse(self, client, mock_request): def test_text_output(self, client, mock_request): data = {**ARTICLE_DATA, "html": None, "text": "Plain text body."} mock_request.return_value = make_response(json_data=data) - article = client.Article(url="u", output="text") + article = client.article(url="u", output="text") assert article.body == "Plain text body." assert mock_request.call_args[1]["json_data"]["output"] == "text" def test_markdown_output(self, client, mock_request): mock_request.return_value = make_response(json_data={"url": "u", "markdown": "# MD"}) - article = client.Article(url="u", output="markdown") + article = client.article(url="u", output="markdown") assert article.markdown == "# MD" assert article.body == "# MD" def test_with_content(self, client, mock_request): mock_request.return_value = make_response(json_data=ARTICLE_DATA) - client.Article(url="u", content="hi") + client.article(url="u", content="hi") assert mock_request.call_args[1]["json_data"]["content"] == "hi" def test_use_cache_false(self, client, mock_request): mock_request.return_value = make_response(json_data=ARTICLE_DATA) - client.Article(url="u", use_cache=False) + client.article(url="u", use_cache=False) assert mock_request.call_args[1]["json_data"]["use_cache"] == "false" def test_invalid_output(self, client): with pytest.raises(InstaparserValidationError, match="output must be"): - client.Article(url="u", output="invalid") + client.article(url="u", output="invalid") def test_malformed_json_response(self, client, mock_request): mock_request.return_value = make_response(text="Not valid JSON {") - article = client.Article(url="u") + article = client.article(url="u") assert isinstance(article, Article) @@ -187,7 +187,7 @@ class TestSummary: def test_basic_summary(self, client, mock_request): mock_request.return_value = make_response(json_data=SUMMARY_DATA) - summary = client.Summary(url="https://example.com/article") + summary = client.summary(url="https://example.com/article") assert len(summary.key_sentences) == 3 assert summary.overview == "This is a comprehensive overview of the article content." @@ -195,17 +195,17 @@ def test_basic_summary(self, client, mock_request): def test_with_content(self, client, mock_request): mock_request.return_value = make_response(json_data=SUMMARY_DATA) - client.Summary(url="u", content="hi") + client.summary(url="u", content="hi") assert mock_request.call_args[1]["json_data"]["content"] == "hi" def test_use_cache_false(self, client, mock_request): mock_request.return_value = make_response(json_data=SUMMARY_DATA) - client.Summary(url="u", use_cache=False) + client.summary(url="u", use_cache=False) assert mock_request.call_args[1]["json_data"]["use_cache"] == "false" def test_empty_response(self, client, mock_request): mock_request.return_value = make_response(json_data={}) - summary = client.Summary(url="u") + summary = client.summary(url="u") assert summary.key_sentences == [] assert summary.overview == "" @@ -220,7 +220,7 @@ def test_streaming_callback(self, client, mock_request): ) received = [] - summary = client.Summary(url="u", stream_callback=received.append) + summary = client.summary(url="u", stream_callback=received.append) assert len(received) == 4 assert received[0] == 'key_sentences: ["Sentence 1", "Sentence 2"]' @@ -237,7 +237,7 @@ def test_streaming_empty_lines_filtered(self, client, mock_request): b"\r\n", ] ) - summary = client.Summary(url="u", stream_callback=lambda _: None) + summary = client.summary(url="u", stream_callback=lambda _: None) assert summary.overview == "Content" def test_streaming_malformed_key_sentences(self, client, mock_request): @@ -248,21 +248,21 @@ def test_streaming_malformed_key_sentences(self, client, mock_request): ] ) with pytest.raises(InstaparserAPIError) as exc_info: - client.Summary(url="u", stream_callback=lambda _: None) + client.summary(url="u", stream_callback=lambda _: None) assert exc_info.value.status_code == 412 assert "Unable to generate key sentences" in str(exc_info.value) def test_streaming_error_response(self, client, mock_request): mock_request.side_effect = make_error(401, json_data={"reason": "Invalid API key"}) with pytest.raises(InstaparserAuthenticationError): - client.Summary(url="u", stream_callback=lambda _: None) + client.summary(url="u", stream_callback=lambda _: None) class TestPDF: def test_from_url(self, client, mock_request): mock_request.return_value = make_response(json_data=PDF_DATA) - pdf = client.PDF(url="https://example.com/document.pdf") + pdf = client.pdf(url="https://example.com/document.pdf") assert pdf.title == "Test PDF Document" assert pdf.is_rtl is False @@ -276,7 +276,7 @@ def test_from_file(self, client, mock_request): pdf_file = Mock() mock_request.return_value = make_response(json_data=PDF_DATA) - pdf = client.PDF(file=pdf_file) + pdf = client.pdf(file=pdf_file) assert pdf.title == "Test PDF Document" assert mock_request.call_args[0][0] == "POST" @@ -284,52 +284,52 @@ def test_from_file(self, client, mock_request): def test_from_bytes(self, client, mock_request): mock_request.return_value = make_response(json_data=PDF_DATA) - client.PDF(file=b"PDF content") + client.pdf(file=b"PDF content") assert mock_request.call_args[1]["multipart_files"]["file"] == b"PDF content" def test_file_and_url(self, client, mock_request): """When both file and url are given, POST with url in form fields.""" pdf_file = Mock() mock_request.return_value = make_response(json_data=PDF_DATA) - client.PDF(url="https://example.com/document.pdf", file=pdf_file) + client.pdf(url="https://example.com/document.pdf", file=pdf_file) assert mock_request.call_args[1]["multipart_fields"]["url"] == "https://example.com/document.pdf" @pytest.mark.parametrize("output", ["text", "markdown"]) def test_output_formats_url(self, client, mock_request, output): data = {**PDF_DATA, "html": None, output: f"content in {output}"} mock_request.return_value = make_response(json_data=data) - client.PDF(url="https://example.com/document.pdf", output=output) + client.pdf(url="https://example.com/document.pdf", output=output) assert mock_request.call_args[1]["params"]["output"] == output def test_use_cache_false_url(self, client, mock_request): mock_request.return_value = make_response(json_data=PDF_DATA) - client.PDF(url="https://example.com/document.pdf", use_cache=False) + client.pdf(url="https://example.com/document.pdf", use_cache=False) assert mock_request.call_args[1]["params"]["use_cache"] == "false" def test_use_cache_false_file(self, client, mock_request): mock_request.return_value = make_response(json_data=PDF_DATA) - client.PDF(file=Mock(), use_cache=False) + client.pdf(file=Mock(), use_cache=False) assert mock_request.call_args[1]["multipart_fields"]["use_cache"] == "false" def test_invalid_output(self, client): with pytest.raises(InstaparserValidationError, match="output must be"): - client.PDF(url="u", output="invalid") + client.pdf(url="u", output="invalid") def test_no_url_or_file(self, client): with pytest.raises(InstaparserValidationError, match="Either 'url' or 'file'"): - client.PDF() + client.pdf() class TestTransportErrors: def test_url_error(self, client, mock_request): mock_request.side_effect = URLError("Connection failed") with pytest.raises(URLError): - client.Article(url="u") + client.article(url="u") def test_timeout_error(self, client, mock_request): mock_request.side_effect = TimeoutError("timed out") with pytest.raises(TimeoutError): - client.Article(url="u") + client.article(url="u") class TestURLConstruction: @@ -338,11 +338,31 @@ def test_base_url_joining(self, base, monkeypatch): client = InstaparserClient(api_key=API_KEY, base_url=base) mock_urlopen = Mock(return_value=make_response(json_data={})) monkeypatch.setattr("instaparser.client.urlopen", mock_urlopen) - client.Article(url="u") + client.article(url="u") req = mock_urlopen.call_args[0][0] assert req.full_url.startswith("https://api.test.com/api/1/article") +class TestDeprecatedAliases: + def test_article_alias(self, client, mock_request): + mock_request.return_value = make_response(json_data=ARTICLE_DATA) + with pytest.warns(DeprecationWarning, match="client.Article.*deprecated.*client.article"): + article = client.Article(url="https://example.com/article") + assert article.title == "Test Article Title" + + def test_pdf_alias(self, client, mock_request): + mock_request.return_value = make_response(json_data=PDF_DATA) + with pytest.warns(DeprecationWarning, match="client.PDF.*deprecated.*client.pdf"): + pdf = client.PDF(url="https://example.com/document.pdf") + assert pdf.title == "Test PDF Document" + + def test_summary_alias(self, client, mock_request): + mock_request.return_value = make_response(json_data=SUMMARY_DATA) + with pytest.warns(DeprecationWarning, match="client.Summary.*deprecated.*client.summary"): + summary = client.Summary(url="https://example.com/article") + assert summary.overview == SUMMARY_DATA["overview"] + + class TestMultipleClients: def test_independent_instances(self): c1 = InstaparserClient(api_key="key1", base_url="https://api1.com") @@ -352,8 +372,8 @@ def test_independent_instances(self): def test_client_reuse(self, client, mock_request): mock_request.return_value = make_response(json_data=ARTICLE_DATA) - a1 = client.Article(url="u1") - a2 = client.Article(url="u2") + a1 = client.article(url="u1") + a2 = client.article(url="u2") assert mock_request.call_count == 2 assert a1.title == a2.title == "Test Article Title" @@ -363,35 +383,35 @@ class TestErrorPropagation: def test_article_errors(self, client, mock_request, status, exc_cls): mock_request.side_effect = make_error(status, json_data={"reason": f"Error {status}"}) with pytest.raises(exc_cls) as exc_info: - client.Article(url="u") + client.article(url="u") assert exc_info.value.status_code == status @pytest.mark.parametrize("status, exc_cls", ERROR_CODES) def test_summary_errors(self, client, mock_request, status, exc_cls): mock_request.side_effect = make_error(status, json_data={"reason": f"Error {status}"}) with pytest.raises(exc_cls): - client.Summary(url="u") + client.summary(url="u") @pytest.mark.parametrize("status, exc_cls", ERROR_CODES) def test_pdf_errors(self, client, mock_request, status, exc_cls): mock_request.side_effect = make_error(status, json_data={"reason": f"Error {status}"}) with pytest.raises(exc_cls): - client.PDF(url="u") + client.pdf(url="u") def test_error_without_reason_field(self, client, mock_request): mock_request.side_effect = make_error(500, json_data={}) with pytest.raises(InstaparserAPIError, match="API request failed"): - client.Article(url="u") + client.article(url="u") def test_error_plain_text_body(self, client, mock_request): mock_request.side_effect = make_error(500, text="Error message") with pytest.raises(InstaparserAPIError, match="Error message"): - client.Article(url="u") + client.article(url="u") def test_error_empty_body(self, client, mock_request): mock_request.side_effect = make_error(500, text="") with pytest.raises(InstaparserAPIError) as exc_info: - client.Article(url="u") + client.article(url="u") assert exc_info.value.status_code == 500 @@ -406,7 +426,7 @@ class TestOutputFormats: ) def test_article_output_formats(self, client, mock_request, output, field, body_text): mock_request.return_value = make_response(json_data={"url": "u", field: body_text}) - article = client.Article(url="u", output=output) + article = client.article(url="u", output=output) assert article.body == body_text @pytest.mark.parametrize( @@ -419,7 +439,7 @@ def test_article_output_formats(self, client, mock_request, output, field, body_ ) def test_pdf_output_formats(self, client, mock_request, output, field, body_text): mock_request.return_value = make_response(json_data={"url": "u", field: body_text}) - pdf = client.PDF(url="u", output=output) + pdf = client.pdf(url="u", output=output) assert pdf.body == body_text