Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ from instaparser import InstaparserClient
client = InstaparserClient(api_key="your-api-key")

# Parse an article from a URL
article = client.Article(url="https://example.com/article")
article = client.article(url="https://example.com/article")

# Access article properties
print(article.title)
Expand Down Expand Up @@ -46,34 +46,34 @@ from instaparser import InstaparserClient
client = InstaparserClient(api_key="your-api-key")

# Parse from URL (HTML output)
article = client.Article(url="https://example.com/article")
article = client.article(url="https://example.com/article")
print(article.html) # HTML content
print(article.body) # Same as html when output='html'

# Parse from URL (text output)
article = client.Article(url="https://example.com/article", output="text")
article = client.article(url="https://example.com/article", output="text")
print(article.text) # Plain text content
print(article.body) # Same as text when output='text'

# Parse from URL (markdown output)
article = client.Article(url="https://example.com/article", output="markdown")
article = client.article(url="https://example.com/article", output="markdown")
print(article.markdown) # Markdown content
print(article.body) # Same as markdown when output='markdown'

# Parse from HTML content
html_content = "<html><body><h1>Title</h1><p>Content</p></body></html>"
article = client.Article(url="https://example.com/article", content=html_content)
article = client.article(url="https://example.com/article", content=html_content)

# Disable cache
article = client.Article(url="https://example.com/article", use_cache=False)
article = client.article(url="https://example.com/article", use_cache=False)
```

### Article Properties

The `Article` object provides access to all parsed metadata:

```python
article = client.Article(url="https://example.com/article")
article = client.article(url="https://example.com/article")

# Basic properties
article.url # Canonical URL
Expand Down Expand Up @@ -103,7 +103,7 @@ Generate AI-powered summaries:

```python
# Generate summary
summary = client.Summary(url="https://example.com/article")
summary = client.summary(url="https://example.com/article")

print(summary.overview) # Concise summary
print(summary.key_sentences) # List of key sentences
Expand All @@ -112,7 +112,7 @@ print(summary.key_sentences) # List of key sentences
def on_stream_line(line):
print(f"Streaming: {line}")

summary = client.Summary(
summary = client.summary(
url="https://example.com/article",
stream_callback=on_stream_line
)
Expand All @@ -124,19 +124,19 @@ Parse PDFs from URLs or files. The PDF class inherits from Article, so it has al

```python
# Parse PDF from URL
pdf = client.PDF(url="https://example.com/document.pdf")
pdf = client.pdf(url="https://example.com/document.pdf")

# Parse PDF from file
with open('document.pdf', 'rb') as f:
pdf = client.PDF(file=f)
pdf = client.pdf(file=f)

# Parse PDF with text output
pdf = client.PDF(url="https://example.com/document.pdf", output="text")
pdf = client.pdf(url="https://example.com/document.pdf", output="text")
print(pdf.text)
print(pdf.body) # Same as text when output='text'

# Parse PDF with markdown output
pdf = client.PDF(url="https://example.com/document.pdf", output="markdown")
pdf = client.pdf(url="https://example.com/document.pdf", output="markdown")
print(pdf.markdown)
print(pdf.body) # Same as markdown when output='markdown'

Expand All @@ -162,7 +162,7 @@ from instaparser import (
client = InstaparserClient(api_key="your-api-key")

try:
article = client.Article(url="https://example.com/article")
article = client.article(url="https://example.com/article")
except InstaparserAuthenticationError:
print("Invalid API key")
except InstaparserRateLimitError:
Expand All @@ -185,7 +185,7 @@ Initialize the client.

- `api_key`: Your Instaparser API key

#### `Article(url: str, content: Optional[str] = None, output: str = 'html', use_cache: bool = True) -> Article`
#### `article(url: str, content: Optional[str] = None, output: str = 'html', use_cache: bool = True) -> Article`

Parse an article from a URL or HTML content.

Expand All @@ -196,7 +196,7 @@ Parse an article from a URL or HTML content.

Returns: `Article` object

#### `Summary(url: str, content: Optional[str] = None, use_cache: bool = True, stream_callback: Optional[Callable[[str], None]] = None) -> Summary`
#### `summary(url: str, content: Optional[str] = None, use_cache: bool = True, stream_callback: Optional[Callable[[str], None]] = None) -> Summary`

Generate a summary of an article.

Expand All @@ -207,7 +207,7 @@ Generate a summary of an article.

Returns: `Summary` object with `key_sentences` and `overview` attributes

#### `PDF(url: Optional[str] = None, file: Optional[Union[BinaryIO, bytes]] = None, output: str = 'html', use_cache: bool = True) -> PDF`
#### `pdf(url: Optional[str] = None, file: Optional[Union[BinaryIO, bytes]] = None, output: str = 'html', use_cache: bool = True) -> PDF`

Parse a PDF from a URL or file.

Expand Down
8 changes: 4 additions & 4 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def get_api_key(args: argparse.Namespace) -> str:

def cmd_article(client: InstaparserClient, args: argparse.Namespace) -> None:
"""Call Article API and print result."""
article = client.Article(
article = client.article(
url=args.url,
output=args.output,
use_cache=not args.no_cache,
Expand All @@ -53,7 +53,7 @@ def on_stream(line: str) -> None:
if args.stream:
print(f" {line}")

summary = client.Summary(
summary = client.summary(
url=args.url,
use_cache=not args.no_cache,
stream_callback=on_stream if args.stream else None,
Expand All @@ -68,7 +68,7 @@ def cmd_pdf(client: InstaparserClient, args: argparse.Namespace) -> None:
"""Call PDF API and print result."""
if args.file:
with open(args.file, "rb") as f:
pdf = client.PDF(
pdf = client.pdf(
file=f,
url=args.url or None,
output=args.output,
Expand All @@ -78,7 +78,7 @@ def cmd_pdf(client: InstaparserClient, args: argparse.Namespace) -> None:
if not args.url:
print("Error: --url or --file required for pdf.", file=sys.stderr)
sys.exit(1)
pdf = client.PDF(
pdf = client.pdf(
url=args.url,
output=args.output,
use_cache=not args.no_cache,
Expand Down
48 changes: 39 additions & 9 deletions instaparser/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import json
import uuid
import warnings
from collections.abc import Callable
from http.client import HTTPResponse
from typing import Any, BinaryIO, NoReturn
Expand Down Expand Up @@ -101,7 +102,7 @@ class InstaparserClient:

Example:
>>> client = InstaparserClient(api_key="your-api-key")
>>> article = client.Article(url="https://example.com/article")
>>> article = client.article(url="https://example.com/article")
>>> print(article.body)
"""

Expand Down Expand Up @@ -186,7 +187,7 @@ def _read_json(self, response: HTTPResponse) -> dict[str, Any]:
pass
return {"raw": body}

def Article(self, url: str, content: str | None = None, output: str = "html", use_cache: bool = True) -> Article:
def article(self, url: str, content: str | None = None, output: str = "html", use_cache: bool = True) -> Article:
"""
Parse an article from a URL or HTML content.

Expand All @@ -200,7 +201,7 @@ def Article(self, url: str, content: str | None = None, output: str = "html", us
Article object with parsed content

Example:
>>> article = client.Article(url="https://example.com/article")
>>> article = client.article(url="https://example.com/article")
>>> print(article.title)
>>> print(article.body)
"""
Expand Down Expand Up @@ -239,7 +240,7 @@ def Article(self, url: str, content: str | None = None, output: str = "html", us
markdown=data.get("markdown"),
)

def Summary(
def summary(
self,
url: str,
content: str | None = None,
Expand All @@ -260,14 +261,14 @@ def Summary(
Summary object with key_sentences and overview attributes

Example:
>>> summary = client.Summary(url="https://example.com/article")
>>> summary = client.summary(url="https://example.com/article")
>>> print(summary.overview)
>>> print(summary.key_sentences)

>>> # With streaming callback
>>> def on_line(line):
... print(f"Received: {line}")
>>> summary = client.Summary(url="https://example.com/article", stream_callback=on_line)
>>> summary = client.summary(url="https://example.com/article", stream_callback=on_line)
"""
payload: dict[str, Any] = {
"url": url,
Expand Down Expand Up @@ -306,7 +307,7 @@ def Summary(
data = self._read_json(response)
return Summary(key_sentences=data.get("key_sentences", []), overview=data.get("overview", ""))

def PDF(
def pdf(
self,
url: str | None = None,
file: BinaryIO | bytes | None = None,
Expand All @@ -327,11 +328,11 @@ def PDF(

Example:
>>> # Parse PDF from URL
>>> pdf = client.PDF(url="https://example.com/document.pdf")
>>> pdf = client.pdf(url="https://example.com/document.pdf")

>>> # Parse PDF from file
>>> with open('document.pdf', 'rb') as f:
... pdf = client.PDF(file=f)
... pdf = client.pdf(file=f)
"""
if output not in ("html", "text", "markdown"):
raise InstaparserValidationError("output must be 'html', 'text', or 'markdown'")
Expand Down Expand Up @@ -382,3 +383,32 @@ def PDF(
text=result.get("text"),
markdown=result.get("markdown"),
)

# Deprecated aliases for backwards compatibility:

def Article(self, *args: Any, **kwargs: Any) -> Article:  # noqa: N802
    """Backwards-compatible alias for ``article()``.

    Emits a ``DeprecationWarning`` and then forwards every positional and
    keyword argument to ``self.article()``, returning its result unchanged.
    """
    message = "client.Article() is deprecated, use client.article() instead"
    # stacklevel=2 attributes the warning to the code that called this alias.
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    result = self.article(*args, **kwargs)
    return result

def PDF(self, *args: Any, **kwargs: Any) -> PDF:  # noqa: N802
    """Backwards-compatible alias for ``pdf()``.

    Emits a ``DeprecationWarning`` and then forwards every positional and
    keyword argument to ``self.pdf()``, returning its result unchanged.
    """
    message = "client.PDF() is deprecated, use client.pdf() instead"
    # stacklevel=2 attributes the warning to the code that called this alias.
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    result = self.pdf(*args, **kwargs)
    return result

def Summary(self, *args: Any, **kwargs: Any) -> Summary:  # noqa: N802
    """Backwards-compatible alias for ``summary()``.

    Emits a ``DeprecationWarning`` and then forwards every positional and
    keyword argument to ``self.summary()``, returning its result unchanged.
    """
    message = "client.Summary() is deprecated, use client.summary() instead"
    # stacklevel=2 attributes the warning to the code that called this alias.
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    result = self.summary(*args, **kwargs)
    return result
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,3 @@ line-length = 120

[tool.ruff.lint]
select = ["E", "F", "I", "N", "W", "UP"]
ignore = ["N802"]
Loading