Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
1a1c732
fix: Python 3.14 segmentation fault using Stable ABI (abi3) and PyO3 …
ElissonRodrigues Mar 28, 2026
d33ab65
fix: resolve Python 3.14 stability and improve error handling
ElissonRodrigues Mar 28, 2026
1bb43f3
chore: refine pyproject.toml for production and organize dev dependen…
ElissonRodrigues Mar 28, 2026
3482544
test: restore unit test suite and fix benchmark compatibility
ElissonRodrigues Mar 28, 2026
0bf348a
feat: add Python type stubs and markers for IDE support
ElissonRodrigues Mar 28, 2026
e5d457b
chore: set minimum python version to 3.9 due to dependencies and orig…
ElissonRodrigues Mar 28, 2026
4d74788
chore: update uv.lock with finalized dependencies
ElissonRodrigues Mar 28, 2026
c911d01
chore: correct original author email to info@qdrant.com
ElissonRodrigues Mar 28, 2026
7045225
refactor: update Rust methods to modern PyO3 API and optimize processing
ElissonRodrigues Mar 28, 2026
f11aba4
build: sync version and abi3 configuration
ElissonRodrigues Mar 28, 2026
b8f5090
typ: update stubs to match internal changes
ElissonRodrigues Mar 28, 2026
46fd379
build: update pyo3 abi3 feature to py39 to match python requirements
ElissonRodrigues Mar 28, 2026
3e6aad9
refactor: optimize stemming memory allocations and parallel iterable …
ElissonRodrigues Mar 29, 2026
d410490
build: update version and sync uv lockfile
ElissonRodrigues Mar 29, 2026
74ab785
typ: update stubs for type generics and iterables
ElissonRodrigues Mar 29, 2026
cc2a821
Merge branch 'qdrant:master' into master
ElissonRodrigues May 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# --- Rust ---
/target
# Usually Cargo.lock is committed for reproducible builds,
# but uncomment the next line if you prefer to ignore it:
# Cargo.lock

# --- Python ---
__pycache__/
*.py[cod]
*$py.class
*.so
*.pyd
.venv/
env/
venv/
ENV/
build/
dist/
*.egg-info/
.python-version
.pytest_cache/
.mypy_cache/
.ruff_cache/

# --- UV ---
# Committing uv.lock is recommended so that installs are reproducible
# uv.lock

# --- IDEs & OS ---
.vscode/
.idea/
.DS_Store
*.swp
*.swo
*~
225 changes: 225 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ edition = "2021"
crate-type = ["cdylib"]

[dependencies]
pyo3 = { version = "0.18", features = ["extension-module"] }
pyo3 = { version = "0.28.2", features = ["abi3-py39"] }
rust-stemmers = "1.2.0"
rayon = "1.6"

[dev-dependencies]
pyo3 = { version = "0.18", features = ["extension-module"] }
pyo3 = { version = "0.28.2", features = ["abi3-py39"] }

[profile.release]
opt-level = 3
Expand Down
3 changes: 3 additions & 0 deletions py_rust_stemmers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Re-export SnowballStemmer from the sibling `py_rust_stemmers` submodule so
# that callers can import it directly from the package root.
# NOTE(review): the submodule is presumably the compiled extension, which is
# why the import carries a `type: ignore` — confirm against the build config.
from .py_rust_stemmers import SnowballStemmer # type: ignore

# Names exported by `from py_rust_stemmers import *`.
__all__ = ["SnowballStemmer"]
55 changes: 55 additions & 0 deletions py_rust_stemmers/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from typing import Iterable, List, TypeVar

_S = TypeVar("_S", bound=str)

class SnowballStemmer:
    """
    A fast and parallel Snowball stemmer implemented in Rust.

    This is a typing stub: the method bodies are placeholders and only the
    signatures and docstrings are meaningful to type checkers and IDEs.
    """

    def __init__(self, lang: str) -> None:
        """
        Create a new SnowballStemmer for the specified language.

        Args:
            lang: The language to use (e.g., 'english', 'spanish').

        Raises:
            ValueError: If the language is not supported.
        """
        ...

    def stem_word(self, input: str) -> str:
        """
        Stem a single word.

        Args:
            input: The word to stem.

        Returns:
            The stemmed word.
        """
        ...

    # `inputs` is typed as Iterable[str] rather than Iterable[_S]: a TypeVar
    # that appears only once in a signature constrains nothing, and because
    # Iterable is covariant, iterables of str subclasses are still accepted.
    def stem_words(self, inputs: Iterable[str]) -> List[str]:
        """
        Stem an iterable of words sequentially.

        Args:
            inputs: An iterable of words to stem.

        Returns:
            A list of stemmed words.
        """
        ...

    def stem_words_parallel(self, inputs: Iterable[str]) -> List[str]:
        """
        Stem an iterable of words in parallel using Rayon.

        Args:
            inputs: An iterable of words to stem.

        Returns:
            A list of stemmed words.
        """
        ...
Empty file added py_rust_stemmers/py.typed
Empty file.
30 changes: 27 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,37 @@
name = "py_rust_stemmers"
version = "0.1.5"
description = "Fast and parallel snowball stemmer"

# Include a long description
readme = "README.md"
requires-python = ">=3.9"
authors = [{ name = "Qdrant", email = "info@qdrant.com" }]
license = { text = "MIT" }
repository = "https://github.com/ElissonRodrigues/py-rust-stemmers"
classifiers = [
"Programming Language :: Rust",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Intended Audience :: Developers",
"Topic :: Text Processing :: Linguistic",
]

[build-system]
requires = ["maturin>=0.12"]
build-backend = "maturin"

[tool.maturin]
features = ["pyo3/abi3-py39"]

# Other fields like authors, dependencies, etc.
[dependency-groups]
dev = [
"pytest",
"tqdm",
"snowballstemmer",
"puccinialin", # Necessary for build in this environment
]
Loading