From eb98ceab979d9967cba4eea7521670538ac1ad34 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 13 Jan 2026 13:20:04 +0000 Subject: [PATCH 1/3] Initial plan From 6c6e30c661b8e2bd98f396298b757fbeecabd3e5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 13 Jan 2026 13:25:38 +0000 Subject: [PATCH 2/3] Set up MkDocs documentation framework with initial pages Co-authored-by: jackrua <155536850+jackrua@users.noreply.github.com> --- .gitignore | 1 + Makefile | 8 +- README.md | 17 +++ docs/examples/openai.md | 177 +++++++++++++++++++++++++ docs/getting-started/installation.md | 188 +++++++++++++++++++++++++++ docs/index.md | 70 ++++++++++ mkdocs.yml | 50 +++++++ requirements.txt | 2 + 8 files changed, 512 insertions(+), 1 deletion(-) create mode 100644 docs/examples/openai.md create mode 100644 docs/getting-started/installation.md create mode 100644 docs/index.md create mode 100644 mkdocs.yml diff --git a/.gitignore b/.gitignore index c55ff44..20af990 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ venv/ __pycache__ .pytest_cache/ examples/rag/README.md +site/ diff --git a/Makefile b/Makefile index f0831c4..10af071 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: lint build publish clean +.PHONY: lint build publish clean docs docs-serve lint: pycodestyle . --ignore=E501 @@ -11,3 +11,9 @@ publish: clean build clean: rm -rf .pytest_cache dist pgvector.egg-info + +docs: + mkdocs build + +docs-serve: + mkdocs serve diff --git a/README.md b/README.md index 7c302b1..1e6c94a 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,23 @@ Run: pip install pgvector ``` +## Documentation + +Full documentation is available at the [documentation site](https://pgvector.github.io/pgvector-python/). 
+ +To build the documentation locally: + +```sh +pip install mkdocs mkdocs-material +make docs +``` + +To serve the documentation locally: + +```sh +make docs-serve +``` + And follow the instructions for your database library: - [Django](#django) diff --git a/docs/examples/openai.md b/docs/examples/openai.md new file mode 100644 index 0000000..dbc0f56 --- /dev/null +++ b/docs/examples/openai.md @@ -0,0 +1,177 @@ +# OpenAI Embeddings Example + +This example demonstrates how to use pgvector with OpenAI's embedding API to store and search text embeddings. + +## Overview + +This example shows how to: + +- Generate embeddings using OpenAI's API +- Store embeddings in PostgreSQL with pgvector +- Perform similarity search to find related documents + +## Prerequisites + +- OpenAI API key +- PostgreSQL with pgvector extension installed +- Python packages: `openai`, `pgvector`, `psycopg` or another supported database adapter + +## Installation + +Install the required packages: + +```sh +pip install pgvector openai psycopg[binary] +``` + +## Basic Example + +Here's a simple example using Psycopg 3: + +```python +import openai +import psycopg +from pgvector.psycopg import register_vector + +# Set up OpenAI API +openai.api_key = 'your-api-key' + +# Connect to database +conn = psycopg.connect(dbname='mydb') + +# Enable the extension first; registering the types requires it to exist +conn.execute('CREATE EXTENSION IF NOT EXISTS vector') +register_vector(conn) + +# Create a table +conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding vector(1536))') + +# Generate and store embeddings +def add_document(content): + response = openai.embeddings.create( + input=content, + model="text-embedding-3-small" + ) + embedding = response.data[0].embedding + conn.execute('INSERT INTO documents (content, embedding) VALUES (%s, %s)', (content, embedding)) + +# Add some documents +add_document('The cat sits on the mat') +add_document('A dog runs in the park') +add_document('Feline animals are independent') + 
+conn.commit() + +# Search for similar documents +def search(query, limit=5): + response = openai.embeddings.create( + input=query, + model="text-embedding-3-small" + ) + embedding = response.data[0].embedding + + results = conn.execute( + 'SELECT content, embedding <=> %s::vector as distance FROM documents ORDER BY distance LIMIT %s', + (embedding, limit) + ).fetchall() + + return results + +# Find documents similar to a query +results = search('cat') +for content, distance in results: + print(f'{content}: {distance}') +``` + +## Using with SQLAlchemy + +Here's the same example using SQLAlchemy: + +```python +import openai +from sqlalchemy import create_engine, select, text +from sqlalchemy.orm import Session, DeclarativeBase, Mapped, mapped_column +from pgvector.sqlalchemy import Vector + +# Set up database +engine = create_engine('postgresql://user:password@localhost/dbname') + +class Base(DeclarativeBase): + pass + +class Document(Base): + __tablename__ = 'documents' + + id: Mapped[int] = mapped_column(primary_key=True) + content: Mapped[str] + embedding: Mapped[list] = mapped_column(Vector(1536)) + +# Create tables +with Session(engine) as session: + session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) + session.commit() + +Base.metadata.create_all(engine) + +# Generate and store embeddings +def add_document(content): + response = openai.embeddings.create( + input=content, + model="text-embedding-3-small" + ) + embedding = response.data[0].embedding + + with Session(engine) as session: + doc = Document(content=content, embedding=embedding) + session.add(doc) + session.commit() + +# Search for similar documents +def search(query, limit=5): + response = openai.embeddings.create( + input=query, + model="text-embedding-3-small" + ) + embedding = response.data[0].embedding + + with Session(engine) as session: + results = session.scalars( + select(Document) + .order_by(Document.embedding.l2_distance(embedding)) + .limit(limit) + ).all() + + return results +``` + 
+## Performance Tips + +### Add an Index + +For better performance with larger datasets, add an HNSW index. The operator class must match the distance operator used in your queries: `vector_cosine_ops` for `<=>` (used in the Psycopg example above), `vector_l2_ops` for `<->`: + +```python +conn.execute('CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops)') +``` + +### Use Half-Precision Vectors + +To save storage space, you can use half-precision vectors: + +```python +# Create table with halfvec +conn.execute('CREATE TABLE documents (id bigserial PRIMARY KEY, content text, embedding halfvec(1536))') + +# Index with half-precision +conn.execute('CREATE INDEX ON documents USING hnsw (embedding halfvec_cosine_ops)') +``` + +## Complete Example + +For a complete working example, see the [example.py](https://github.com/pgvector/pgvector-python/blob/master/examples/openai/example.py) file in the repository. + +## Next Steps + +- Learn about [hybrid search](https://github.com/pgvector/pgvector-python/blob/master/examples/hybrid_search/rrf.py) combining vector and keyword search +- Explore [RAG (Retrieval-Augmented Generation)](https://github.com/pgvector/pgvector-python/blob/master/examples/rag/example.py) patterns +- Try other embedding providers like [Cohere](https://github.com/pgvector/pgvector-python/blob/master/examples/cohere/example.py) or [SentenceTransformers](https://github.com/pgvector/pgvector-python/blob/master/examples/sentence_transformers/example.py) diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..b005db1 --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,188 @@ +# Installation + +## Requirements + +- Python >= 3.9 +- PostgreSQL with pgvector extension installed + +## Install pgvector + +First, install the Python package: + +```sh +pip install pgvector +``` + +## Database Library Setup + +pgvector-python supports multiple database libraries. Choose the one you're using and follow the setup instructions below. 
+ +### Django + +Create a migration to enable the extension: + +```python +from pgvector.django import VectorExtension + +class Migration(migrations.Migration): + operations = [ + VectorExtension() + ] +``` + +Add a vector field to your model: + +```python +from pgvector.django import VectorField + +class Item(models.Model): + embedding = VectorField(dimensions=3) +``` + +Also supports `HalfVectorField`, `BitField`, and `SparseVectorField` + +### SQLAlchemy + +Enable the extension: + +```python +from sqlalchemy import text + +session.execute(text('CREATE EXTENSION IF NOT EXISTS vector')) +``` + +Add a vector column: + +```python +from pgvector.sqlalchemy import Vector + +class Item(Base): + embedding = mapped_column(Vector(3)) +``` + +Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` + +### SQLModel + +Enable the extension: + +```python +from sqlalchemy import text + +session.exec(text('CREATE EXTENSION IF NOT EXISTS vector')) +``` + +Add a vector column: + +```python +from typing import Any +from sqlmodel import Field +from pgvector.sqlalchemy import Vector + +class Item(SQLModel, table=True): + embedding: Any = Field(sa_type=Vector(3)) +``` + +Also supports `HALFVEC`, `BIT`, and `SPARSEVEC` + +### Psycopg 3 + +Enable the extension: + +```python +conn.execute('CREATE EXTENSION IF NOT EXISTS vector') +``` + +Register the types with your connection: + +```python +from pgvector.psycopg import register_vector + +register_vector(conn) +``` + +For async connections, use: + +```python +from pgvector.psycopg import register_vector_async + +await register_vector_async(conn) +``` + +### Psycopg 2 + +Enable the extension: + +```python +cur = conn.cursor() +cur.execute('CREATE EXTENSION IF NOT EXISTS vector') +``` + +Register the types with your connection or cursor: + +```python +from pgvector.psycopg2 import register_vector + +register_vector(conn) +``` + +### asyncpg + +Enable the extension: + +```python +await conn.execute('CREATE EXTENSION IF NOT EXISTS vector') +``` + 
+Register the types with your connection: + +```python +from pgvector.asyncpg import register_vector + +await register_vector(conn) +``` + +Or your pool: + +```python +import asyncpg + +async def init(conn): + await register_vector(conn) + +pool = await asyncpg.create_pool(..., init=init) +``` + +### pg8000 + +Enable the extension: + +```python +conn.run('CREATE EXTENSION IF NOT EXISTS vector') +``` + +Register the types with your connection: + +```python +from pgvector.pg8000 import register_vector + +register_vector(conn) +``` + +### Peewee + +Add a vector column: + +```python +from pgvector.peewee import VectorField + +class Item(BaseModel): + embedding = VectorField(dimensions=3) +``` + +Also supports `HalfVectorField`, `FixedBitField`, and `SparseVectorField` + +## Next Steps + +- Check out the [examples](../examples/openai.md) to see how to use pgvector with different services +- Learn about vector operations and indexing in the full documentation diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..944f30f --- /dev/null +++ b/docs/index.md @@ -0,0 +1,70 @@ +# pgvector-python + +[pgvector](https://github.com/pgvector/pgvector) support for Python + +Supports [Django](https://github.com/django/django), [SQLAlchemy](https://github.com/sqlalchemy/sqlalchemy), [SQLModel](https://github.com/tiangolo/sqlmodel), [Psycopg 3](https://github.com/psycopg/psycopg), [Psycopg 2](https://github.com/psycopg/psycopg2), [asyncpg](https://github.com/MagicStack/asyncpg), [pg8000](https://github.com/tlocke/pg8000), and [Peewee](https://github.com/coleifer/peewee) + +[![Build Status](https://github.com/pgvector/pgvector-python/actions/workflows/build.yml/badge.svg)](https://github.com/pgvector/pgvector-python/actions) + +## Quick Start + +Install the package: + +```sh +pip install pgvector +``` + +Follow the instructions for your database library: + +- [Django](getting-started/installation.md#django) +- 
[SQLAlchemy](getting-started/installation.md#sqlalchemy) +- [SQLModel](getting-started/installation.md#sqlmodel) +- [Psycopg 3](getting-started/installation.md#psycopg-3) +- [Psycopg 2](getting-started/installation.md#psycopg-2) +- [asyncpg](getting-started/installation.md#asyncpg) +- [pg8000](getting-started/installation.md#pg8000) +- [Peewee](getting-started/installation.md#peewee) + +## Examples + +Check out these examples to get started: + +- [Retrieval-augmented generation](https://github.com/pgvector/pgvector-python/blob/master/examples/rag/example.py) with Ollama +- [Embeddings](examples/openai.md) with OpenAI +- [Binary embeddings](https://github.com/pgvector/pgvector-python/blob/master/examples/cohere/example.py) with Cohere +- [Sentence embeddings](https://github.com/pgvector/pgvector-python/blob/master/examples/sentence_transformers/example.py) with SentenceTransformers +- [Hybrid search](https://github.com/pgvector/pgvector-python/blob/master/examples/hybrid_search/rrf.py) with SentenceTransformers (Reciprocal Rank Fusion) +- [Hybrid search](https://github.com/pgvector/pgvector-python/blob/master/examples/hybrid_search/cross_encoder.py) with SentenceTransformers (cross-encoder) +- [Sparse search](https://github.com/pgvector/pgvector-python/blob/master/examples/sparse_search/example.py) with Transformers +- [Late interaction search](https://github.com/pgvector/pgvector-python/blob/master/examples/colbert/exact.py) with ColBERT +- [Visual document retrieval](https://github.com/pgvector/pgvector-python/blob/master/examples/colpali/exact.py) with ColPali +- [Image search](https://github.com/pgvector/pgvector-python/blob/master/examples/image_search/example.py) with PyTorch +- [Image search](https://github.com/pgvector/pgvector-python/blob/master/examples/imagehash/example.py) with perceptual hashing +- [Morgan fingerprints](https://github.com/pgvector/pgvector-python/blob/master/examples/rdkit/example.py) with RDKit +- [Topic 
modeling](https://github.com/pgvector/pgvector-python/blob/master/examples/gensim/example.py) with Gensim +- [Implicit feedback recommendations](https://github.com/pgvector/pgvector-python/blob/master/examples/implicit/example.py) with Implicit +- [Explicit feedback recommendations](https://github.com/pgvector/pgvector-python/blob/master/examples/surprise/example.py) with Surprise +- [Recommendations](https://github.com/pgvector/pgvector-python/blob/master/examples/lightfm/example.py) with LightFM +- [Horizontal scaling](https://github.com/pgvector/pgvector-python/blob/master/examples/citus/example.py) with Citus +- [Bulk loading](https://github.com/pgvector/pgvector-python/blob/master/examples/loading/example.py) with `COPY` + +## Features + +- **Multiple Database Adapters**: Works seamlessly with popular Python database libraries +- **Vector Operations**: Support for L2 distance, inner product, cosine distance, and more +- **Approximate Indexes**: HNSW and IVFFlat index support for fast similarity search +- **Vector Types**: Support for regular vectors, half-precision vectors, bit vectors, and sparse vectors +- **Batch Operations**: Efficient bulk loading and batch processing capabilities + +## Contributing + +Everyone is encouraged to help improve this project. 
Here are a few ways you can help: + +- [Report bugs](https://github.com/pgvector/pgvector-python/issues) +- Fix bugs and [submit pull requests](https://github.com/pgvector/pgvector-python/pulls) +- Write, clarify, or fix documentation +- Suggest or add new features + +## History + +View the [changelog](https://github.com/pgvector/pgvector-python/blob/master/CHANGELOG.md) diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..aae7dae --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,50 @@ +site_name: pgvector-python +site_description: pgvector support for Python +site_url: https://pgvector.github.io/pgvector-python/ +repo_url: https://github.com/pgvector/pgvector-python +repo_name: pgvector/pgvector-python + +theme: + name: material + palette: + - scheme: default + primary: indigo + accent: indigo + toggle: + icon: material/brightness-7 + name: Switch to dark mode + - scheme: slate + primary: indigo + accent: indigo + toggle: + icon: material/brightness-4 + name: Switch to light mode + features: + - navigation.tabs + - navigation.sections + - navigation.top + - search.suggest + - search.highlight + - content.code.copy + +markdown_extensions: + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences + - admonition + - pymdownx.details + - tables + - attr_list + - md_in_html + +nav: + - Home: index.md + - Getting Started: + - Installation: getting-started/installation.md + - Examples: + - OpenAI Embeddings: examples/openai.md + +plugins: + - search diff --git a/requirements.txt b/requirements.txt index a13be06..a138e86 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,5 @@ pytest-asyncio scipy SQLAlchemy[asyncio]>=2 sqlmodel>=0.0.12 +mkdocs>=1.5.0 +mkdocs-material>=9.0.0 From f416dfe04bb922a5b338bc523e8f941a6cddb748 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 13 Jan 2026 13:26:23 +0000 Subject: [PATCH 3/3] Add 
documentation README for contributors Co-authored-by: jackrua <155536850+jackrua@users.noreply.github.com> --- docs/README.md | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 docs/README.md diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..75c3dd1 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,42 @@ +# Documentation + +This directory contains the documentation for pgvector-python, built with [MkDocs](https://www.mkdocs.org/) and the [Material theme](https://squidfunk.github.io/mkdocs-material/). + +## Building the Documentation + +To build the documentation locally: + +```sh +pip install mkdocs mkdocs-material +make docs +``` + +The built documentation will be in the `site/` directory. + +## Serving the Documentation + +To serve the documentation locally for development: + +```sh +make docs-serve +``` + +This will start a development server at `http://127.0.0.1:8000/`. + +## Documentation Structure + +- `docs/index.md` - Home page +- `docs/getting-started/` - Getting started guides + - `installation.md` - Installation instructions for different database adapters +- `docs/examples/` - Example usage guides + - `openai.md` - OpenAI embeddings example + +## Adding New Pages + +1. Create a new Markdown file in the appropriate directory under `docs/` +2. Add the page to the navigation in `mkdocs.yml` +3. Build and test locally with `make docs-serve` + +## Configuration + +The documentation configuration is in `mkdocs.yml` at the root of the repository.