-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path20.ai-ddl.sql
More file actions
136 lines (122 loc) · 4.69 KB
/
20.ai-ddl.sql
File metadata and controls
136 lines (122 loc) · 4.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
-- =====================================================
-- INSTALL PG_Vector extension to PostgreSQL 18 Bookworm Edition
-- apt-get install -y --no-install-recommends postgresql-18-pgvector
--
--
-- Concept 1 — Embeddings are NOT text search
-- - SQL full-text search → flexible (LIKE, tsquery)
-- - embeddings → require vector representation
--
-- Concept 2 — Embedding generation is external
-- - PostgreSQL does NOT create embeddings
--
-- It only:
-- - stores them
-- - indexes them
-- - searches them
--
-- Concept 3 — Deterministic vs dynamic systems
-- Your function = deterministic system: input must exist → otherwise fail
-- Real AI system = dynamic: any input → generate embedding → always works
--
-- Why you added the EXCEPTION (and why it's actually good)
--
-- This enforces:
-- - correctness
-- - visibility (no silent failures)
-- - predictable behavior for demos
-- =====================================================
-- =====================================================
-- Enable pgvector Extension
-- Demonstrates: Activating vector support in PostgreSQL
-- =====================================================
CREATE EXTENSION IF NOT EXISTS vector;
-- =====================================================
-- Recreate Movies Table
-- Demonstrates: Storing movies together with embeddings
-- =====================================================
DROP TABLE IF EXISTS omdb.movies;
CREATE TABLE IF NOT EXISTS omdb.movies
(
id BIGINT PRIMARY KEY,
name TEXT NOT NULL,
description TEXT NOT NULL,
movie_embedding VECTOR(1024),
release_date DATE,
runtime INT,
budget NUMERIC,
revenue NUMERIC,
vote_average NUMERIC,
votes_count BIGINT
);
-- =================================================
-- Add Generated Full-Text Search Column
-- Demonstrates: Preparing hybrid search with full-text + vectors
-- =====================================================
ALTER TABLE omdb.movies
DROP COLUMN IF EXISTS search_vector;
ALTER TABLE omdb.movies
ADD COLUMN search_vector tsvector GENERATED ALWAYS AS (
setweight(to_tsvector('english', coalesce(name, '')), 'A') ||
setweight(to_tsvector('english', coalesce(description, '')), 'B')
) STORED;
-- =====================================================
-- Recreate Phrases Dictionary Table
-- Demonstrates: Dictionary of phrases mapped to embeddings
-- =====================================================
DROP TABLE IF EXISTS omdb.phrases_dictionary;
CREATE TABLE omdb.phrases_dictionary
(
phrase TEXT NOT NULL,
phrase_embedding VECTOR(1024)
);
-- =====================================================
-- Create Embedding Lookup Function
-- Demonstrates: Retrieving a precomputed embedding by phrase
-- =====================================================
CREATE OR REPLACE FUNCTION omdb.get_embedding(input_phrase TEXT)
RETURNS VECTOR(1024) AS
$$
DECLARE
embedding VECTOR(1024);
BEGIN
SELECT phrase_embedding
INTO embedding
FROM omdb.phrases_dictionary
WHERE LOWER(phrase) = LOWER(input_phrase);
IF NOT FOUND THEN
RAISE EXCEPTION
'The search phrase does not exist in the dictionary table.';
END IF;
RETURN embedding;
END;
$$ LANGUAGE plpgsql;
-- =====================================================
-- Create GIN Index for Full-Text Search
-- Demonstrates: Production-grade indexing for lexical search
-- =====================================================
CREATE INDEX IF NOT EXISTS idx_movies_search_vector
ON omdb.movies
USING GIN (search_vector);
-- =====================================================
-- Create HNSW Index for Cosine Similarity
-- Demonstrates: Fast approximate nearest-neighbor search for embeddings
-- =====================================================
CREATE INDEX IF NOT EXISTS idx_movies_embedding_hnsw_cosine
ON omdb.movies
USING hnsw (movie_embedding vector_cosine_ops);
-- =====================================================
-- Create HNSW Index for Inner Product Similarity
-- Demonstrates: Alternative ANN index for embeddings when inner product is preferred
-- =====================================================
CREATE INDEX IF NOT EXISTS idx_movies_embedding_hnsw_ip
ON omdb.movies
USING hnsw (movie_embedding vector_ip_ops);
-- =====================================================
-- Create IVFFlat Index for Cosine Similarity
-- Demonstrates: Another ANN indexing option requiring training/probing
-- =====================================================
CREATE INDEX IF NOT EXISTS idx_movies_embedding_ivfflat_cosine
ON omdb.movies
USING ivfflat (movie_embedding vector_cosine_ops)
WITH (lists = 100);