-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdemo.py
More file actions
77 lines (64 loc) · 2.49 KB
/
demo.py
File metadata and controls
77 lines (64 loc) · 2.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os
import ollama
from math import sqrt
# === CONFIGURATION ===
# Path of the text file that is read line by line to build the dataset.
DATA_FILE = 'cat-facts.txt'
# Model identifier passed to ollama.embed for both dataset chunks and queries.
EMBEDDING_MODEL = 'hf.co/CompendiumLabs/bge-base-en-v1.5-gguf'
# Model identifier passed to ollama.chat to generate the streamed answer.
LANGUAGE_MODEL = 'hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF'
# === Load and Chunk the Dataset ===
# Every line of DATA_FILE is stripped of surrounding whitespace; lines that
# are empty after stripping are dropped. Each remaining line is one chunk.
with open(DATA_FILE, 'r', encoding='utf-8') as file:
    dataset = list(filter(None, map(str.strip, file)))
# === Build the Vector Store ===
# In-memory store: a list of (chunk_text, embedding) tuples.
VECTOR_DB = []


def add_chunk_to_database(chunk):
    """Embed ``chunk`` and append ``(chunk, embedding)`` to ``VECTOR_DB``.

    Args:
        chunk: Text passed as ``input`` to ``ollama.embed``.

    The first entry of the response's ``'embeddings'`` list is stored.
    """
    response = ollama.embed(model=EMBEDDING_MODEL, input=chunk)
    vector = response['embeddings'][0]
    VECTOR_DB.append((chunk, vector))


# Index every dataset entry; this runs at module top level, one embed call
# per chunk.
for chunk in dataset:
    add_chunk_to_database(chunk)
# === Cosine Similarity ===
def cosine_similarity(a, b):
    """Return the cosine similarity of two numeric vectors.

    Args:
        a: Sequence of numbers. It is iterated more than once, so it must
            not be a one-shot iterator.
        b: Sequence of numbers, same requirement as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``. When either vector has zero magnitude
        (including empty input) the similarity is undefined; ``0.0`` is
        returned instead of raising ``ZeroDivisionError``.

    Note:
        The dot product uses ``zip``, so it only covers the common prefix
        when the lengths differ, while each norm uses its full vector.
    """
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = sqrt(sum(x * x for x in a))
    norm_b = sqrt(sum(y * y for y in b))
    denominator = norm_a * norm_b
    if denominator == 0:
        # Zero-magnitude vector: avoid dividing by zero.
        return 0.0
    return dot / denominator
# === Retrieval Function ===
def retrieve(query, top_n=3):
    """Return the ``top_n`` stored chunks most similar to ``query``.

    Args:
        query: Text passed as ``input`` to ``ollama.embed``.
        top_n: Length of the slice taken from the ranked list (default 3).

    Returns:
        A list of ``(chunk, similarity)`` tuples from ``VECTOR_DB``, ordered
        by descending cosine similarity to the query embedding.
    """
    response = ollama.embed(model=EMBEDDING_MODEL, input=query)
    query_vector = response['embeddings'][0]

    # Score every stored chunk against the query embedding.
    scored = [
        (text, cosine_similarity(query_vector, vector))
        for text, vector in VECTOR_DB
    ]

    # Highest similarity first; ties keep their VECTOR_DB order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
    return ranked[:top_n]
# === Chatbot Response Generation ===
def generate_response(input_query):
    """Retrieve context for ``input_query`` and stream the model's answer.

    Prints the retrieved chunks with their similarity scores, builds a
    system prompt containing those chunks, calls ``ollama.chat`` in
    streaming mode and prints each streamed piece as it arrives.

    Args:
        input_query: The user's question; used for retrieval and sent as
            the ``user`` message.

    Returns:
        None. All output goes to stdout.
    """
    hits = retrieve(input_query)

    print('\n📚 Retrieved knowledge:')
    for text, score in hits:
        print(f' - (similarity: {score:.2f}) {text}')

    # One bullet line per retrieved chunk, joined with newlines (chr(10)).
    context_block = chr(10).join([f' - {text}' for text, _ in hits])

    # The continuation lines stay flush-left so no indentation leaks into
    # the prompt text.
    instruction_prompt = f"""You are a helpful chatbot.
Use only the following pieces of context to answer the question. Don't make up any new information:
{context_block}
"""

    conversation = [
        {'role': 'system', 'content': instruction_prompt},
        {'role': 'user', 'content': input_query},
    ]
    stream = ollama.chat(
        model=LANGUAGE_MODEL,
        messages=conversation,
        stream=True,
    )

    print('\n🤖 Chatbot response:')
    for part in stream:
        # Emit each streamed piece immediately, without adding newlines.
        print(part['message']['content'], end='', flush=True)
# === Main ===
if __name__ == '__main__':
    # Interactive loop: read a question, answer it, repeat until the user
    # types "exit", presses Ctrl-C, or closes stdin.
    while True:
        try:
            input_query = input('\n\n📝 Ask me a question (or type "exit"): ')
            command = input_query.strip()
            if command.lower() == 'exit':
                break
            if not command:
                # A blank line carries no question; prompt again instead of
                # sending empty text to the embedding and chat models.
                continue
            generate_response(input_query)
        except (KeyboardInterrupt, EOFError):
            # Ctrl-C (also while an answer is streaming) or end-of-input
            # (Ctrl-D / closed stdin) ends the session without a traceback.
            break