Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 47 additions & 13 deletions CAG.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,17 @@
from colorama import Fore, Style, init
import json
import time
import logging

# ------------------ Logging Setup ------------------
# Send every record to both a persistent log file and the console.
# NOTE: basicConfig() does nothing if the root logger already has handlers.
logging.basicConfig(
    level=logging.INFO,  # Change to DEBUG if you want detailed logs
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        # Explicit UTF-8: logged questions/answers may contain non-ASCII
        # text, which must not depend on the platform default encoding.
        logging.FileHandler("extinct_animals.log", encoding="utf-8"),  # log to file
        logging.StreamHandler(),  # also print to console
    ],
)

# Initialize colorama; autoreset=True restores the default colour/style
# after each print so a Fore/Style code does not leak into later output.
init(autoreset=True)
Expand All @@ -15,41 +26,63 @@

# Step 1: Extract Text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in the PDF at *pdf_path*.

    The file is opened in binary mode and parsed with ``PyPDF2.PdfReader``;
    the text of each page is joined with no separator.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text, or an empty string if the file cannot be opened
        or parsed. Failures are logged rather than raised so callers can
        test the result for emptiness.
    """
    try:
        logging.info("Extracting text from PDF: %s", pdf_path)
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Guard against a page yielding None (no extractable text):
            # without the fallback, join() would raise and the text of
            # every other page would be thrown away as well.
            text = ''.join((page.extract_text() or '') for page in reader.pages)
        logging.info("PDF text extraction successful")
        return text
    except Exception as e:
        # Deliberately broad: the parser can fail in many ways and the
        # caller only needs to know that no text is available.
        logging.error("Failed to extract PDF: %s", e)
        return ""

# Step 2: Initialize Knowledge Cache
extinct_animals_pdf = "Extinct Animals.pdf"
# Keep only the leading 6000 characters so the prompt fits the context limit.
knowledge_cache = extract_text_from_pdf(extinct_animals_pdf)
knowledge_cache = knowledge_cache[:6000]

# Nothing extracted: warn now so the cause is visible before any query runs.
if not knowledge_cache:
    logging.warning("Knowledge cache is empty. Check the PDF file.")

# Step 3: Set Up Groq API Client
# Read the key with .get() so a missing variable is reported through the log
# below instead of raising KeyError while the module is being imported.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    logging.error("GROQ_API_KEY not found in environment variables!")
else:
    logging.info("Groq API key loaded successfully")

async def generate_response_async(prompt):
    """Send *prompt* to the Groq chat-completions API and return the reply.

    A fresh ``AsyncGroq`` client is opened for the call, using the
    module-level ``GROQ_API_KEY``. The prompt is sent as a single user
    message to the ``llama-3.1-8b-instant`` model with a 1024-token cap.

    Args:
        prompt: Full prompt text to send as the user message.

    Returns:
        The reply text with surrounding whitespace stripped, or a fixed
        apology string if anything goes wrong (the error is logged).
    """
    try:
        async with AsyncGroq(api_key=GROQ_API_KEY) as client:
            logging.debug("Sending request to Groq API...")
            chat_completion = await client.chat.completions.create(
                messages=[{"role": "user", "content": prompt}],
                model="llama-3.1-8b-instant",
                max_tokens=1024,
            )
            # The message content is optional in the response schema; treat
            # a missing value as empty text rather than failing on .strip().
            content = chat_completion.choices[0].message.content
            response = (content or "").strip()
            logging.info("Groq API response received")
            return response
    except Exception as e:
        # Deliberately broad: any client/network/API failure is turned into
        # a user-facing fallback message instead of propagating.
        logging.error("Error generating response: %s", e)
        return "Sorry, I couldn't process your request."

# Step 4: Implement CAG Logic
async def extinct_animal_query(question: str) -> str:
    """Answer *question* using the cached knowledge-base text as context.

    The module-level ``knowledge_cache`` is placed ahead of the question in
    a single prompt, which is passed to ``generate_response_async``. Both
    the incoming question and a preview of the answer are logged.

    Args:
        question: The user's question.

    Returns:
        Whatever ``generate_response_async`` returns for the built prompt.
    """
    logging.info(f"Processing question: {question}")

    # Knowledge base first, then the question, then a cue for the answer.
    prompt = (
        "EXTINCT ANIMAL KNOWLEDGE BASE:\n"
        f"{knowledge_cache}\n"
        "\n"
        f"QUESTION: {question}\n"
        "ANSWER:"
    )

    answer = await generate_response_async(prompt)
    preview = answer[:100]
    logging.info(f"Answer generated: {preview}...")  # log first 100 chars
    return answer

# Step 5: Save Interaction History to JSON
def save_to_history(question, answer):
history_file = 'history.json'

# Save this interaction to the history file
# Load existing history if it exists
if os.path.exists(history_file):
with open(history_file, 'r') as file:
Expand Down Expand Up @@ -92,3 +125,4 @@ def save_to_history(question, answer):

except Exception as e:
print(Fore.RED + f"An error occurred: {str(e)}")

4 changes: 4 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
# Load environment variables
load_dotenv()

# Print the integers 0 through 9, one per line.
# NOTE(review): looks like leftover debug output — confirm it is intended.
for i in range(10):
    print(i)

# --- Setup ---
def extract_text_from_pdf(pdf_path):
with open(pdf_path, 'rb') as file:
Expand Down Expand Up @@ -139,3 +142,4 @@ async def augmented_generation(question: str, knowledge_cache: str, groq_api_key

# Add assistant response to chat history
st.session_state["chat_history"].append({"role": "assistant", "content": full_response})