diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..7672523 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,157 @@ +# KnowledgeSpace Agent - Examples + +This directory contains minimal, reproducible examples to help new contributors and GSoC students understand how the KnowledgeSpace AI Agent works. + +## Prerequisites + +- **Python**: 3.11 or higher +- **Google API Key**: Required for Gemini LLM (get one free at [Google AI Studio](https://aistudio.google.com/apikey)) + +## Quick Setup + +### 1. Install Dependencies + +From the project root: + +```bash +# Using UV (recommended) +uv sync + +# Or using pip +pip install -r requirements.txt +``` + +### 2. Set Environment Variables + +Create a `.env` file in the project root (or set environment variables): + +```bash +# Required: Your Google Gemini API key +GOOGLE_API_KEY=your_api_key_here + +# Use standard Gemini API (not Vertex AI) +GEMINI_USE_VERTEX=false +``` + +> **Note:** The examples require only `GOOGLE_API_KEY` to run. Other environment variables (BigQuery, Vertex AI Vector Search) are optional and only needed for full production functionality. + +## Running the Examples + +### Basic Demo Script + +```bash +# From the project root +cd examples +python basic_demo.py +``` + +### What to Expect + +The script will: + +1. **Initialize** the NeuroscienceAssistant agent +2. **Send** a sample query about neuroscience datasets +3. **Display**: + - The final synthesized response produced by the agent + - (Optionally) selected intermediate signals for learning and debugging purposes + +**Example Output (illustrative):** + +``` +============================================================ +KnowledgeSpace Agent - Basic Demo +============================================================ + +Initializing the NeuroscienceAssistant... +✓ Agent initialized successfully + +Sending query: "Find datasets about hippocampus neurons in mice" + +Processing...
(this may take a few seconds) + +--- Agent Response --- +### 🔬 Neuroscience Datasets Found + +#### 1. Mouse Hippocampus CA1 Recordings +- **Source:** DANDI Archive +- **Description:** Extracellular recordings from hippocampal neurons... +... +``` + +## Files in This Directory + +| File | Purpose | +|------|---------| +| `README.md` | This setup guide | +| `basic_demo.py` | Minimal Python script demonstrating agent usage | +| `local_knowledge.json` | Sample mock dataset entries (for reference only) | + +> **Note:** `local_knowledge.json` is provided as illustrative sample data for contributors; the current agent uses remote KnowledgeSpace APIs and does not directly load this file. + +## Understanding the Agent Workflow + +A simplified view of the agent's high-level workflow: + +> This diagram is a conceptual overview intended for learning and onboarding purposes. + +``` +User Query + ↓ +┌─────────────────────┐ +│ 1. Extract Keywords │ ← Gemini extracts search terms +│ 2. Detect Intents │ ← Classify query type (data discovery, etc.) +└─────────────────────┘ + ↓ +┌─────────────────────┐ +│ 3. Execute Search │ ← Query KnowledgeSpace API + Vector DB +└─────────────────────┘ + ↓ +┌─────────────────────┐ +│ 4. Fuse Results │ ← Combine and rank results +└─────────────────────┘ + ↓ +┌─────────────────────┐ +│ 5.
#!/usr/bin/env python3
"""
KnowledgeSpace Agent - Basic Demo
=================================

This script demonstrates how to use the KnowledgeSpace AI Agent programmatically.
It shows the core workflow: initializing the agent, sending a query, and
inspecting the response.

Requirements:
    - Python 3.11+
    - Dependencies installed (uv sync or pip install)
    - GOOGLE_API_KEY environment variable set

Usage:
    cd examples
    python basic_demo.py

Exit status:
    0 when the demo completes, 1 when configuration, initialization or the
    query fails, 130 when interrupted with Ctrl+C.

For more details, see examples/README.md
"""

import asyncio
import os
import sys
import traceback
from collections.abc import Mapping

# Add the backend directory to Python path so we can import the agent.
# This allows running the script from the examples/ directory.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "backend"))

# Load environment variables from the project-root .env file if python-dotenv
# is installed; the dependency is optional, so its absence is not an error.
try:
    from dotenv import load_dotenv
except ImportError:
    pass  # dotenv is optional
else:
    load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))

# Session identifier shared by the chat call and the debug inspection below.
DEMO_SESSION_ID = "demo_session"

# Values of GEMINI_USE_VERTEX (case-insensitive) that enable Vertex AI mode.
_TRUTHY_VALUES = ("true", "1", "yes")


def print_separator(title: str = "") -> None:
    """Print a visual separator, optionally followed by a titled header."""
    print("\n" + "=" * 60)
    if title:
        print(f" {title}")
        print("=" * 60)


def check_environment() -> bool:
    """
    Verify that the environment variables needed by the demo are set.

    Reads GEMINI_USE_VERTEX to pick the mode. Vertex AI mode requires
    GCP_PROJECT_ID; otherwise GOOGLE_API_KEY is required. Values that are
    empty or contain only whitespace are treated as missing.

    Returns:
        True if the environment is properly configured, False otherwise
        (after printing instructions on how to fix it).
    """
    use_vertex = (
        os.getenv("GEMINI_USE_VERTEX", "false").strip().lower() in _TRUTHY_VALUES
    )

    if use_vertex:
        # Vertex AI mode requires GCP_PROJECT_ID
        project_id = os.getenv("GCP_PROJECT_ID", "").strip()
        if not project_id:
            print("❌ Error: GEMINI_USE_VERTEX is enabled but GCP_PROJECT_ID is not set.")
            print(" Set GCP_PROJECT_ID or disable Vertex mode with GEMINI_USE_VERTEX=false")
            return False
        print(f"✓ Using Vertex AI mode (project: {project_id})")
        return True

    # Standard API key mode
    api_key = os.getenv("GOOGLE_API_KEY", "").strip()
    if not api_key:
        help_lines = (
            "❌ Error: GOOGLE_API_KEY environment variable is not set.",
            "",
            "To fix this:",
            " 1. Get a free API key from: https://aistudio.google.com/apikey",
            " 2. Set the environment variable:",
            "",
            " Windows PowerShell:",
            ' $env:GOOGLE_API_KEY = "your_key_here"',
            "",
            " Windows CMD:",
            " set GOOGLE_API_KEY=your_key_here",
            "",
            " Linux/macOS:",
            " export GOOGLE_API_KEY=your_key_here",
            "",
            " Or add it to a .env file in the project root.",
        )
        for line in help_lines:
            print(line)
        return False

    print("✓ Using Google API Key mode")
    return True


def _print_session_details(assistant: object, session_id: str) -> None:
    """Print best-effort debugging details from the agent's session memory.

    NOTE:
    This accesses internal session memory for educational purposes. It is NOT
    part of the public API and may change in future versions, so every lookup
    is defensive: anything unexpected is reported as "no session memory"
    instead of failing a demo that already produced a response.
    """
    print_separator("Session Details (for debugging)")

    store = getattr(assistant, "session_memory", None)
    session_memory = store.get(session_id) if isinstance(store, Mapping) else None

    if not isinstance(session_memory, Mapping) or not session_memory:
        print("(No session memory available)")
        return

    print(f"\n📌 Effective Query: {session_memory.get('effective_query', 'N/A')}")
    # Best-effort fields: availability may vary depending on agent configuration
    print(f"📌 Detected Intents: {session_memory.get('intents', [])}")
    print(f"📌 Extracted Keywords: {session_memory.get('keywords', [])}")
    # `or []` guards against the key being present but set to None.
    print(f"📌 Total Results Found: {len(session_memory.get('all_results') or [])}")


async def run_demo() -> bool:
    """
    Main demo function that shows how to use the KnowledgeSpace Agent.

    This demonstrates:
    1. Initializing the NeuroscienceAssistant
    2. Sending a sample neuroscience query
    3. Displaying the response

    Returns:
        True when the demo ran to completion, False when the environment
        check, agent initialization or query processing failed (the reason
        is printed before returning).
    """
    print_separator("KnowledgeSpace Agent - Basic Demo")

    # Step 1: Check environment
    print("\nChecking environment configuration...")
    if not check_environment():
        return False

    # Step 2: Import and initialize the agent
    # Note: We import here (after path setup) to avoid import errors
    print("\nInitializing the NeuroscienceAssistant...")

    try:
        from agents import NeuroscienceAssistant

        assistant = NeuroscienceAssistant()
        print("✓ Agent initialized successfully")
    except ImportError as e:
        print(f"❌ Import error: {e}")
        print(" Make sure you've installed dependencies: uv sync")
        return False
    except Exception as e:
        print(f"❌ Initialization error: {e}")
        return False

    # Step 3: Define a sample query
    # You can modify this to test different queries
    sample_queries = [
        "Find datasets about hippocampus neurons in mice",
        # Alternative queries you can try:
        # "Show me human EEG datasets with BIDS format",
        # "What fMRI datasets are available with CC0 license?",
        # "Find electrophysiology recordings from rat prefrontal cortex",
    ]

    query = sample_queries[0]

    print(f'\nSending query: "{query}"')
    print("\nProcessing... (this may take a few seconds)")

    # Step 4: Send the query to the agent
    # The handle_chat method is async, so we await it
    try:
        response = await assistant.handle_chat(
            session_id=DEMO_SESSION_ID,  # Unique session ID for conversation history
            query=query,
            reset=True,  # Start fresh (clear any previous conversation)
        )
    except Exception as e:
        print(f"\n❌ Error during query processing: {e}")
        print(" This might be due to API rate limits or network issues.")
        print(" Wait a moment and try again.")
        return False

    # Step 5: Display the response
    print_separator("Agent Response")
    print(response)

    # Step 6: (Optional) Inspect internal session state for learning/debugging.
    # New users can safely ignore this section.
    _print_session_details(assistant, DEMO_SESSION_ID)

    print("\n✓ Demo completed successfully!")
    print("\nNext steps:")
    print(" - Modify the 'query' variable above to try different searches")
    print(" - Explore backend/agents.py to understand the full implementation")
    print(" - Visit https://chat.knowledge-space.org/ for the full web interface")
    return True


def main() -> int:
    """Entry point for the demo script.

    Returns:
        A process exit status: 0 on success, 1 on failure, 130 when the
        user interrupts the demo.
    """
    try:
        # Run the async demo function
        succeeded = asyncio.run(run_demo())
    except KeyboardInterrupt:
        print("\n\nDemo interrupted by user.")
        return 130
    except Exception as e:
        # Top-level boundary: report the error with a traceback, then fail.
        print(f"\n❌ Unexpected error: {e}")
        traceback.print_exc()
        return 1
    return 0 if succeeded else 1


if __name__ == "__main__":
    sys.exit(main())
Includes behavioral timestamps and neural spike data.", + "datasource_name": "NeuroElectro", + "species": [ + "Rattus norvegicus" + ], + "brain_regions": [ + "Prefrontal Cortex", + "mPFC" + ], + "technique": "Single-unit recording", + "data_format": [ + "NWB", + "HDF5" + ], + "license": "PDDL", + "subjects": 8, + "authors": [ + "Lee, K.", + "Brown, R.", + "Davis, S.", + "Miller, T." + ], + "year": 2023, + "url": "https://neuroelectro.org/example/003" + }, + { + "id": "demo_004", + "title": "Primate Motor Cortex Neural Population Dataset", + "description": "Multi-electrode array recordings from primary motor cortex during reaching movements. Contains population spiking activity and hand kinematics.", + "datasource_name": "Brain/MINDS", + "species": [ + "Macaca mulatta" + ], + "brain_regions": [ + "Motor Cortex", + "M1" + ], + "technique": "Multi-electrode array", + "data_format": [ + "NWB" + ], + "license": "CC-BY-4.0", + "subjects": 2, + "authors": [ + "Tanaka, Y.", + "Yamamoto, H." + ], + "year": 2022, + "url": "https://brainminds.org/example/004" + }, + { + "id": "demo_005", + "title": "Human EEG Sleep Stage Dataset", + "description": "High-density EEG recordings during sleep with expert-labeled sleep stages. Includes 64-channel recordings across full night sessions.", + "datasource_name": "EBRAINS", + "species": [ + "Homo sapiens" + ], + "brain_regions": [ + "Whole brain" + ], + "technique": "EEG", + "data_format": [ + "BIDS", + "EDF" + ], + "license": "CC-BY-SA-4.0", + "subjects": 50, + "authors": [ + "Mueller, F.", + "Schmidt, P.", + "Weber, A." + ], + "year": 2024, + "url": "https://ebrains.eu/example/005" + } + ] +} \ No newline at end of file