deepakdgupta1
diff --git a/src/knowcode/background_indexer.py b/src/knowcode/background_indexer.py
Lines changed: 12 additions & 2 deletions
diff --git a/src/knowcode/chunk_repository.py b/src/knowcode/chunk_repository.py
Lines changed: 5 additions & 0 deletions
diff --git a/src/knowcode/chunker.py b/src/knowcode/chunker.py
Lines changed: 31 additions & 10 deletions
diff --git a/src/knowcode/cli.py b/src/knowcode/cli.py
Lines changed: 3 additions & 3 deletions
diff --git a/src/knowcode/completeness.py b/src/knowcode/completeness.py
Lines changed: 10 additions & 2 deletions
diff --git a/src/knowcode/embedding.py b/src/knowcode/embedding.py
Lines changed: 17 additions & 0 deletions
diff --git a/src/knowcode/hybrid_index.py b/src/knowcode/hybrid_index.py
Lines changed: 17 additions & 2 deletions
diff --git a/src/knowcode/indexer.py b/src/knowcode/indexer.py
Lines changed: 33 additions & 4 deletions
@@ -12,6 +12,11 @@ class BackgroundIndexer:
     """Runs indexing in background thread."""
 
     def __init__(self, indexer: Indexer) -> None:
+        """Initialize the background worker with an Indexer instance.
+
+        Args:
+            indexer: Indexer used to process queued files.
+        """
         self.indexer = indexer
         self._queue: queue.Queue = queue.Queue()
         self._thread: Optional[threading.Thread] = None
@@ -31,16 +36,21 @@ def stop(self) -> None:
             self._thread.join(timeout=5.0)
 
     def queue_file(self, path: Path) -> None:
-        """Queue a file for indexing."""
+        """Queue a file for indexing.
+
+        Args:
+            path: File path to enqueue for processing.
+        """
         self._queue.put(path)
 
     def _worker(self) -> None:
-        """Worker thread that processes indexing queue."""
+        """Worker thread that processes the indexing queue."""
         while self._running:
             try:
                 # Use timeout to allow checking self._running
                 path = self._queue.get(timeout=1.0)
                 if path is None:
+                    self._queue.task_done()
                     break
                 self.indexer.index_file(path)
                 self._queue.task_done()
 
@@ -39,24 +39,29 @@ class InMemoryChunkRepository(ChunkRepository):
     """In-memory implementation of ChunkRepository."""
 
     def __init__(self) -> None:
+        """Initialize the in-memory storage structures."""
         self._chunks: dict[str, CodeChunk] = {}
         self._by_entity: dict[str, list[str]] = {}  # entity_id -> chunk_ids
 
     def add(self, chunk: CodeChunk) -> None:
+        """Add a chunk to the in-memory index."""
         self._chunks[chunk.id] = chunk
         if chunk.entity_id not in self._by_entity:
             self._by_entity[chunk.entity_id] = []
         if chunk.id not in self._by_entity[chunk.entity_id]:
             self._by_entity[chunk.entity_id].append(chunk.id)
 
     def get(self, chunk_id: str) -> Optional[CodeChunk]:
+        """Fetch a chunk by its ID."""
         return self._chunks.get(chunk_id)
 
     def get_by_entity(self, entity_id: str) -> list[CodeChunk]:
+        """Return all chunks associated with an entity."""
         chunk_ids = self._by_entity.get(entity_id, [])
         return [self._chunks[cid] for cid in chunk_ids if cid in self._chunks]
 
     def search_by_tokens(self, tokens: list[str], limit: int = 10) -> list[CodeChunk]:
+        """Perform a simple token-overlap search over stored chunks."""
         # Simple token overlap scoring
         scores: list[tuple[float, CodeChunk]] = []
         query_set = set(tokens)
 
@@ -12,14 +12,21 @@
 
 
 class Chunker:
-    """Chunks code entities into smaller units."""
+    """Chunks code entities into smaller, searchable units."""
 
     def __init__(self, config: Optional[ChunkingConfig] = None) -> None:
         self.config = config or ChunkingConfig()
         self.chunks: list[CodeChunk] = []
 
     def process_parse_result(self, result: ParseResult) -> list[CodeChunk]:
-        """Process a ParseResult and generate chunks."""
+        """Convert a ParseResult into a list of CodeChunk objects.
+
+        Args:
+            result: Parsed entities, relationships, and errors for a single file.
+
+        Returns:
+            List of generated CodeChunk objects in priority order.
+        """
         self.chunks = []  # Single initialization at start of process
 
         file_path = result.file_path
@@ -42,16 +49,19 @@ def process_parse_result(self, result: ParseResult) -> list[CodeChunk]:
 
         # 2. Entity Chunks (Classes, Functions, Methods)
         for entity in result.entities:
-            if entity.kind == EntityKind.MODULE:
-                continue
             if entity.kind == EntityKind.MODULE:
                 continue
             self._chunk_entity(entity, last_modified)
 
         return self.chunks
 
     def _emit_module_chunks(self, file_path: str, source: str) -> None:
-        """Extract module-level header and imports."""
+        """Extract module-level header and imports into dedicated chunks.
+
+        Args:
+            file_path: File path used to namespace chunk IDs.
+            source: Full source code for the module.
+        """
         # Module Header
         header = self._extract_module_header(source)
         if header:
@@ -77,7 +87,7 @@ def _emit_module_chunks(self, file_path: str, source: str) -> None:
             self.chunks.append(import_chunk)
 
     def _extract_module_header(self, source: str) -> str:
-        """Extract first docstring and module definition."""
+        """Extract the leading module header and docstring block."""
         lines = source.splitlines()
         header_lines = []
         in_docstring = False
@@ -112,7 +122,7 @@ def _extract_module_header(self, source: str) -> str:
         return "\n".join(header_lines).strip()
 
     def _extract_imports(self, source: str) -> str:
-        """Extract all import statements."""
+        """Extract all import statements from the source."""
         lines = []
         for line in source.splitlines():
             stripped = line.strip()
@@ -121,7 +131,12 @@ def _extract_imports(self, source: str) -> str:
         return "\n".join(lines).strip()
 
     def _chunk_entity(self, entity: Entity, last_modified: Optional[str] = None) -> None:
-        """Create chunks for an entity."""
+        """Create chunks for an entity and append them to the in-memory list.
+
+        Args:
+            entity: Entity to chunk (class, function, method, etc.).
+            last_modified: Optional timestamp used for ranking signals.
+        """
         content = ""
 
         if self.config.include_signatures and entity.signature:
@@ -136,8 +151,10 @@ def _chunk_entity(self, entity: Entity, last_modified: Optional[str] = None) ->
             content += entity.name
 
         # Sliding window chunking
+        has_docstring = "true" if entity.docstring else "false"
+
         if len(content) <= self.config.max_chunk_size:
-            metadata = {"kind": entity.kind.value}
+            metadata = {"kind": entity.kind.value, "has_docstring": has_docstring}
             if last_modified:
                 metadata["last_modified"] = last_modified
 
@@ -157,7 +174,11 @@ def _chunk_entity(self, entity: Entity, last_modified: Optional[str] = None) ->
                 end = min(start + self.config.max_chunk_size, len(content))
                 chunk_content = content[start:end]
 
-                metadata = {"kind": entity.kind.value, "chunk_index": str(chunk_index)}
+                metadata = {
+                    "kind": entity.kind.value,
+                    "chunk_index": str(chunk_index),
+                    "has_docstring": has_docstring,
+                }
                 if last_modified:
                     metadata["last_modified"] = last_modified
 
 
@@ -8,7 +8,7 @@
 import click
 
 from knowcode import __version__
-from knowcode.models import EntityKind
+from knowcode.models import EntityKind, RelationshipKind
 from knowcode.service import KnowCodeService
 from knowcode.knowledge_store import KnowledgeStore
 
@@ -431,7 +431,7 @@ def history(target: Optional[str], store: str, limit: int) -> None:
             author_rels = knowledge.get_incoming_relationships(commit.id)
             author = "Unknown"
             for rel in author_rels:
-                if rel.kind == "authored":
+                if rel.kind == RelationshipKind.AUTHORED:
                     # rel.source_id is author
                     a_ent = knowledge.get_entity(rel.source_id)
                     if a_ent:
@@ -460,7 +460,7 @@ def history(target: Optional[str], store: str, limit: int) -> None:
         rels = knowledge.get_outgoing_relationships(entity.id)
         changes = []
         for rel in rels:
-            if rel.kind == "changed_by":
+            if rel.kind == RelationshipKind.CHANGED_BY:
                 commit = knowledge.get_entity(rel.target_id)
                 if commit:
                     # Get modification stats from edge metadata
 
@@ -15,10 +15,18 @@ def expand_dependencies(
     knowledge_store: KnowledgeStore,
     max_depth: int = 1
 ) -> list[CodeChunk]:
-    """Expand chunk to include dependency context.
-    
+    """Expand a chunk to include dependency context.
     Uses knowledge graph to find related entities,
     then retrieves their chunks.
+
+    Args:
+        chunk: Starting chunk whose dependencies should be expanded.
+        chunk_repo: Repository used to fetch chunks by entity.
+        knowledge_store: Graph store used to resolve dependencies.
+        max_depth: Depth of dependency expansion (1 = direct callees only).
+
+    Returns:
+        List of chunks including the input chunk and its dependencies.
     """
     expanded: list[CodeChunk] = [chunk]
     visited: set[str] = {chunk.entity_id}
 
@@ -12,6 +12,7 @@ class EmbeddingProvider(ABC):
     """Abstract interface for generating embeddings."""
 
     def __init__(self, config: EmbeddingConfig) -> None:
+        """Initialize the provider with the embedding configuration."""
         self.config = config
 
     @abstractmethod
@@ -29,6 +30,11 @@ class OpenAIEmbeddingProvider(EmbeddingProvider):
     """OpenAI embedding provider."""
 
     def __init__(self, config: EmbeddingConfig) -> None:
+        """Create an OpenAI-backed embedding provider.
+
+        Args:
+            config: Embedding configuration settings.
+        """
         super().__init__(config)
         api_key = os.environ.get("OPENAI_API_KEY")
         if not api_key:
@@ -38,6 +44,7 @@ def __init__(self, config: EmbeddingConfig) -> None:
             self.client = OpenAI(api_key=api_key)
 
     def _get_client(self) -> OpenAI:
+        """Return an initialized OpenAI client, loading credentials if needed."""
         if not self.client:
             api_key = os.environ.get("OPENAI_API_KEY")
             if not api_key:
@@ -46,6 +53,14 @@ def _get_client(self) -> OpenAI:
         return self.client
 
     def embed(self, texts: list[str]) -> list[list[float]]:
+        """Generate embeddings for a batch of texts.
+
+        Args:
+            texts: Input texts to embed.
+
+        Returns:
+            List of embedding vectors (one per input).
+        """
         if not texts:
             return []
 
@@ -62,9 +77,11 @@ def embed(self, texts: list[str]) -> list[list[float]]:
         return embeddings
 
     def embed_single(self, text: str) -> list[float]:
+        """Generate an embedding for a single text input."""
         return self.embed([text])[0]
 
     def _normalize(self, vec: list[float]) -> list[float]:
+        """Normalize a vector to unit length for cosine similarity."""
         import math
         norm = math.sqrt(sum(x*x for x in vec))
         return [x / norm for x in vec] if norm > 0 else vec
@@ -17,6 +17,13 @@ def __init__(
         vector_store: VectorStore,
         alpha: float = 0.5  # Weight for dense vs sparse (0.5 = equal weight)
     ) -> None:
+        """Initialize the hybrid index.
+
+        Args:
+            chunk_repo: Repository providing BM25-style token search.
+            vector_store: Dense vector store for semantic similarity.
+            alpha: Blend weight for dense vs sparse results.
+        """
         self.chunk_repo = chunk_repo
         self.vector_store = vector_store
         self.alpha = alpha
@@ -27,9 +34,17 @@ def search(
         query_embedding: list[float],
         limit: int = 10
     ) -> list[tuple[CodeChunk, float]]:
-        """Search using hybrid retrieval.
-        
+        """Search using hybrid retrieval.
+        Combines BM25 sparse retrieval with dense vector search.
         Returns chunks with combined scores using Reciprocal Rank Fusion (RRF).
+
+        Args:
+            query: Raw query string for sparse matching.
+            query_embedding: Dense embedding of the query.
+            limit: Maximum number of chunks to return.
+
+        Returns:
+            List of (chunk, score) tuples ranked by reciprocal rank fusion.
         """
         # 1. BM25 Search
         query_tokens = tokenize_code(query)
 
@@ -23,13 +23,27 @@ def __init__(
         chunk_repo: Optional[InMemoryChunkRepository] = None,
         vector_store: Optional[VectorStore] = None,
     ) -> None:
+        """Initialize an indexer with optional storage backends.
+
+        Args:
+            embedding_provider: Provider used to generate chunk embeddings.
+            chunk_repo: Optional chunk repository (defaults to in-memory).
+            vector_store: Optional vector store (defaults to FAISS-backed store).
+        """
         self.embedding_provider = embedding_provider
         self.chunk_repo = chunk_repo or InMemoryChunkRepository()
         self.vector_store = vector_store or VectorStore(dimension=embedding_provider.config.dimension)
         self.chunker = Chunker()
 
     def index_directory(self, root_dir: str | Path) -> int:
-        """Index all files in a directory."""
+        """Index all supported files under a directory.
+
+        Args:
+            root_dir: Root directory to scan for supported files.
+
+        Returns:
+            Total number of chunks added to the index.
+        """
         root_path = Path(root_dir)
 
         # Use existing GraphBuilder to get semantic entities
@@ -67,7 +81,11 @@ def index_directory(self, root_dir: str | Path) -> int:
         return total_chunks
 
     def save(self, path: str | Path) -> None:
-        """Save the entire index to disk."""
+        """Persist vector index and chunk metadata to disk.
+
+        Args:
+            path: Directory path to write index files into.
+        """
         path = Path(path)
         path.mkdir(parents=True, exist_ok=True)
 
@@ -92,7 +110,11 @@ def save(self, path: str | Path) -> None:
             json.dump(metadata, f)
 
     def load(self, path: str | Path) -> None:
-        """Load the entire index from disk."""
+        """Load the entire vector index and chunk metadata from disk.
+
+        Args:
+            path: Directory path containing previously saved index files.
+        """
         path = Path(path)
 
         # Load vector store
@@ -111,7 +133,14 @@ def load(self, path: str | Path) -> None:
                     self.chunk_repo.add(chunk)
 
     def index_file(self, file_path: str | Path) -> int:
-        """Index a single file (incremental)."""
+        """Index a single file for incremental updates.
+
+        Args:
+            file_path: File path to process.
+
+        Returns:
+            Number of chunks created for the file.
+        """
         file_path = Path(file_path)
         # Simplified for Task 3.6
         builder = GraphBuilder()