graphrag · pull · Feb 28, 2026 · Feb 27, 2026 · Feb 27, 2026
diff --git a/.semversioner/3.0.5.json b/.semversioner/3.0.5.json
@@ -0,0 +1,18 @@
+{
+  "changes": [
+    {
+      "description": "fix csv reader",
+      "type": "patch"
+    },
+    {
+      "description": "update version",
+      "type": "patch"
+    },
+    {
+      "description": "vector load_documents in batches",
+      "type": "patch"
+    }
+  ],
+  "created_at": "2026-02-27T21:01:53+00:00",
+  "version": "3.0.5"
+}
diff --git a/.semversioner/next-release/patch-20260224222358053700.json b/.semversioner/next-release/patch-20260224222358053700.json
diff --git a/.semversioner/next-release/patch-20260225001919068435.json b/.semversioner/next-release/patch-20260225001919068435.json
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,12 @@
 # Changelog
 Note: version releases in the 0.x.y range may introduce breaking changes.
 
+## 3.0.5
+
+- patch: fix csv reader
+- patch: update version
+- patch: vector load_documents in batches
+
 ## 3.0.4
 
 - patch: fix versions release

diff --git a/dictionary.txt b/dictionary.txt
@@ -133,6 +133,7 @@ retryer
 agenerate
 dropna
 notna
+upserted
 
 # LLM Terms
 AOAI

diff --git a/packages/graphrag-cache/pyproject.toml b/packages/graphrag-cache/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "graphrag-cache"
-version = "3.0.4"
+version = "3.0.5"
 description = "GraphRAG cache package."
 authors = [
     {name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
@@ -31,8 +31,8 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "graphrag-common==3.0.4",
-    "graphrag-storage==3.0.4",
+    "graphrag-common==3.0.5",
+    "graphrag-storage==3.0.5",
 ]
 
 [project.urls]

diff --git a/packages/graphrag-chunking/pyproject.toml b/packages/graphrag-chunking/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "graphrag-chunking"
-version = "3.0.4"
+version = "3.0.5"
 description = "Chunking utilities for GraphRAG"
 authors = [
     {name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
@@ -30,7 +30,7 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "graphrag-common==3.0.4",
+    "graphrag-common==3.0.5",
     "pydantic~=2.10",
 ]
 

diff --git a/packages/graphrag-common/pyproject.toml b/packages/graphrag-common/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "graphrag-common"
-version = "3.0.4"
+version = "3.0.5"
 description = "Common utilities and types for GraphRAG"
 authors = [
     {name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},

diff --git a/packages/graphrag-input/pyproject.toml b/packages/graphrag-input/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "graphrag-input"
-version = "3.0.4"
+version = "3.0.5"
 description = "Input document loading utilities for GraphRAG"
 authors = [
     {name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
@@ -30,8 +30,8 @@ classifiers = [
     "Programming Language :: Python :: 3.13",
 ]
 dependencies = [
-    "graphrag-common==3.0.4",
-    "graphrag-storage==3.0.4    ",
+    "graphrag-common==3.0.5",
+    "graphrag-storage==3.0.5",
     "pydantic~=2.10",
     "markitdown~=0.1.0",
     "markitdown[pdf]"

diff --git a/packages/graphrag-llm/pyproject.toml b/packages/graphrag-llm/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "graphrag-llm"
-version = "3.0.4"
+version = "3.0.5"
 description = "GraphRAG LLM package."
 authors = [
     {name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
@@ -33,8 +33,8 @@ classifiers = [
 ]
 dependencies = [
     "azure-identity~=1.25",
-    "graphrag-cache==3.0.4",
-    "graphrag-common==3.0.4",
+    "graphrag-cache==3.0.5",
+    "graphrag-common==3.0.5",
     "jinja2~=3.1",
     "litellm~=1.80",
     "nest-asyncio2~=1.7",

diff --git a/packages/graphrag-storage/pyproject.toml b/packages/graphrag-storage/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "graphrag-storage"
-version = "3.0.4"
+version = "3.0.5"
 description = "GraphRAG storage package."
 authors = [
     {name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
@@ -34,7 +34,7 @@ dependencies = [
     "azure-cosmos~=4.9",
     "azure-identity~=1.25",
     "azure-storage-blob~=12.24",
-    "graphrag-common==3.0.4",
+    "graphrag-common==3.0.5",
     "pandas~=2.3",
     "pydantic~=2.10",
 ]

diff --git a/packages/graphrag-vectors/graphrag_vectors/azure_ai_search.py b/packages/graphrag-vectors/graphrag_vectors/azure_ai_search.py
@@ -165,22 +165,27 @@ def create_index(self) -> None:
             index,
         )
 
-    def insert(self, document: VectorStoreDocument) -> None:
-        """Insert a single document into Azure AI Search."""
-        self._prepare_document(document)
-        if document.vector is not None:
-            doc_dict = {
+    def load_documents(self, documents: list[VectorStoreDocument]) -> None:
+        """Load documents into Azure AI Search as a single batch upload."""
+        batch: list[dict[str, Any]] = []
+        for document in documents:
+            self._prepare_document(document)
+            if document.vector is None:
+                continue
+            doc_dict: dict[str, Any] = {
                 self.id_field: document.id,
                 self.vector_field: document.vector,
                 self.create_date_field: document.create_date,
                 self.update_date_field: document.update_date,
             }
-            # Add additional fields if they exist in the document data
             if document.data:
                 for field_name in self.fields:
                     if field_name in document.data:
                         doc_dict[field_name] = document.data[field_name]
-            self.db_connection.upload_documents([doc_dict])
+            batch.append(doc_dict)
+
+        if batch:
+            self.db_connection.upload_documents(batch)
 
     def _compile_filter(self, expr: FilterExpr) -> str:
         """Compile a FilterExpr into an Azure AI Search OData filter string."""

diff --git a/packages/graphrag-vectors/graphrag_vectors/cosmosdb.py b/packages/graphrag-vectors/graphrag_vectors/cosmosdb.py
@@ -163,17 +163,22 @@ def create_index(self) -> None:
             msg = "Container client is not initialized."
             raise ValueError(msg)
 
-    def insert(self, document: VectorStoreDocument) -> None:
-        """Insert a single document into CosmosDB."""
-        self._prepare_document(document)
-        if document.vector is not None:
+    def load_documents(self, documents: list[VectorStoreDocument]) -> None:
+        """Load documents into CosmosDB.
+
+        CosmosDB does not support native batch upsert, so each
+        document is upserted individually after preparation.
+        """
+        for document in documents:
+            self._prepare_document(document)
+            if document.vector is None:
+                continue
             doc_json: dict[str, Any] = {
                 self.id_field: document.id,
                 self.vector_field: document.vector,
                 self.create_date_field: document.create_date,
                 self.update_date_field: document.update_date,
             }
-            # Add additional fields if they exist in the document data
             if document.data:
                 for field_name in self.fields:
                     if field_name in document.data:

diff --git a/packages/graphrag-vectors/graphrag_vectors/lancedb.py b/packages/graphrag-vectors/graphrag_vectors/lancedb.py
@@ -78,38 +78,43 @@ def create_index(self) -> None:
         # Remove the dummy document used to set up the schema
         self.document_collection.delete(f"{self.id_field} = '__DUMMY__'")
 
-    def insert(self, document: VectorStoreDocument) -> None:
-        """Insert a single document into LanceDB."""
-        self._prepare_document(document)
-        if document.vector is not None:
-            vector = np.array(document.vector, dtype=np.float32)
-            flat_array = pa.array(vector, type=pa.float32())
-            vector_column = pa.FixedSizeListArray.from_arrays(
-                flat_array, self.vector_size
-            )
-
-            others = {}
+    def load_documents(self, documents: list[VectorStoreDocument]) -> None:
+        """Load documents into LanceDB as a single batch write."""
+        ids: list[str] = []
+        vectors: list[np.ndarray] = []
+        create_dates: list[str | None] = []
+        update_dates: list[str | None] = []
+        field_columns: dict[str, list[Any]] = {name: [] for name in self.fields}
+
+        for document in documents:
+            self._prepare_document(document)
+            if document.vector is None:
+                continue
+
+            ids.append(str(document.id))
+            vectors.append(np.array(document.vector, dtype=np.float32))
+            create_dates.append(document.create_date)
+            update_dates.append(document.update_date)
             for field_name in self.fields:
-                others[field_name] = (
-                    document.data.get(field_name) if document.data else None
-                )
-
-            data = pa.table({
-                self.id_field: pa.array([document.id], type=pa.string()),
-                self.vector_field: vector_column,
-                self.create_date_field: pa.array(
-                    [document.create_date], type=pa.string()
-                ),
-                self.update_date_field: pa.array(
-                    [document.update_date], type=pa.string()
-                ),
-                **{
-                    field_name: pa.array([value])
-                    for field_name, value in others.items()
-                },
-            })
+                value = document.data.get(field_name) if document.data else None
+                field_columns[field_name].append(value)
+
+        if not ids:
+            return
+
+        flat_vector = np.concatenate(vectors).astype(np.float32)
+        flat_array = pa.array(flat_vector, type=pa.float32())
+        vector_column = pa.FixedSizeListArray.from_arrays(flat_array, self.vector_size)
+
+        data = pa.table({
+            self.id_field: pa.array(ids, type=pa.string()),
+            self.vector_field: vector_column,
+            self.create_date_field: pa.array(create_dates, type=pa.string()),
+            self.update_date_field: pa.array(update_dates, type=pa.string()),
+            **{name: pa.array(values) for name, values in field_columns.items()},
+        })
 
-            self.document_collection.add(data)
+        self.document_collection.add(data)
 
     def _extract_data(
         self, doc: dict[str, Any], select: list[str] | None = None

diff --git a/packages/graphrag-vectors/graphrag_vectors/vector_store.py b/packages/graphrag-vectors/graphrag_vectors/vector_store.py
@@ -140,14 +140,13 @@ def connect(self) -> None:
     def create_index(self) -> None:
         """Create index."""
 
+    @abstractmethod
     def load_documents(self, documents: list[VectorStoreDocument]) -> None:
         """Load documents into the vector-store."""
-        for doc in documents:
-            self.insert(doc)
 
-    @abstractmethod
     def insert(self, document: VectorStoreDocument) -> None:
-        """Insert a single document into the vector-store."""
+        """Insert a single document by delegating to load_documents."""
+        self.load_documents([document])
 
     @abstractmethod
     def similarity_search_by_vector(

diff --git a/packages/graphrag-vectors/pyproject.toml b/packages/graphrag-vectors/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "graphrag-vectors"
-version = "3.0.4"
+version = "3.0.5"
 description = "GraphRAG vector store package."
 authors = [
     {name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
@@ -34,7 +34,7 @@ dependencies = [
     "azure-cosmos~=4.9",
     "azure-identity~=1.25",
     "azure-search-documents~=11.6",
-    "graphrag-common==3.0.4",
+    "graphrag-common==3.0.5",
     "lancedb~=0.24.1",
     "numpy~=2.1",
     "pyarrow~=22.0",

diff --git a/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py b/packages/graphrag/graphrag/index/operations/embed_text/embed_text.py
@@ -38,6 +38,7 @@ async def embed_text(
 
     buffer: list[dict[str, Any]] = []
     total_rows = 0
+    flush_size = batch_size * 4
 
     async for row in input_table:
         text = row.get(embed_column)
@@ -49,7 +50,7 @@ async def embed_text(
             embed_column: text,
         })
 
-        if len(buffer) >= batch_size:
+        if len(buffer) >= flush_size:
             total_rows += await _flush_embedding_buffer(
                 buffer,
                 embed_column,

diff --git a/packages/graphrag/pyproject.toml b/packages/graphrag/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "graphrag"
 # Maintainers: do not change the version here manually
-version = "3.0.4"
+version = "3.0.5"
 description = "GraphRAG: A graph-based retrieval-augmented generation (RAG) system."
 authors = [
     {name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
@@ -36,13 +36,13 @@ dependencies = [
     "azure-search-documents~=11.5",
     "azure-storage-blob~=12.24",
     "devtools~=0.12",
-    "graphrag-cache==3.0.4",
-    "graphrag-chunking==3.0.4",
-    "graphrag-common==3.0.4",
-    "graphrag-input==3.0.4",
-    "graphrag-llm==3.0.4",
-    "graphrag-storage==3.0.4",
-    "graphrag-vectors==3.0.4",
+    "graphrag-cache==3.0.5",
+    "graphrag-chunking==3.0.5",
+    "graphrag-common==3.0.5",
+    "graphrag-input==3.0.5",
+    "graphrag-llm==3.0.5",
+    "graphrag-storage==3.0.5",
+    "graphrag-vectors==3.0.5",
     "graspologic-native~=1.2",
     "json-repair~=0.30",
     "networkx~=3.4",
-Original file line number
+Diff line change
@@ Expand Up / @@ -133,6 +133,7 @@ retryer @@
     agenerate
     dropna
     notna
+    upserted
     # LLM Terms
     AOAI
@@ Expand Down @@