From 6b30d1a02f917cb89c0e0cc2b7231300bc534f8a Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Thu, 12 Mar 2026 17:32:14 +0800 Subject: [PATCH 1/9] feat: enhance DetailHeader component with confirmation dialog props and improve knowledge base detail handling --- frontend/src/components/DetailHeader.tsx | 8 +++ .../Detail/KnowledgeBaseDetail.tsx | 53 +++++++++++++++---- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/frontend/src/components/DetailHeader.tsx b/frontend/src/components/DetailHeader.tsx index 001c8b47..646c86b2 100644 --- a/frontend/src/components/DetailHeader.tsx +++ b/frontend/src/components/DetailHeader.tsx @@ -20,6 +20,14 @@ interface OperationItem { onMenuClick?: (key: string) => void; onClick?: () => void; danger?: boolean; + confirm?: { + title: string; + description?: string; + cancelText?: string; + okText?: string; + okType?: "default" | "primary" | "danger"; + onConfirm?: () => void; + }; } interface TagConfig { diff --git a/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx b/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx index 2802b4be..98851c44 100644 --- a/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx +++ b/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx @@ -15,7 +15,7 @@ import { useNavigate, useParams } from "react-router"; import DetailHeader from "@/components/DetailHeader"; import { SearchControls } from "@/components/SearchControls"; import { KBFile, KnowledgeBaseItem, KnowledgeGraphNode, KnowledgeGraphEdge, KBType } from "../knowledge-base.model"; -import { mapFileData, mapKnowledgeBase } from "../knowledge-base.const"; +import { getKBTypeMap, mapFileData, mapKnowledgeBase } from "../knowledge-base.const"; import { deleteKnowledgeBaseByIdUsingDelete, deleteKnowledgeBaseFileByIdUsingDelete, @@ -30,11 +30,7 @@ import CreateKnowledgeBase from "../components/CreateKnowledgeBase"; import KnowledgeGraphView, { GraphEntitySelection } from "../components/KnowledgeGraphView"; import { useTranslation } from "react-i18next"; -interface StatisticItem { - icon?: React.ReactNode; - label: string; - value: string | number; -} +type HeaderStatisticItem = React.ComponentProps["statistics"][number]; // Use UnifiedSearchResult from model - flat structure from backend // Backend returns: { id, text, score, metadata, resultType, knowledgeBaseId, knowledgeBaseName } interface RecallResult { @@ -75,6 +71,35 @@ const KnowledgeBaseDetailPage: React.FC = () => { const [graphData, setGraphData] = useState<{ nodes: KnowledgeGraphNode[]; edges: KnowledgeGraphEdge[] }>({ nodes: [], edges: [] }); const [graphSelection, setGraphSelection] = useState(null); + const kbTypeMap = getKBTypeMap(t); + + const detailHeaderData = knowledgeBase + ? { + ...knowledgeBase, + tags: (() => { + const rawTags = Array.isArray((knowledgeBase as any)?.tags) ? ((knowledgeBase as any).tags as any[]) : []; + const normalized = rawTags + .map((tag) => { + if (!tag) return ""; + if (typeof tag === "string") return tag; + return String(tag.label ?? tag.name ?? ""); + }) + .map((s) => s.trim()) + .filter(Boolean); + + const typeLabel = String(kbTypeMap[knowledgeBase.type as KBType]?.label ?? "").trim(); + const all = typeLabel ? [typeLabel, ...normalized] : normalized; + return Array.from(new Set(all)); + })(), + description: + knowledgeBase.description && knowledgeBase.description.trim().length > 0 + ? knowledgeBase.description + : kbTypeMap[knowledgeBase.type as KBType]?.description ?? + kbTypeMap[knowledgeBase.type as KBType]?.label ?? + knowledgeBase.description, + } + : knowledgeBase; + const fetchKnowledgeBaseDetails = async (id: string) => { const { data } = await queryKnowledgeBaseByIdUsingGet(id); setKnowledgeBase(mapKnowledgeBase(data, true, t)); @@ -156,7 +181,8 @@ const KnowledgeBaseDetailPage: React.FC = () => { threshold: 0.2, knowledgeBaseIds: [knowledgeBase.id], }); - setRecallResults(result?.data || []); + const data = Array.isArray(result) ? result : (result as any)?.data; + setRecallResults(Array.isArray(data) ? data : []); } catch { setRecallResults([]); } @@ -330,9 +356,9 @@ const KnowledgeBaseDetailPage: React.FC = () => { @@ -457,7 +483,12 @@ const KnowledgeBaseDetailPage: React.FC = () => { searchPlaceholder={t("knowledgeBase.detail.searchPlaceholder")} filters={[]} onFiltersChange={handleFiltersChange} - onClearFilters={() => setSearchParams({ ...searchParams, filter: { type: [], status: [], tags: [] } })} + onClearFilters={() => + setSearchParams({ + ...searchParams, + filter: { type: [], status: [], tags: [], categories: [], selectedStar: false }, + }) + } showViewToggle={false} showReload={false} /> From 69801c1eb0490f8b0f1f84d5299945d0ee1ab6fb Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Thu, 12 Mar 2026 18:16:57 +0800 Subject: [PATCH 2/9] feat: enhance CardView and KnowledgeBaseDetail components with additional props and improve API endpoint for knowledge retrieval --- frontend/src/components/CardView.tsx | 12 ++++++---- frontend/src/components/DetailHeader.tsx | 3 +++ .../Detail/KnowledgeBaseDetail.tsx | 24 +++++++++++++++---- .../components/AddDataDialog.tsx | 14 ++++++++++- .../pages/KnowledgeBase/knowledge-base.api.ts | 2 +- .../module/rag/interface/knowledge_base.py | 12 +++++++++- 6 files changed, 56 insertions(+), 11 deletions(-) diff --git a/frontend/src/components/CardView.tsx b/frontend/src/components/CardView.tsx index b6746395..31856e65 100644 --- a/frontend/src/components/CardView.tsx +++ b/frontend/src/components/CardView.tsx @@ -301,11 +301,15 @@ function CardView(props: CardViewProps) { {/* Description */} -

- + triggerNode.parentElement || document.body} + > +

{item?.description} - -

+

+ {/* Statistics */}
diff --git a/frontend/src/components/DetailHeader.tsx b/frontend/src/components/DetailHeader.tsx index 646c86b2..fcf7480b 100644 --- a/frontend/src/components/DetailHeader.tsx +++ b/frontend/src/components/DetailHeader.tsx @@ -44,6 +44,7 @@ interface DetailHeaderProps { statistics: StatisticItem[]; operations: OperationItem[]; tagConfig?: TagConfig; + titleExtra?: React.ReactNode; } // 标签单行渲染组件 @@ -222,6 +223,7 @@ function DetailHeader({ statistics, operations, tagConfig, + titleExtra, }: DetailHeaderProps): React.ReactNode { return ( @@ -244,6 +246,7 @@ function DetailHeader({

{(data as any)?.name}

+ {titleExtra} {(data as any)?.status && (
diff --git a/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx b/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx index 98851c44..2b7c387a 100644 --- a/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx +++ b/frontend/src/pages/KnowledgeBase/Detail/KnowledgeBaseDetail.tsx @@ -1,6 +1,6 @@ import type React from "react"; import { useEffect, useState } from "react"; -import { Table, Badge, Button, Breadcrumb, Tooltip, App, Card, Input, Empty, Spin } from "antd"; +import { Table, Badge, Button, Breadcrumb, Tooltip, App, Card, Input, Empty, Spin, Tag } from "antd"; import ReactMarkdown from "react-markdown"; import remarkGfm from "remark-gfm"; import { Prism as SyntaxHighlighter } from "react-syntax-highlighter"; @@ -72,6 +72,7 @@ const KnowledgeBaseDetailPage: React.FC = () => { const [graphSelection, setGraphSelection] = useState(null); const kbTypeMap = getKBTypeMap(t); + const kbTypeMeta = knowledgeBase ? kbTypeMap[knowledgeBase.type as KBType] : undefined; const detailHeaderData = knowledgeBase ? { @@ -87,9 +88,10 @@ const KnowledgeBaseDetailPage: React.FC = () => { .map((s) => s.trim()) .filter(Boolean); - const typeLabel = String(kbTypeMap[knowledgeBase.type as KBType]?.label ?? "").trim(); - const all = typeLabel ? [typeLabel, ...normalized] : normalized; - return Array.from(new Set(all)); + const typeLabel = String(kbTypeMeta?.tag?.label ?? "").trim(); + const filtered = typeLabel ? normalized.filter((label) => label !== typeLabel) : normalized; + + return Array.from(new Set(filtered)); })(), description: knowledgeBase.description && knowledgeBase.description.trim().length > 0 @@ -357,6 +359,20 @@ const KnowledgeBaseDetailPage: React.FC = () => {
+ {kbTypeMeta.tag.label} +
+ ) : null + } statistics={knowledgeBase && Array.isArray((knowledgeBase as { statistics?: HeaderStatisticItem[] }).statistics) ? ((knowledgeBase as { statistics?: HeaderStatisticItem[] }).statistics ?? []) : []} diff --git a/frontend/src/pages/KnowledgeBase/components/AddDataDialog.tsx b/frontend/src/pages/KnowledgeBase/components/AddDataDialog.tsx index 7683cb81..13d4a781 100644 --- a/frontend/src/pages/KnowledgeBase/components/AddDataDialog.tsx +++ b/frontend/src/pages/KnowledgeBase/components/AddDataDialog.tsx @@ -27,6 +27,18 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) { const [selectedFilesMap, setSelectedFilesMap] = useState({}); + const toBackendProcessType = (uiValue: string) => { + switch (uiValue) { + case "FIXED_LENGTH_CHUNK": + return "LENGTH_CHUNK"; + case "CHAPTER_CHUNK": + // 后端暂不支持 CHAPTER_CHUNK:用段落分块策略兼容“按章节分块”的入口 + return "PARAGRAPH_CHUNK"; + default: + return uiValue; + } + }; + // 定义分块选项 const sliceOptions = [ { label: t("knowledgeBase.const.sliceMethod.default"), value: "DEFAULT_CHUNK" }, @@ -124,7 +136,7 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) { // 构造符合API要求的请求数据 const requestData = { files: Object.values(selectedFilesMap), - processType: newKB.processType, + processType: toBackendProcessType(newKB.processType), chunkSize: Number(newKB.chunkSize), // 确保是数字类型 overlapSize: Number(newKB.overlapSize), // 确保是数字类型 delimiter: newKB.delimiter, diff --git a/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts b/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts index 0126762a..102520c8 100644 --- a/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts +++ b/frontend/src/pages/KnowledgeBase/knowledge-base.api.ts @@ -67,7 +67,7 @@ export function retrieveKnowledgeBaseContent(data: { threshold?: number; knowledgeBaseIds: string[]; }): Promise { - return post("/api/knowledge-base/retrieve", data); + return post("/api/knowledge-base/v2/retrieve", data); } // 获取知识库文件详情(分页的切片数据) diff --git a/runtime/datamate-python/app/module/rag/interface/knowledge_base.py b/runtime/datamate-python/app/module/rag/interface/knowledge_base.py index c702f5b5..a6b26bc1 100644 --- a/runtime/datamate-python/app/module/rag/interface/knowledge_base.py +++ b/runtime/datamate-python/app/module/rag/interface/knowledge_base.py @@ -159,6 +159,16 @@ async def retrieve_knowledge_base( results = await service.search(request) return SuccessResponse(data=results) +@router.post("/v2/retrieve", response_model=SuccessResponse) +async def retrieve_knowledge_base( + request: RetrieveReq, + db: AsyncSession = Depends(get_db), +): + """检索知识库内容(统一检索接口)""" + service = UnifiedRetrievalService(db) + results = await service.search(request) + return SuccessResponse(data=results) + @router.post("/query", response_model=SuccessResponse) async def query_knowledge_base( @@ -166,7 +176,7 @@ async def query_knowledge_base( db: AsyncSession = Depends(get_db), ): """查询知识库(支持向量检索和知识图谱) - + 根据知识库类型自动选择查询策略: - DOCUMENT: 向量检索 - GRAPH: 知识图谱查询 From 5c7b8cd56c85bced11537ec681a7b42d2a32b663 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Thu, 12 Mar 2026 18:33:54 +0800 Subject: [PATCH 3/9] feat: enhance knowledge base retrieval API to support legacy format --- .../module/rag/interface/knowledge_base.py | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/runtime/datamate-python/app/module/rag/interface/knowledge_base.py b/runtime/datamate-python/app/module/rag/interface/knowledge_base.py index a6b26bc1..70f8d4b6 100644 --- a/runtime/datamate-python/app/module/rag/interface/knowledge_base.py +++ b/runtime/datamate-python/app/module/rag/interface/knowledge_base.py @@ -4,6 +4,8 @@ 实现知识库相关的 REST API 接口。 对应 Java: com.datamate.rag.indexer.interfaces.KnowledgeBaseController """ +import json + from fastapi import APIRouter, Depends, BackgroundTasks from sqlalchemy.ext.asyncio import AsyncSession @@ -154,13 +156,29 @@ async def retrieve_knowledge_base( request: RetrieveReq, db: AsyncSession = Depends(get_db), ): - """检索知识库内容(统一检索接口)""" + """检索知识库内容(统一检索接口,兼容旧版本格式)""" service = UnifiedRetrievalService(db) results = await service.search(request) - return SuccessResponse(data=results) + + legacy_results = [] + for item in results: + legacy_item = { + "entity": { + "metadata": json.dumps(item.get("metadata", {}), ensure_ascii=False), + "text": item.get("text", ""), + "id": item.get("id", ""), + }, + "score": item.get("score", 0.0), + "id": item.get("id", ""), + "knowledgeBaseId": item.get("knowledgeBaseId", ""), + "knowledgeBaseName": item.get("knowledgeBaseName", ""), + } + legacy_results.append(legacy_item) + + return SuccessResponse(data=legacy_results) @router.post("/v2/retrieve", response_model=SuccessResponse) -async def retrieve_knowledge_base( +async def v2_retrieve_knowledge_base( request: RetrieveReq, db: AsyncSession = Depends(get_db), ): From ab3ccd1c6915ff5a0b4f755eaf5eb3365ab5ae83 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Thu, 12 Mar 2026 20:25:24 +0800 Subject: [PATCH 4/9] refactor: clean up type ignore comments in graph_strategy.py --- .../rag/service/strategy/graph_strategy.py | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/runtime/datamate-python/app/module/rag/service/strategy/graph_strategy.py b/runtime/datamate-python/app/module/rag/service/strategy/graph_strategy.py index bf7e51e6..25380c34 100644 --- a/runtime/datamate-python/app/module/rag/service/strategy/graph_strategy.py +++ b/runtime/datamate-python/app/module/rag/service/strategy/graph_strategy.py @@ -122,7 +122,7 @@ async def search( retrieval_results = await rag_instance.aquery_data(query_text, query_param) unified_results = self._convert_retrieval_results_into_unified( - retrieval_results, str(kb.id), str(kb.name) # type: ignore + retrieval_results, str(kb.id), str(kb.name) ) all_results.extend(unified_results) @@ -317,35 +317,34 @@ async def _get_knowledge_base(self, knowledge_base_id: str) -> KnowledgeBase: return kb async def _get_or_create_graph_rag(self, kb: KnowledgeBase) -> Any: - kb_name = str(kb.name) # type: ignore + kb_name = str(kb.name) if kb_name in self._rag_cache: return self._rag_cache[kb_name] - chat_model = await get_model_by_id(self.db, str(kb.chat_model)) # type: ignore - embedding_model = await get_model_by_id(self.db, str(kb.embedding_model)) # type: ignore + chat_model = await get_model_by_id(self.db, str(kb.chat_model)) + embedding_model = await get_model_by_id(self.db, str(kb.embedding_model)) if not chat_model or not embedding_model: raise BusinessError(ErrorCodes.RAG_MODEL_NOT_FOUND) llm_func = _create_llm_func( - str(chat_model.model_name), # type: ignore - str(chat_model.base_url), # type: ignore - str(chat_model.api_key), # type: ignore + str(chat_model.model_name), + str(chat_model.base_url), + str(chat_model.api_key), ) from app.module.shared.llm import LLMFactory embedding_func = _create_embedding_func( - str(embedding_model.model_name), # type: ignore - str(embedding_model.base_url), # type: ignore - str(embedding_model.api_key), # type: ignore + str(embedding_model.model_name), + str(embedding_model.base_url), + str(embedding_model.api_key), LLMFactory.get_embedding_dimension( - str(embedding_model.model_name), # type: ignore - str(embedding_model.base_url), # type: ignore - str(embedding_model.api_key), # type: ignore + str(embedding_model.model_name), + str(embedding_model.base_url), + str(embedding_model.api_key), ), ) - working_dir = os.path.join(DEFAULT_WORKING_DIR, kb_name) - rag = await _create_rag(llm_func, embedding_func, working_dir, workspace=kb_name) + rag = await _create_rag(llm_func, embedding_func, DEFAULT_WORKING_DIR, workspace=kb_name) self._rag_cache[kb_name] = rag return rag From 17a535b2f8d9b371a45f49d637fc6ec03b36a41b Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Thu, 12 Mar 2026 20:53:31 +0800 Subject: [PATCH 5/9] feat: enhance GraphKnowledgeBaseStrategy with workspace management and caching functionality --- .../rag/service/knowledge_base_service.py | 44 +++++++++++++++---- .../rag/service/strategy/graph_strategy.py | 18 +++++++- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py b/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py index ef2a2625..94e126f3 100644 --- a/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py +++ b/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py @@ -14,7 +14,7 @@ from app.core.exception import BusinessError, ErrorCodes from app.db.models.dataset_management import DatasetFiles -from app.db.models.knowledge_gen import KnowledgeBase, RagFile, FileStatus +from app.db.models.knowledge_gen import KnowledgeBase, RagFile, FileStatus, RagType from app.db.models.models import Models from app.module.rag.infra.vectorstore import drop_collection, rename_collection, delete_chunks_by_rag_file_ids from app.module.rag.repository import KnowledgeBaseRepository, RagFileRepository @@ -88,15 +88,23 @@ async def update(self, knowledge_base_id: str, request: KnowledgeBaseUpdateReq) if not knowledge_base: raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_NOT_FOUND) - old_name = knowledge_base.name + old_name = str(knowledge_base.name) + new_name = request.name + kb_type = knowledge_base.type + knowledge_base.name = request.name knowledge_base.description = request.description await self.kb_repo.update(knowledge_base) - if old_name != request.name: + if old_name != new_name: try: - rename_collection(old_name, request.name) + if kb_type == RagType.DOCUMENT.value: + rename_collection(old_name, new_name) + elif kb_type == RagType.GRAPH.value: + from app.module.rag.service.strategy.graph_strategy import GraphKnowledgeBaseStrategy + GraphKnowledgeBaseStrategy.rename_workspace(old_name, new_name) + GraphKnowledgeBaseStrategy.clear_cache(old_name) except BusinessError: await self.db.rollback() raise @@ -113,13 +121,30 @@ async def delete(self, knowledge_base_id: str) -> None: if not knowledge_base: raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_NOT_FOUND) + kb_name = str(knowledge_base.name) + kb_type = knowledge_base.type + await self.file_repo.delete_by_knowledge_base(knowledge_base_id) await self.kb_repo.delete(knowledge_base_id) - try: - drop_collection(knowledge_base.name) - except Exception as e: - logger.error("删除 Milvus 集合失败: %s", e) + if kb_type == RagType.DOCUMENT.value: + try: + drop_collection(kb_name) + except Exception as e: + logger.error("删除 Milvus 集合失败: %s", e) + elif kb_type == RagType.GRAPH.value: + try: + from app.module.rag.service.strategy.graph_strategy import GraphKnowledgeBaseStrategy + import shutil + from pathlib import Path + from app.core.config import settings + workspace_path = Path(settings.rag_storage_dir) / kb_name + if workspace_path.exists(): + shutil.rmtree(workspace_path) + logger.info("已删除知识图谱 workspace: %s", kb_name) + GraphKnowledgeBaseStrategy.clear_cache(kb_name) + except Exception as e: + logger.error("删除知识图谱 workspace 失败: %s", e) await self.db.commit() @@ -141,7 +166,8 @@ async def get_by_id(self, knowledge_base_id: str) -> KnowledgeBaseResp: }) return KnowledgeBaseResp(**data) - def _kb_to_dict(self, kb: KnowledgeBase) -> dict: + @staticmethod + def _kb_to_dict(kb: KnowledgeBase) -> dict: """知识库实体转字典""" return { "id": kb.id, diff --git a/runtime/datamate-python/app/module/rag/service/strategy/graph_strategy.py b/runtime/datamate-python/app/module/rag/service/strategy/graph_strategy.py index 25380c34..8c7f532e 100644 --- a/runtime/datamate-python/app/module/rag/service/strategy/graph_strategy.py +++ b/runtime/datamate-python/app/module/rag/service/strategy/graph_strategy.py @@ -71,11 +71,12 @@ async def _create_rag( class GraphKnowledgeBaseStrategy(KnowledgeBaseStrategy): + # 类级别的缓存,允许跨实例共享 + _rag_cache: Dict[str, Any] = {} def __init__(self, db: AsyncSession): super().__init__(db) self.kb_repo = KnowledgeBaseRepository(db) - self._rag_cache: Dict[str, Any] = {} async def query( self, @@ -348,3 +349,18 @@ async def _get_or_create_graph_rag(self, kb: KnowledgeBase) -> Any: rag = await _create_rag(llm_func, embedding_func, DEFAULT_WORKING_DIR, workspace=kb_name) self._rag_cache[kb_name] = rag return rag + + @classmethod + def rename_workspace(cls, old_name: str, new_name: str) -> None: + old_path = Path(DEFAULT_WORKING_DIR) / old_name + new_path = Path(DEFAULT_WORKING_DIR) / new_name + if old_path.exists() and old_path.is_dir(): + old_path.rename(new_path) + logger.info("知识图谱 workspace 重命名: %s -> %s", old_name, new_name) + cls.clear_cache(old_name) + + @classmethod + def clear_cache(cls, name: str) -> None: + if name in cls._rag_cache: + del cls._rag_cache[name] + logger.info("已清除知识图谱缓存: %s", name) From bde667c367cea0c31113e3af4965a0c49fc5eb70 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Fri, 13 Mar 2026 11:32:57 +0800 Subject: [PATCH 6/9] feat: enhance file processing and knowledge base deletion logic for improved document handling --- .../app/module/rag/service/file_processor.py | 7 ++-- .../rag/service/knowledge_base_service.py | 32 +++++++++++++------ 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/runtime/datamate-python/app/module/rag/service/file_processor.py b/runtime/datamate-python/app/module/rag/service/file_processor.py index cf99e06b..a5ac38e1 100644 --- a/runtime/datamate-python/app/module/rag/service/file_processor.py +++ b/runtime/datamate-python/app/module/rag/service/file_processor.py @@ -161,9 +161,10 @@ async def _process_single_graph_file( await self._update_progress(db, file_repo, str(rag_file.id), 30) # type: ignore await db.commit() - for idx, doc in enumerate(documents): - logger.info("插入文档到知识图谱: %s, 进度: %d/%d", str(rag_file.file_name), idx + 1, len(documents)) # type: ignore - await rag_instance.ainsert(input=doc.page_content, file_paths=[file_path]) + all_content = "\n\n".join(doc.page_content for doc in documents) + doc_id = str(rag_file.id) + logger.info("插入文档到知识图谱: %s, doc_id=%s, 文档数=%d", str(rag_file.file_name), doc_id, len(documents)) # type: ignore + await rag_instance.ainsert(input=all_content, file_paths=[file_path], ids=doc_id) await self._mark_success(db, file_repo, str(rag_file.id), len(documents)) # type: ignore logger.info("文件 %s 知识图谱处理完成", str(rag_file.file_name)) diff --git a/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py b/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py index 94e126f3..dea4896e 100644 --- a/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py +++ b/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py @@ -346,24 +346,36 @@ async def delete_files(self, knowledge_base_id: str, request: DeleteFilesReq) -> if not request.file_ids: raise BusinessError(ErrorCodes.BAD_REQUEST, "文件ID列表不能为空") - # 获取文件列表 + kb_type = knowledge_base.type + kb_name = str(knowledge_base.name) + rag_files = [] for file_id in request.file_ids: rag_file = await self.file_repo.get_by_id(file_id) if rag_file: rag_files.append(rag_file) - # 删除 Milvus 数据 if rag_files: - try: - delete_chunks_by_rag_file_ids( - knowledge_base.name, - [r.id for r in rag_files], - ) - except Exception as e: - logger.error("删除 Milvus 数据失败: %s", e) + if kb_type == RagType.DOCUMENT.value: + try: + delete_chunks_by_rag_file_ids( + kb_name, + [r.id for r in rag_files], + ) + except Exception as e: + logger.error("删除 Milvus 数据失败: %s", e) + elif kb_type == RagType.GRAPH.value: + try: + from app.module.rag.service.strategy.graph_strategy import GraphKnowledgeBaseStrategy + strategy = GraphKnowledgeBaseStrategy(self.db) + rag_instance = await strategy._get_or_create_graph_rag(knowledge_base) + for rag_file in rag_files: + doc_id = str(rag_file.id) + await rag_instance.adelete_by_doc_id(doc_id) + logger.info("已从知识图谱删除文件: %s, doc_id=%s", rag_file.file_name, doc_id) + except Exception as e: + logger.error("删除知识图谱数据失败: %s", e) - # 删除数据库记录 for file_id in request.file_ids: try: await self.file_repo.delete(file_id) From 45a0df9a6fc15589bb2cc7d2908a53464772da69 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Fri, 13 Mar 2026 17:22:32 +0800 Subject: [PATCH 7/9] refactor: remove type ignore comments in file_processor.py for cleaner code --- .../app/module/rag/service/file_processor.py | 48 ++++++++++--------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/runtime/datamate-python/app/module/rag/service/file_processor.py b/runtime/datamate-python/app/module/rag/service/file_processor.py index a5ac38e1..ada1e154 100644 --- a/runtime/datamate-python/app/module/rag/service/file_processor.py +++ b/runtime/datamate-python/app/module/rag/service/file_processor.py @@ -127,7 +127,7 @@ async def _process_graph_files( logger.exception("初始化知识图谱失败: %s", e) for rag_file in files: file_repo = RagFileRepository(db) - await self._mark_failed(db, file_repo, str(rag_file.id), f"知识图谱初始化失败: {str(e)}") # type: ignore + await self._mark_failed(db, file_repo, str(rag_file.id), f"知识图谱初始化失败: {str(e)}") @staticmethod async def _initialize_graph_rag(db: AsyncSession, knowledge_base: KnowledgeBase): @@ -145,33 +145,35 @@ async def _process_single_graph_file( file_repo = RagFileRepository(db) try: - await self._update_status(db, file_repo, str(rag_file.id), FileStatus.PROCESSING, 10) # type: ignore + await self._update_status(db, file_repo, str(rag_file.id), FileStatus.PROCESSING, 10) await db.commit() file_path = get_file_path(rag_file) if not file_path or not Path(file_path).exists(): - await self._mark_failed(db, file_repo, str(rag_file.id), "文件不存在") # type: ignore + await self._mark_failed(db, file_repo, str(rag_file.id), "文件不存在") return documents = load_documents(file_path) if not documents: - await self._mark_failed(db, file_repo, str(rag_file.id), "文件解析失败,未生成文档") # type: ignore + await self._mark_failed(db, file_repo, str(rag_file.id), "文件解析失败,未生成文档") return - await self._update_progress(db, file_repo, str(rag_file.id), 30) # type: ignore + await self._update_progress(db, file_repo, str(rag_file.id), 30) await db.commit() all_content = "\n\n".join(doc.page_content for doc in documents) doc_id = str(rag_file.id) - logger.info("插入文档到知识图谱: %s, doc_id=%s, 文档数=%d", str(rag_file.file_name), doc_id, len(documents)) # type: ignore + logger.info("插入文档到知识图谱: %s, doc_id=%s, 文档数=%d", str(rag_file.file_name), doc_id, len(documents)) await rag_instance.ainsert(input=all_content, file_paths=[file_path], ids=doc_id) - await self._mark_success(db, file_repo, str(rag_file.id), len(documents)) # type: ignore - logger.info("文件 %s 知识图谱处理完成", str(rag_file.file_name)) + doc_status_data = await rag_instance.doc_status.get_by_id(doc_id) + chunk_count = len(doc_status_data.get("chunks_list", [])) if doc_status_data else 0 + await self._mark_success(db, file_repo, str(rag_file.id), chunk_count) + logger.info("文件 %s 知识图谱处理完成, 实际分块数: %d", str(rag_file.file_name), chunk_count) except Exception as e: - logger.exception("文件 %s 知识图谱处理失败: %s", str(rag_file.file_name), e) # type: ignore - await self._mark_failed(db, file_repo, str(rag_file.id), str(e)) # type: ignore + logger.exception("文件 %s 知识图谱处理失败: %s", str(rag_file.file_name), e) + await self._mark_failed(db, file_repo, str(rag_file.id), str(e)) async def _process_single_file( self, @@ -183,12 +185,12 @@ async def _process_single_file( file_repo = RagFileRepository(db) try: - await self._update_status(db, file_repo, rag_file.id, FileStatus.PROCESSING, 5) # type: ignore + await self._update_status(db, file_repo, rag_file.id, FileStatus.PROCESSING, 5) await db.commit() file_path = get_file_path(rag_file) if not file_path or not Path(file_path).exists(): - await self._mark_failed(db, file_repo, rag_file.id, "文件不存在") # type: ignore + await self._mark_failed(db, file_repo, rag_file.id, "文件不存在") return base_metadata = MetadataBuilder.build_chunk_metadata(rag_file, knowledge_base) @@ -202,39 +204,39 @@ async def _process_single_file( ) if not chunks: - await self._mark_failed(db, file_repo, rag_file.id, "文档解析后未生成任何分块") # type: ignore + await self._mark_failed(db, file_repo, rag_file.id, "文档解析后未生成任何分块") return logger.info("文件 %s 分块完成,共 %d 个分块", rag_file.file_name, len(chunks)) valid_chunks = self._filter_and_clean_chunks(chunks, rag_file) if not valid_chunks: - await self._mark_failed(db, file_repo, rag_file.id, "文件没有有效的分块内容") # type: ignore + await self._mark_failed(db, file_repo, rag_file.id, "文件没有有效的分块内容") return embedding = await self._get_embeddings(db, knowledge_base) vectorstore = VectorStoreFactory.create( - collection_name=str(knowledge_base.name), # type: ignore + collection_name=str(knowledge_base.name), embedding=embedding, ) - await self._update_progress(db, file_repo, rag_file.id, 60) # type: ignore + await self._update_progress(db, file_repo, rag_file.id, 60) await db.commit() MetadataBuilder.add_to_chunks(valid_chunks, { - "rag_file_id": str(rag_file.id), # type: ignore - "original_file_id": str(rag_file.file_id), # type: ignore - "knowledge_base_id": str(knowledge_base.id), # type: ignore + "rag_file_id": str(rag_file.id), + "original_file_id": str(rag_file.file_id), + "knowledge_base_id": str(knowledge_base.id), }) await BatchProcessor.store_in_batches(vectorstore, valid_chunks) - await self._mark_success(db, file_repo, rag_file.id, len(valid_chunks)) # type: ignore + await self._mark_success(db, file_repo, rag_file.id, len(valid_chunks)) logger.info("文件 %s ETL 处理完成", rag_file.file_name) except Exception as e: logger.exception("文件 %s 处理失败: %s", rag_file.file_name, e) - await self._mark_failed(db, file_repo, rag_file.id, str(e)) # type: ignore + await self._mark_failed(db, file_repo, rag_file.id, str(e)) @staticmethod def _filter_and_clean_chunks(chunks: list, rag_file: RagFile) -> list: @@ -257,12 +259,12 @@ def _filter_and_clean_chunks(chunks: list, rag_file: RagFile) -> list: @staticmethod async def _get_embeddings(db: AsyncSession, knowledge_base: KnowledgeBase): - embedding_entity = await get_model_by_id(db, str(knowledge_base.embedding_model)) # type: ignore + embedding_entity = await get_model_by_id(db, str(knowledge_base.embedding_model)) if not embedding_entity: raise ValueError(f"嵌入模型不存在: {knowledge_base.embedding_model}") return EmbeddingFactory.create_embeddings( - model_name=str(embedding_entity.model_name), # type: ignore + model_name=str(embedding_entity.model_name), base_url=getattr(embedding_entity, "base_url", None), api_key=getattr(embedding_entity, "api_key", None), ) From 3db970dcd63092bfe04a2e0e323af82e47d156b0 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Fri, 13 Mar 2026 17:57:39 +0800 Subject: [PATCH 8/9] feat: add error codes for RAG configuration and improve knowledge base creation error handling --- frontend/public/config/error-code.json | 3 +++ .../pages/KnowledgeBase/components/CreateKnowledgeBase.tsx | 3 ++- .../app/module/rag/service/knowledge_base_service.py | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/frontend/public/config/error-code.json b/frontend/public/config/error-code.json index 41131166..6eb1c30a 100644 --- a/frontend/public/config/error-code.json +++ b/frontend/public/config/error-code.json @@ -1,5 +1,8 @@ { "0": "成功", + "rag.0001": "RAG 配置错误", + "rag.0002": "知识库不存在", + "rag.0003": "知识库名称已存在", "cleaning.0001": "清洗任务不存在", "cleaning.0002": "清洗任务名称重复", "cleaning.0003": "清洗模板不存在", diff --git a/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx b/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx index 5d2750bf..035fecf0 100644 --- a/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx +++ b/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx @@ -109,7 +109,8 @@ export default function CreateKnowledgeBase({ setOpen(false); onUpdate(); } catch (error) { - message.error(t("knowledgeBase.create.messages.operationFailed") + error.data.message); + // 错误已由全局拦截器统一处理,此处不再重复提示 + console.error("知识库操作失败:", error); } }; diff --git a/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py b/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py index dea4896e..41105c89 100644 --- a/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py +++ b/runtime/datamate-python/app/module/rag/service/knowledge_base_service.py @@ -92,6 +92,10 @@ async def update(self, knowledge_base_id: str, request: KnowledgeBaseUpdateReq) new_name = request.name kb_type = knowledge_base.type + if new_name and new_name != old_name: + if await self.kb_repo.exists_by_name(new_name, exclude_id=knowledge_base_id): + raise BusinessError(ErrorCodes.RAG_KNOWLEDGE_BASE_ALREADY_EXISTS, data={"name": new_name}) + knowledge_base.name = request.name knowledge_base.description = request.description From de68adbd7bec91efc67114bab192412e371c0fce Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Fri, 13 Mar 2026 18:18:42 +0800 Subject: [PATCH 9/9] feat: implement responsive resizing for KnowledgeGraphView component --- .../components/KnowledgeGraphView.tsx | 98 ++++++++++--------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/frontend/src/pages/KnowledgeBase/components/KnowledgeGraphView.tsx b/frontend/src/pages/KnowledgeBase/components/KnowledgeGraphView.tsx index 992d90f0..922d768a 100644 --- a/frontend/src/pages/KnowledgeBase/components/KnowledgeGraphView.tsx +++ b/frontend/src/pages/KnowledgeBase/components/KnowledgeGraphView.tsx @@ -1,6 +1,6 @@ -import React, {useMemo, useRef, useEffect} from "react"; +import React, { useMemo, useRef, useEffect, useState } from "react"; import ForceGraph2D from "react-force-graph-2d"; -import type {KnowledgeGraphEdge, KnowledgeGraphNode} from "../knowledge-base.model"; +import type { KnowledgeGraphEdge, KnowledgeGraphNode } from "../knowledge-base.model"; export type GraphEntitySelection = | { type: "node"; data: KnowledgeGraphNode } @@ -16,19 +16,45 @@ interface KnowledgeGraphViewProps { const COLOR_PALETTE = ["#60a5fa", "#f87171", "#fbbf24", "#34d399", "#a78bfa", "#fb7185", "#22d3ee", "#818cf8", "#fb923c", "#4ade80"]; const KnowledgeGraphView: React.FC = ({ - nodes, - edges, - height = 520, - onSelectEntity, - }) => { + nodes, + edges, + height = 520, + onSelectEntity, +}) => { const graphRef = useRef(); + // 新增:用于监听尺寸的容器引用 + const containerRef = useRef(null); + // 新增:保存当前实际宽高的状态 + const [dimensions, setDimensions] = useState({ width: 0, height: 0 }); + + // --- 核心修复:监听容器大小变化 --- + useEffect(() => { + if (!containerRef.current) return; + + const resizeObserver = new ResizeObserver((entries) => { + for (const entry of entries) { + const { width, height } = entry.contentRect; + setDimensions({ width, height }); + + // 强制通知 force-graph 组件更新内部 canvas 尺寸 + if (graphRef.current) { + graphRef.current.width(width); + graphRef.current.height(height); + // 可选:如果希望尺寸变化后图谱自动居中,取消下行注释 + // graphRef.current.zoomToFit(400); + } + } + }); + + resizeObserver.observe(containerRef.current); + return () => resizeObserver.disconnect(); + }, []); useEffect(() => { if (graphRef.current) { - // 1. 调整力导向平衡:减小斥力让独立图块靠近,增加向心力防止飘散 - graphRef.current.d3Force("charge").strength(-250); // 斥力适中 - graphRef.current.d3Force("link").distance(120); // 边长适中 - graphRef.current.d3Force("center").strength(0.8); // 增强向心力,让孤立集群往中间靠 + graphRef.current.d3Force("charge").strength(-250); + graphRef.current.d3Force("link").distance(120); + graphRef.current.d3Force("center").strength(0.8); } }, [nodes]); @@ -43,7 +69,7 @@ const KnowledgeGraphView: React.FC = ({ nodes: nodes.map((node) => ({ ...node, color: typeColorMap.get(node.properties?.entity_type || (node.labels && node.labels[0]) || 'default'), - val: 8 // 统一基础大小,使视觉更整洁 + val: 8 })), links: edges.map((edge) => ({ ...edge, @@ -53,9 +79,15 @@ const KnowledgeGraphView: React.FC = ({ }), [nodes, edges, typeColorMap]); return ( -
+
= ({ linkCurvature={0.1} // --- 节点绘制 --- - nodeCanvasObject={(node: any, ctx, globalScale) => { - const {x, y, val: radius, color, id} = node; + nodeCanvasObject={(node: never, ctx, globalScale) => { + const { x, y, val: radius, color, id } = node; if (!Number.isFinite(x) || !Number.isFinite(y)) return; ctx.save(); @@ -79,7 +111,6 @@ const KnowledgeGraphView: React.FC = ({ ctx.shadowColor = color; ctx.fill(); - // 节点名称 if (globalScale > 0.4) { const fontSize = 12 / globalScale; ctx.font = `${fontSize}px Sans-Serif`; @@ -95,82 +126,57 @@ const KnowledgeGraphView: React.FC = ({ linkPointerAreaPaint={(link: any, color, ctx, globalScale) => { const label = link.keywords; if (!label || globalScale < 1.1) return; - const start = link.source; const end = link.target; if (typeof start !== 'object' || typeof end !== 'object') return; - const fontSize = 9 / globalScale; - const textPos = {x: start.x + (end.x - start.x) * 0.5, y: start.y + (end.y - start.y) * 0.5}; + const textPos = { x: start.x + (end.x - start.x) * 0.5, y: start.y + (end.y - start.y) * 0.5 }; const angle = Math.atan2(end.y - start.y, end.x - start.x); const bRotate = angle > Math.PI / 2 || angle < -Math.PI / 2; - ctx.save(); ctx.translate(textPos.x, textPos.y); ctx.rotate(bRotate ? angle + Math.PI : angle); - ctx.font = `${fontSize}px Sans-Serif`; const textWidth = ctx.measureText(label).width; - - // 绘制一个与文字大小相同的透明矩形,颜色必须使用参数中的 'color' - // 这是 react-force-graph 识别点击对象的关键(Color-picking 技术) ctx.fillStyle = color; ctx.fillRect(-textWidth / 2 - 2, -fontSize / 2 - 2, textWidth + 4, fontSize + 4); ctx.restore(); }} - // --- 边文字绘制:优化大小、位置和翻转逻辑 --- linkCanvasObjectMode={() => 'after'} linkCanvasObject={(link: any, ctx, globalScale) => { const MAX_DISPLAY_SCALE = 1.1; if (globalScale < MAX_DISPLAY_SCALE) return; - const label = link.keywords; const start = link.source; const end = link.target; if (typeof start !== 'object' || typeof end !== 'object') return; - - // 边文字比节点文字小一点点(节点12,边11) const fontSize = 11 / globalScale; - - const textPos = { - x: start.x + (end.x - start.x) * 0.5, - y: start.y + (end.y - start.y) * 0.5 - }; - + const textPos = { x: start.x + (end.x - start.x) * 0.5, y: start.y + (end.y - start.y) * 0.5 }; let angle = Math.atan2(end.y - start.y, end.x - start.x); - - // --- 核心修复:防止文字倒挂 --- - // 如果角度在 90度 到 270度 之间,旋转180度让文字保持正向 const bRotate = angle > Math.PI / 2 || angle < -Math.PI / 2; - ctx.save(); ctx.translate(textPos.x, textPos.y); ctx.rotate(bRotate ? angle + Math.PI : angle); - ctx.font = `${fontSize}px Sans-Serif`; const textWidth = ctx.measureText(label).width; - - // 绘制极小的背景遮罩,紧贴文字 ctx.fillStyle = 'rgba(1, 3, 15, 0.7)'; ctx.fillRect(-textWidth / 2 - 1, -fontSize / 2, textWidth + 2, fontSize); - ctx.fillStyle = '#94e2d5'; ctx.textAlign = 'center'; ctx.textBaseline = 'middle'; - // y轴偏移设为0,使其紧贴线条中心 ctx.fillText(label, 0, 0); ctx.restore(); }} - onNodeClick={(node: any) => onSelectEntity?.({type: "node", data: node})} + onNodeClick={(node: any) => onSelectEntity?.({ type: "node", data: node })} onLinkClick={(link: any) => { const originalData = link.__originalEdge || link; - onSelectEntity?.({type: "edge", data: originalData}); + onSelectEntity?.({ type: "edge", data: originalData }); }} onBackgroundClick={() => onSelectEntity?.(null)} cooldownTicks={120} - d3VelocityDecay={0.4} // 增加阻力,使布局更快稳定 + d3VelocityDecay={0.4} />
);