diff --git a/frontend/src/i18n/locales/en/common.json b/frontend/src/i18n/locales/en/common.json index 3fe17885..98806f31 100644 --- a/frontend/src/i18n/locales/en/common.json +++ b/frontend/src/i18n/locales/en/common.json @@ -1756,7 +1756,15 @@ "kbList": "Knowledge Bases", "kbDetail": "Knowledge Base Detail" }, - "defaultFileName": "File {{id}}" + "defaultFileName": "File {{id}}", + "filter": { + "title": "Filter", + "idOperator": "ID Operator", + "idValue": "ID Value", + "textKeyword": "Text Keyword", + "apply": "Apply", + "clear": "Clear" + } }, "create": { "title": "Create Knowledge Base", diff --git a/frontend/src/i18n/locales/zh/common.json b/frontend/src/i18n/locales/zh/common.json index d16f37cc..34521c6a 100644 --- a/frontend/src/i18n/locales/zh/common.json +++ b/frontend/src/i18n/locales/zh/common.json @@ -1755,7 +1755,15 @@ "kbList": "知识库", "kbDetail": "知识库详情" }, - "defaultFileName": "文件 {{id}}" + "defaultFileName": "文件 {{id}}", + "filter": { + "title": "过滤", + "idOperator": "ID操作符", + "idValue": "ID值", + "textKeyword": "文本关键词", + "apply": "应用", + "clear": "清除" + } }, "create": { "title": "创建知识库", diff --git a/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx b/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx index 5d646dcd..e3293477 100644 --- a/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx +++ b/frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx @@ -1,10 +1,9 @@ import React, { useEffect, useState, useCallback } from "react"; -import {Eye, Edit, Trash2, FileBox, ChevronLeft, ChevronRight, Code, CheckCircle, AlertCircle, Wand2} from "lucide-react"; -import { Card, Button, Badge, Input, Tabs, Modal, Breadcrumb, Tag, Spin, Empty, Alert, message, Tooltip } from "antd"; +import {Eye, Edit, Trash2, FileBox, ChevronLeft, ChevronRight, Code, CheckCircle, AlertCircle, Wand2, X} from "lucide-react"; +import { Card, Button, Badge, Input, Tabs, Modal, Breadcrumb, Tag, Spin, 
/**
 * Escapes a raw user value so it can be embedded inside a double-quoted
 * string literal of a filter expression.
 *
 * Backslashes and double quotes are prefixed with a backslash; without this a
 * value such as `a"b` terminates the literal early and yields a malformed (or
 * attacker-shaped) expression.
 */
const escapeFilterLiteral = (raw: string): string => raw.replace(/[\\"]/g, "\\$&");

/**
 * Builds the chunk filter expression sent to the backend as `expr`.
 *
 * @param op - Comparison operator applied to the `id` field. It is inserted
 *   verbatim, so it must come from a fixed set of UI options, never free text.
 * @param val - Value compared against `id`; the clause is emitted only when
 *   both `op` and `val` are non-empty.
 * @param keyword - Substring searched in `text` via `like "%keyword%"`.
 * @returns The clauses joined with ` && `, or an empty string when no filter
 *   is active.
 */
const buildFilterExpr = (op: string, val: string, keyword: string): string => {
  const clauses: string[] = [];
  if (op && val) {
    clauses.push(`id ${op} "${escapeFilterLiteral(val)}"`);
  }
  if (keyword) {
    // NOTE(review): `%` and `_` typed by the user still act as LIKE wildcards;
    // confirm whether they should be escaped for the target Milvus version.
    clauses.push(`text like "%${escapeFilterLiteral(keyword)}%"`);
  }
  return clauses.join(" && ");
};
res) as { page: number; size: number; @@ -92,7 +114,7 @@ const KnowledgeBaseFileDetail: React.FC = () => { useEffect(() => { fetchChunks(currentPage); - }, [knowledgeBaseId, ragFileId, currentPage, t]); + }, [knowledgeBaseId, ragFileId, currentPage, idOperator, idValue, textKeyword, t]); const totalElements = paged?.totalElements ?? 0; const totalPages = paged?.totalPages ?? 0; @@ -237,7 +259,7 @@ const KnowledgeBaseFileDetail: React.FC = () => { key={chunk.id} title={
- {t("knowledgeBase.fileDetail.messages.chunkLabel")} {chunk.id} + ID{chunk.id} {chunk.metadata?.sliceOperator && ( {chunk.metadata.sliceOperator} @@ -295,19 +317,64 @@ const KnowledgeBaseFileDetail: React.FC = () => { { title: fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId }) }, ]} /> - , - iconColor: "#a27e7e", - name: fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId }), - description: `${totalElements} ${t("knowledgeBase.fileDetail.messages.chunkCount", { count: 0 })}`, - createdAt: "", - lastUpdated: "", - }} - statistics={[]} - operations={[]} - /> +
+
+ +
+
+

+ {fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId })} +

+

+ {totalElements} {t("knowledgeBase.fileDetail.messages.chunkCount", { count: 0 })} +

+
+
+
+
+ ID + setIdValue(e.target.value)} + placeholder={t("knowledgeBase.fileDetail.filter.idValue")} + className="w-28" + /> +
+
+ setTextKeyword(e.target.value)} + placeholder={t("knowledgeBase.fileDetail.filter.textKeyword")} + className="w-48" + allowClear + /> +
+ + {(idOperator || idValue || textKeyword) && ( + + )} +
+
{loading ?
: renderChunks()}
/**
 * Requests one page of chunks for a RAG file, optionally narrowed by a filter
 * expression.
 *
 * @param knowledgeBaseId - Knowledge base identifier; URL-encoded before being
 *   placed in the path so characters such as `/` or `?` cannot alter the route.
 * @param ragFileId - RAG file identifier; URL-encoded like `knowledgeBaseId`.
 * @param params - `page` (defaults to 1), `size` (defaults to 20, sent as
 *   `page_size`) and an optional `expr` filter expression. A missing, empty or
 *   whitespace-only `expr` is not sent at all.
 * @returns Whatever the shared `get` helper returns for the built URL.
 */
export function queryKnowledgeBaseFileDetailUsingGet(
  knowledgeBaseId: string,
  ragFileId: string,
  params: { page?: number; size?: number; expr?: string } = { page: 1, size: 20 }
) {
  const page = params.page ?? 1;
  const size = params.size ?? 20;
  const queryParams = [`page=${page}`, `page_size=${size}`];

  const expr = params.expr?.trim();
  if (expr) {
    queryParams.push(`expr=${encodeURIComponent(expr)}`);
  }

  const basePath =
    `/api/knowledge-base/${encodeURIComponent(knowledgeBaseId)}` +
    `/files/${encodeURIComponent(ragFileId)}`;
  return get(`${basePath}?${queryParams.join("&")}`);
}
class ChunkFilterQuery(BaseModel):
    """Query parameters for listing the chunks of a file.

    Carries pagination settings plus an optional Milvus filter expression.
    """

    # 1-based page index; values below 1 are rejected by validation.
    page: int = Field(default=1, ge=1, description="页码(从 1 开始)")

    # Number of chunks per page, constrained to the range 1..100.
    # NOTE(review): the frontend request in this change sends `page_size`,
    # not `size` — confirm whether an alias is required here.
    size: int = Field(default=10, ge=1, le=100, description="每页数量")

    # Optional raw filter expression supplied by the client; None means
    # "no extra filtering".
    expr: Optional[str] = Field(
        default=None,
        description='Milvus 过滤表达式(如 id > "1" && text like "%keyword%")',
    )

    class Config:
        # Example payload shown in the generated JSON schema.
        json_schema_extra = {
            "example": {"page": 1, "size": 10, "expr": 'id > "1"'},
        }
kwargs.get("rag_file_id") - paging_query = kwargs.get("paging_query") + chunk_filter_query: ChunkFilterQuery = kwargs.get("chunk_filter_query") - if not rag_file_id or not paging_query: + if not rag_file_id or not chunk_filter_query: raise BusinessError( ErrorCodes.RAG_INVALID_REQUEST, - "Missing rag_file_id or paging_query parameters" + "Missing rag_file_id or chunk_filter_query parameters" ) kb_repo = KnowledgeBaseRepository(self.db) @@ -67,20 +68,22 @@ async def query( client = get_milvus_client() try: - count_filter = f'metadata["rag_file_id"] == "{rag_file_id}"' + base_filter = f'metadata["rag_file_id"] == "{rag_file_id}"' + combined_filter = self._build_combined_filter(base_filter, chunk_filter_query.expr) + count_res = client.query( collection_name=knowledge_base.name, - filter=count_filter, + filter=combined_filter, output_fields=["id"], ) total = len(count_res) - offset = (paging_query.page - 1) * paging_query.size + offset = (chunk_filter_query.page - 1) * chunk_filter_query.size results = client.query( collection_name=knowledge_base.name, - filter=count_filter, + filter=combined_filter, output_fields=["id", "text", "metadata"], - limit=paging_query.size, + limit=chunk_filter_query.size, offset=offset, ) @@ -103,8 +106,8 @@ async def query( return PagedResponse.create( content=chunks, total_elements=total, - page=paging_query.page, - size=paging_query.size, + page=chunk_filter_query.page, + size=chunk_filter_query.size, ) except Exception as e: @@ -117,6 +120,12 @@ async def query( f"查询文件分块失败: {str(e)}" ) from e + @staticmethod + def _build_combined_filter(base_filter: str, user_expr: Optional[str]) -> str: + if not user_expr: + return base_filter + return f"{base_filter} && {user_expr}" + async def search( self, query_text: str, diff --git a/runtime/datamate-python/app/module/rag/service/unified_retrieval_service.py b/runtime/datamate-python/app/module/rag/service/unified_retrieval_service.py index 3be5a839..7376dc6e 100644 --- 
a/runtime/datamate-python/app/module/rag/service/unified_retrieval_service.py +++ b/runtime/datamate-python/app/module/rag/service/unified_retrieval_service.py @@ -12,7 +12,7 @@ from app.core.exception import BusinessError, ErrorCodes from app.db.models.knowledge_gen import KnowledgeBase from app.module.rag.repository import KnowledgeBaseRepository -from app.module.rag.schema.request import PagingQuery, RetrieveReq +from app.module.rag.schema.request import PagingQuery, ChunkFilterQuery, RetrieveReq from app.module.rag.schema.response import PagedResponse from .strategy import KnowledgeBaseStrategyFactory @@ -86,14 +86,14 @@ async def get_chunks( self, knowledge_base_id: str, rag_file_id: str, - paging_query: PagingQuery, + query: ChunkFilterQuery, ) -> PagedResponse: """获取指定 RAG 文件的分块列表(仅向量知识库) Args: knowledge_base_id: 知识库 ID rag_file_id: RAG 文件 ID - paging_query: 分页参数 + query: 分页和过滤参数 Returns: 分块列表(分页) @@ -101,7 +101,7 @@ async def get_chunks( return await self.query( knowledge_base_id, rag_file_id=rag_file_id, - paging_query=paging_query, + chunk_filter_query=query, ) async def _get_knowledge_base(self, knowledge_base_id: str) -> KnowledgeBase: