Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion frontend/src/i18n/locales/en/common.json
Original file line number Diff line number Diff line change
Expand Up @@ -1756,7 +1756,15 @@
"kbList": "Knowledge Bases",
"kbDetail": "Knowledge Base Detail"
},
"defaultFileName": "File {{id}}"
"defaultFileName": "File {{id}}",
"filter": {
"title": "Filter",
"idOperator": "ID Operator",
"idValue": "ID Value",
"textKeyword": "Text Keyword",
"apply": "Apply",
"clear": "Clear"
}
},
"create": {
"title": "Create Knowledge Base",
Expand Down
10 changes: 9 additions & 1 deletion frontend/src/i18n/locales/zh/common.json
Original file line number Diff line number Diff line change
Expand Up @@ -1755,7 +1755,15 @@
"kbList": "知识库",
"kbDetail": "知识库详情"
},
"defaultFileName": "文件 {{id}}"
"defaultFileName": "文件 {{id}}",
"filter": {
"title": "过滤",
"idOperator": "ID操作符",
"idValue": "ID值",
"textKeyword": "文本关键词",
"apply": "应用",
"clear": "清除"
}
},
"create": {
"title": "创建知识库",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import React, { useEffect, useState, useCallback } from "react";
import {Eye, Edit, Trash2, FileBox, ChevronLeft, ChevronRight, Code, CheckCircle, AlertCircle, Wand2} from "lucide-react";
import { Card, Button, Badge, Input, Tabs, Modal, Breadcrumb, Tag, Spin, Empty, Alert, message, Tooltip } from "antd";
import {Eye, Edit, Trash2, FileBox, ChevronLeft, ChevronRight, Code, CheckCircle, AlertCircle, Wand2, X} from "lucide-react";
import { Card, Button, Badge, Input, Tabs, Modal, Breadcrumb, Tag, Spin, Empty, Alert, message, Tooltip, Select } from "antd";
import { ExclamationCircleOutlined } from "@ant-design/icons";
import { queryKnowledgeBaseFileDetailUsingGet, updateKnowledgeBaseChunk, deleteKnowledgeBaseChunk } from "@/pages/KnowledgeBase/knowledge-base.api";
import { Link, useParams } from "react-router";
import DetailHeader from "@/components/DetailHeader";
import { useTranslation } from "react-i18next";
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
import { vscDarkPlus } from "react-syntax-highlighter/dist/esm/styles/prism";
Expand Down Expand Up @@ -50,6 +49,28 @@ const KnowledgeBaseFileDetail: React.FC = () => {
const pageSize = 10;
const [currentPage, setCurrentPage] = useState(1);

const [idOperator, setIdOperator] = useState<string>("");
const [idValue, setIdValue] = useState<string>("");
const [textKeyword, setTextKeyword] = useState<string>("");

/**
 * Builds the filter expression that is sent to the backend as the `expr`
 * query parameter.
 *
 * @param op - Comparison operator applied to the chunk `id` field (e.g. `>`, `<`, `==`).
 * @param val - Value compared against `id`; the ID clause is omitted unless both `op` and `val` are non-empty.
 * @param keyword - Substring to look for in `text`; the clause is omitted when empty.
 * @returns The clauses joined with `&&`, or an empty string when no filter is set.
 */
const buildFilterExpr = useCallback((op: string, val: string, keyword: string): string => {
  // User input ends up inside a double-quoted string literal. Escape backslashes
  // first and quotes second so a stray `"` or `\` cannot terminate the literal
  // early and corrupt (or extend) the expression.
  // NOTE(review): `%` and `_` in the keyword are left untouched, so they still
  // act as LIKE wildcards — confirm whether literal matching is wanted.
  const escapeLiteral = (raw: string): string =>
    raw.replace(/\\/g, "\\\\").replace(/"/g, '\\"');

  const parts: string[] = [];
  if (op && val) {
    parts.push(`id ${op} "${escapeLiteral(val)}"`);
  }
  if (keyword) {
    parts.push(`text like "%${escapeLiteral(keyword)}%"`);
  }
  return parts.join(" && ");
}, []);

/**
 * Resets every filter input to an empty string and jumps back to the first page.
 */
const handleClearFilter = useCallback(() => {
  // All three filter fields share the same "empty" value.
  for (const resetField of [setIdOperator, setIdValue, setTextKeyword]) {
    resetField("");
  }
  setCurrentPage(1);
}, []);

const safeParse = (meta: unknown): unknown => {
if (typeof meta === "string") {
try {
Expand All @@ -66,7 +87,8 @@ const KnowledgeBaseFileDetail: React.FC = () => {
setLoading(true);
setError(null);
try {
const res = await queryKnowledgeBaseFileDetailUsingGet(knowledgeBaseId, ragFileId, { page, size: pageSize });
const expr = buildFilterExpr(idOperator, idValue, textKeyword);
const res = await queryKnowledgeBaseFileDetailUsingGet(knowledgeBaseId, ragFileId, { page, size: pageSize, expr: expr || undefined });
const raw = (res?.data ?? res) as {
page: number;
size: number;
Expand All @@ -92,7 +114,7 @@ const KnowledgeBaseFileDetail: React.FC = () => {

useEffect(() => {
fetchChunks(currentPage);
}, [knowledgeBaseId, ragFileId, currentPage, t]);
}, [knowledgeBaseId, ragFileId, currentPage, idOperator, idValue, textKeyword, t]);

const totalElements = paged?.totalElements ?? 0;
const totalPages = paged?.totalPages ?? 0;
Expand Down Expand Up @@ -237,7 +259,7 @@ const KnowledgeBaseFileDetail: React.FC = () => {
key={chunk.id}
title={
<div className="flex items-center gap-2">
<span>{t("knowledgeBase.fileDetail.messages.chunkLabel")} {chunk.id}</span>
<span className="text-slate-400 text-xs mr-1">ID</span><span className="font-mono text-sm">{chunk.id}</span>
{chunk.metadata?.sliceOperator && (
<Tag className="text-xs">
{chunk.metadata.sliceOperator}
Expand Down Expand Up @@ -295,19 +317,64 @@ const KnowledgeBaseFileDetail: React.FC = () => {
{ title: fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId }) },
]}
/>
<DetailHeader
data={{
id: ragFileId,
icon: <FileBox className="w-full h-full" />,
iconColor: "#a27e7e",
name: fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId }),
description: `${totalElements} ${t("knowledgeBase.fileDetail.messages.chunkCount", { count: 0 })}`,
createdAt: "",
lastUpdated: "",
}}
statistics={[]}
operations={[]}
/>
<div className="flex items-center gap-3 px-4 py-3 bg-gradient-to-r from-slate-50 to-slate-100/50 rounded-xl border border-slate-200/60">
<div className="flex-shrink-0 w-10 h-10 rounded-lg bg-gradient-to-br from-amber-100 to-orange-100 flex items-center justify-center shadow-sm">
<FileBox className="w-5 h-5 text-amber-600" />
</div>
<div className="flex-1 min-w-0">
<h1 className="text-base font-semibold text-slate-800 truncate">
{fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId })}
</h1>
<p className="text-sm text-slate-500">
{totalElements} {t("knowledgeBase.fileDetail.messages.chunkCount", { count: 0 })}
</p>
</div>
</div>
<div className="flex items-center gap-3 bg-white rounded-lg border border-slate-200/80 px-4 py-3 mb-4">
<div className="flex items-center gap-2">
<span className="text-xs text-slate-500">ID</span>
<Select
value={idOperator || undefined}
onChange={setIdOperator}
placeholder={t("knowledgeBase.fileDetail.filter.idOperator")}
allowClear
className="w-16"
options={[
{ value: ">", label: ">" },
{ value: "<", label: "<" },
{ value: "==", label: "==" },
]}
/>
<Input
value={idValue}
onChange={(e) => setIdValue(e.target.value)}
placeholder={t("knowledgeBase.fileDetail.filter.idValue")}
className="w-28"
/>
</div>
<div className="h-5 w-px bg-slate-200" />
<Input.Search
value={textKeyword}
onChange={(e) => setTextKeyword(e.target.value)}
placeholder={t("knowledgeBase.fileDetail.filter.textKeyword")}
className="w-48"
allowClear
/>
<div className="flex items-center gap-2 ml-auto">
<Button
type="primary"
size="small"
onClick={() => { setCurrentPage(1); fetchChunks(1); }}
>
{t("knowledgeBase.fileDetail.filter.apply")}
</Button>
{(idOperator || idValue || textKeyword) && (
<Button size="small" onClick={handleClearFilter}>
<X className="w-4 h-4" />
</Button>
)}
</div>
</div>
<Card>
{loading ? <div className="flex items-center justify-center py-8"><Spin /></div> : renderChunks()}
</Card>
Expand Down
8 changes: 6 additions & 2 deletions frontend/src/pages/KnowledgeBase/knowledge-base.api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,15 @@ export function retrieveKnowledgeBaseContent(data: {
export function queryKnowledgeBaseFileDetailUsingGet(
knowledgeBaseId: string,
ragFileId: string,
params: { page?: number; size?: number } = { page: 1, size: 20 }
params: { page?: number; size?: number; expr?: string } = { page: 1, size: 20 }
) {
const page = params.page ?? 1;
const size = params.size ?? 20;
return get(`/api/knowledge-base/${knowledgeBaseId}/files/${ragFileId}?page=${page}&page_size=${size}`);
const queryParams = [`page=${page}`, `page_size=${size}`];
if (params.expr) {
queryParams.push(`expr=${encodeURIComponent(params.expr)}`);
}
return get(`/api/knowledge-base/${knowledgeBaseId}/files/${ragFileId}?${queryParams.join('&')}`);
}

export function queryKnowledgeBase(data: {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
RagFileReq,
RetrieveReq,
PagingQuery,
ChunkFilterQuery,
QueryRequest,
ChunkUpdateReq,
)
Expand Down Expand Up @@ -143,12 +144,12 @@ async def delete_knowledge_base_files(
async def get_file_chunks(
knowledge_base_id: str,
rag_file_id: str,
paging_query: PagingQuery = Depends(),
query: ChunkFilterQuery = Depends(),
db: AsyncSession = Depends(get_db),
):
"""获取指定 RAG 文件的分块列表"""
service = UnifiedRetrievalService(db)
result = await service.get_chunks(knowledge_base_id, rag_file_id, paging_query)
result = await service.get_chunks(knowledge_base_id, rag_file_id, query)
return SuccessResponse(data=result)


Expand Down
31 changes: 31 additions & 0 deletions runtime/datamate-python/app/module/rag/schema/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,37 @@ class Config:
}


class ChunkFilterQuery(BaseModel):
    """Query parameters for listing chunks.

    Combines pagination (``page`` / ``size``) with an optional Milvus
    filter expression (``expr``).
    """

    # 1-based page index; values below 1 are rejected by validation.
    page: int = Field(1, ge=1, description="页码(从 1 开始)")
    # Page size, constrained to the range 1..100.
    size: int = Field(10, ge=1, le=100, description="每页数量")
    # Optional filter expression; None means no additional filtering.
    expr: Optional[str] = Field(
        default=None,
        description="Milvus 过滤表达式(如 id > \"1\" && text like \"%keyword%\")",
    )

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {"page": 1, "size": 10, "expr": "id > \"1\""},
        }


class QueryRequest(BaseModel):
"""知识图谱查询请求"""
knowledge_base_id: str = Field(..., description="知识库ID")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from app.module.rag.infra.vectorstore.milvus_client import get_milvus_client
from app.module.rag.repository import KnowledgeBaseRepository, RagFileRepository
from app.module.rag.schema.response import PagedResponse, RagChunkResp
from app.module.rag.schema.request import ChunkFilterQuery
from app.module.rag.service.common import TextCleaner, MetadataBuilder, BatchProcessor, get_file_path
from app.module.system.service.common_service import get_model_by_id
from .base import KnowledgeBaseStrategy
Expand All @@ -39,18 +40,18 @@ async def query(
knowledge_base_id: 知识库 ID
**kwargs: 额外参数,必须包含:
- rag_file_id: RAG 文件 ID
- paging_query: 分页参数
- chunk_filter_query: 分页和过滤参数

Returns:
分页分块列表
"""
rag_file_id = kwargs.get("rag_file_id")
paging_query = kwargs.get("paging_query")
chunk_filter_query: ChunkFilterQuery = kwargs.get("chunk_filter_query")

if not rag_file_id or not paging_query:
if not rag_file_id or not chunk_filter_query:
raise BusinessError(
ErrorCodes.RAG_INVALID_REQUEST,
"Missing rag_file_id or paging_query parameters"
"Missing rag_file_id or chunk_filter_query parameters"
)

kb_repo = KnowledgeBaseRepository(self.db)
Expand All @@ -67,20 +68,22 @@ async def query(
client = get_milvus_client()

try:
count_filter = f'metadata["rag_file_id"] == "{rag_file_id}"'
base_filter = f'metadata["rag_file_id"] == "{rag_file_id}"'
combined_filter = self._build_combined_filter(base_filter, chunk_filter_query.expr)

count_res = client.query(
collection_name=knowledge_base.name,
filter=count_filter,
filter=combined_filter,
output_fields=["id"],
)
total = len(count_res)

offset = (paging_query.page - 1) * paging_query.size
offset = (chunk_filter_query.page - 1) * chunk_filter_query.size
results = client.query(
collection_name=knowledge_base.name,
filter=count_filter,
filter=combined_filter,
output_fields=["id", "text", "metadata"],
limit=paging_query.size,
limit=chunk_filter_query.size,
offset=offset,
)

Expand All @@ -103,8 +106,8 @@ async def query(
return PagedResponse.create(
content=chunks,
total_elements=total,
page=paging_query.page,
size=paging_query.size,
page=chunk_filter_query.page,
size=chunk_filter_query.size,
)

except Exception as e:
Expand All @@ -117,6 +120,12 @@ async def query(
f"查询文件分块失败: {str(e)}"
) from e

@staticmethod
def _build_combined_filter(base_filter: str, user_expr: Optional[str]) -> str:
    """Combine the mandatory base filter with an optional user expression.

    Both operands are wrapped in parentheses so that a low-precedence
    operator in ``user_expr`` (e.g. ``||``) cannot widen the result beyond
    ``base_filter``.

    Args:
        base_filter: Filter that must always hold.
        user_expr: Extra filter expression supplied by the caller; ``None``,
            empty or whitespace-only values mean "no extra filter".

    Returns:
        ``base_filter`` unchanged when there is no user expression, otherwise
        ``(base_filter) && (user_expr)``.

    Raises:
        ValueError: If ``user_expr`` has unbalanced parentheses or an
            unterminated string literal. Such input could otherwise close the
            wrapping parenthesis early and escape the base filter.
    """
    expr = (user_expr or "").strip()
    if not expr:
        return base_filter

    depth = 0
    quote = None  # quote character of the string literal being scanned, if any
    escaped = False
    for ch in expr:
        if quote is not None:
            # Inside a string literal: parentheses are plain text here.
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == quote:
                quote = None
        elif ch in ('"', "'"):
            quote = ch
        elif ch == "(":
            depth += 1
        elif ch == ")":
            depth -= 1
            if depth < 0:
                # A ")" with no matching "(" would close our wrapper.
                break

    if depth != 0 or quote is not None:
        raise ValueError(f"Invalid filter expression: {expr!r}")

    return f"({base_filter}) && ({expr})"

async def search(
self,
query_text: str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from app.core.exception import BusinessError, ErrorCodes
from app.db.models.knowledge_gen import KnowledgeBase
from app.module.rag.repository import KnowledgeBaseRepository
from app.module.rag.schema.request import PagingQuery, RetrieveReq
from app.module.rag.schema.request import PagingQuery, ChunkFilterQuery, RetrieveReq
from app.module.rag.schema.response import PagedResponse
from .strategy import KnowledgeBaseStrategyFactory

Expand Down Expand Up @@ -86,22 +86,22 @@ async def get_chunks(
self,
knowledge_base_id: str,
rag_file_id: str,
paging_query: PagingQuery,
query: ChunkFilterQuery,
) -> PagedResponse:
"""获取指定 RAG 文件的分块列表(仅向量知识库)

Args:
knowledge_base_id: 知识库 ID
rag_file_id: RAG 文件 ID
paging_query: 分页参数
query: 分页和过滤参数

Returns:
分块列表(分页)
"""
return await self.query(
knowledge_base_id,
rag_file_id=rag_file_id,
paging_query=paging_query,
chunk_filter_query=query,
)

async def _get_knowledge_base(self, knowledge_base_id: str) -> KnowledgeBase:
Expand Down
Loading