Skip to content

Commit 2986101

Browse files
authored
feat: add filtering functionality for knowledge base file details (#445)
* feat: add filtering functionality for knowledge base file details
1 parent 82770c5 commit 2986101

8 files changed

Lines changed: 168 additions & 40 deletions

File tree

frontend/src/i18n/locales/en/common.json

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1756,7 +1756,15 @@
17561756
"kbList": "Knowledge Bases",
17571757
"kbDetail": "Knowledge Base Detail"
17581758
},
1759-
"defaultFileName": "File {{id}}"
1759+
"defaultFileName": "File {{id}}",
1760+
"filter": {
1761+
"title": "Filter",
1762+
"idOperator": "ID Operator",
1763+
"idValue": "ID Value",
1764+
"textKeyword": "Text Keyword",
1765+
"apply": "Apply",
1766+
"clear": "Clear"
1767+
}
17601768
},
17611769
"create": {
17621770
"title": "Create Knowledge Base",

frontend/src/i18n/locales/zh/common.json

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1755,7 +1755,15 @@
17551755
"kbList": "知识库",
17561756
"kbDetail": "知识库详情"
17571757
},
1758-
"defaultFileName": "文件 {{id}}"
1758+
"defaultFileName": "文件 {{id}}",
1759+
"filter": {
1760+
"title": "过滤",
1761+
"idOperator": "ID操作符",
1762+
"idValue": "ID值",
1763+
"textKeyword": "文本关键词",
1764+
"apply": "应用",
1765+
"clear": "清除"
1766+
}
17591767
},
17601768
"create": {
17611769
"title": "创建知识库",

frontend/src/pages/KnowledgeBase/FileDetail/KnowledgeBaseFileDetail.tsx

Lines changed: 86 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
import React, { useEffect, useState, useCallback } from "react";
2-
import {Eye, Edit, Trash2, FileBox, ChevronLeft, ChevronRight, Code, CheckCircle, AlertCircle, Wand2} from "lucide-react";
3-
import { Card, Button, Badge, Input, Tabs, Modal, Breadcrumb, Tag, Spin, Empty, Alert, message, Tooltip } from "antd";
2+
import {Eye, Edit, Trash2, FileBox, ChevronLeft, ChevronRight, Code, CheckCircle, AlertCircle, Wand2, X} from "lucide-react";
3+
import { Card, Button, Badge, Input, Tabs, Modal, Breadcrumb, Tag, Spin, Empty, Alert, message, Tooltip, Select } from "antd";
44
import { ExclamationCircleOutlined } from "@ant-design/icons";
55
import { queryKnowledgeBaseFileDetailUsingGet, updateKnowledgeBaseChunk, deleteKnowledgeBaseChunk } from "@/pages/KnowledgeBase/knowledge-base.api";
66
import { Link, useParams } from "react-router";
7-
import DetailHeader from "@/components/DetailHeader";
87
import { useTranslation } from "react-i18next";
98
import { Prism as SyntaxHighlighter } from "react-syntax-highlighter";
109
import { vscDarkPlus } from "react-syntax-highlighter/dist/esm/styles/prism";
@@ -50,6 +49,28 @@ const KnowledgeBaseFileDetail: React.FC = () => {
5049
const pageSize = 10;
5150
const [currentPage, setCurrentPage] = useState(1);
5251

52+
const [idOperator, setIdOperator] = useState<string>("");
53+
const [idValue, setIdValue] = useState<string>("");
54+
const [textKeyword, setTextKeyword] = useState<string>("");
55+
56+
/**
 * Builds the filter expression sent to the backend as the `expr` query parameter.
 *
 * User-typed values are embedded inside double-quoted string literals, so
 * backslashes and double quotes are escaped first; otherwise a stray `"` in the
 * input would terminate the literal early and corrupt (or extend) the expression.
 *
 * @param op - Comparison operator for the `id` clause; an empty string disables the clause.
 * @param val - Right-hand value of the `id` clause; blank input disables the clause.
 * @param keyword - Substring to match against `text`; blank input disables the clause.
 * @returns The clauses joined with ` && `, or an empty string when no clause applies.
 */
const buildFilterExpr = useCallback((op: string, val: string, keyword: string): string => {
  // Backslashes must be escaped before quotes so the backslash added for a quote is not doubled.
  // NOTE(review): assumes the backend expression grammar treats `\` as the escape character
  // inside string literals — confirm against the Milvus version in use.
  const escapeLiteral = (raw: string): string => raw.replace(/\\/g, "\\\\").replace(/"/g, '\\"');

  const parts: string[] = [];

  const idLiteral = val.trim();
  if (op && idLiteral) {
    parts.push(`id ${op} "${escapeLiteral(idLiteral)}"`);
  }

  // A whitespace-only keyword is treated as "no keyword"; otherwise it is used as typed.
  if (keyword.trim()) {
    parts.push(`text like "%${escapeLiteral(keyword)}%"`);
  }

  return parts.join(" && ");
}, []);
66+
67+
/**
 * Clears the filter form: resets the ID operator, ID value and text keyword
 * to empty strings, then moves the pager back to the first page.
 */
const handleClearFilter = useCallback((): void => {
  const resetters = [setIdOperator, setIdValue, setTextKeyword];
  resetters.forEach((reset) => reset(""));
  setCurrentPage(1);
}, []);
73+
5374
const safeParse = (meta: unknown): unknown => {
5475
if (typeof meta === "string") {
5576
try {
@@ -66,7 +87,8 @@ const KnowledgeBaseFileDetail: React.FC = () => {
6687
setLoading(true);
6788
setError(null);
6889
try {
69-
const res = await queryKnowledgeBaseFileDetailUsingGet(knowledgeBaseId, ragFileId, { page, size: pageSize });
90+
const expr = buildFilterExpr(idOperator, idValue, textKeyword);
91+
const res = await queryKnowledgeBaseFileDetailUsingGet(knowledgeBaseId, ragFileId, { page, size: pageSize, expr: expr || undefined });
7092
const raw = (res?.data ?? res) as {
7193
page: number;
7294
size: number;
@@ -92,7 +114,7 @@ const KnowledgeBaseFileDetail: React.FC = () => {
92114

93115
useEffect(() => {
94116
fetchChunks(currentPage);
95-
}, [knowledgeBaseId, ragFileId, currentPage, t]);
117+
}, [knowledgeBaseId, ragFileId, currentPage, idOperator, idValue, textKeyword, t]);
96118

97119
const totalElements = paged?.totalElements ?? 0;
98120
const totalPages = paged?.totalPages ?? 0;
@@ -237,7 +259,7 @@ const KnowledgeBaseFileDetail: React.FC = () => {
237259
key={chunk.id}
238260
title={
239261
<div className="flex items-center gap-2">
240-
<span>{t("knowledgeBase.fileDetail.messages.chunkLabel")} {chunk.id}</span>
262+
<span className="text-slate-400 text-xs mr-1">ID</span><span className="font-mono text-sm">{chunk.id}</span>
241263
{chunk.metadata?.sliceOperator && (
242264
<Tag className="text-xs">
243265
{chunk.metadata.sliceOperator}
@@ -295,19 +317,64 @@ const KnowledgeBaseFileDetail: React.FC = () => {
295317
{ title: fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId }) },
296318
]}
297319
/>
298-
<DetailHeader
299-
data={{
300-
id: ragFileId,
301-
icon: <FileBox className="w-full h-full" />,
302-
iconColor: "#a27e7e",
303-
name: fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId }),
304-
description: `${totalElements} ${t("knowledgeBase.fileDetail.messages.chunkCount", { count: 0 })}`,
305-
createdAt: "",
306-
lastUpdated: "",
307-
}}
308-
statistics={[]}
309-
operations={[]}
310-
/>
320+
<div className="flex items-center gap-3 px-4 py-3 bg-gradient-to-r from-slate-50 to-slate-100/50 rounded-xl border border-slate-200/60">
321+
<div className="flex-shrink-0 w-10 h-10 rounded-lg bg-gradient-to-br from-amber-100 to-orange-100 flex items-center justify-center shadow-sm">
322+
<FileBox className="w-5 h-5 text-amber-600" />
323+
</div>
324+
<div className="flex-1 min-w-0">
325+
<h1 className="text-base font-semibold text-slate-800 truncate">
326+
{fileName || t("knowledgeBase.fileDetail.defaultFileName", { id: ragFileId })}
327+
</h1>
328+
<p className="text-sm text-slate-500">
329+
{totalElements} {t("knowledgeBase.fileDetail.messages.chunkCount", { count: 0 })}
330+
</p>
331+
</div>
332+
</div>
333+
<div className="flex items-center gap-3 bg-white rounded-lg border border-slate-200/80 px-4 py-3 mb-4">
334+
<div className="flex items-center gap-2">
335+
<span className="text-xs text-slate-500">ID</span>
336+
<Select
337+
value={idOperator || undefined}
338+
onChange={setIdOperator}
339+
placeholder={t("knowledgeBase.fileDetail.filter.idOperator")}
340+
allowClear
341+
className="w-16"
342+
options={[
343+
{ value: ">", label: ">" },
344+
{ value: "<", label: "<" },
345+
{ value: "==", label: "==" },
346+
]}
347+
/>
348+
<Input
349+
value={idValue}
350+
onChange={(e) => setIdValue(e.target.value)}
351+
placeholder={t("knowledgeBase.fileDetail.filter.idValue")}
352+
className="w-28"
353+
/>
354+
</div>
355+
<div className="h-5 w-px bg-slate-200" />
356+
<Input.Search
357+
value={textKeyword}
358+
onChange={(e) => setTextKeyword(e.target.value)}
359+
placeholder={t("knowledgeBase.fileDetail.filter.textKeyword")}
360+
className="w-48"
361+
allowClear
362+
/>
363+
<div className="flex items-center gap-2 ml-auto">
364+
<Button
365+
type="primary"
366+
size="small"
367+
onClick={() => { setCurrentPage(1); fetchChunks(1); }}
368+
>
369+
{t("knowledgeBase.fileDetail.filter.apply")}
370+
</Button>
371+
{(idOperator || idValue || textKeyword) && (
372+
<Button size="small" onClick={handleClearFilter}>
373+
<X className="w-4 h-4" />
374+
</Button>
375+
)}
376+
</div>
377+
</div>
311378
<Card>
312379
{loading ? <div className="flex items-center justify-center py-8"><Spin /></div> : renderChunks()}
313380
</Card>

frontend/src/pages/KnowledgeBase/knowledge-base.api.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,15 @@ export function retrieveKnowledgeBaseContent(data: {
7474
export function queryKnowledgeBaseFileDetailUsingGet(
7575
knowledgeBaseId: string,
7676
ragFileId: string,
77-
params: { page?: number; size?: number } = { page: 1, size: 20 }
77+
params: { page?: number; size?: number; expr?: string } = { page: 1, size: 20 }
7878
) {
7979
const page = params.page ?? 1;
8080
const size = params.size ?? 20;
81-
return get(`/api/knowledge-base/${knowledgeBaseId}/files/${ragFileId}?page=${page}&page_size=${size}`);
81+
const queryParams = [`page=${page}`, `page_size=${size}`];
82+
if (params.expr) {
83+
queryParams.push(`expr=${encodeURIComponent(params.expr)}`);
84+
}
85+
return get(`/api/knowledge-base/${knowledgeBaseId}/files/${ragFileId}?${queryParams.join('&')}`);
8286
}
8387

8488
export function queryKnowledgeBase(data: {

runtime/datamate-python/app/module/rag/interface/knowledge_base.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
RagFileReq,
2121
RetrieveReq,
2222
PagingQuery,
23+
ChunkFilterQuery,
2324
QueryRequest,
2425
ChunkUpdateReq,
2526
)
@@ -143,12 +144,12 @@ async def delete_knowledge_base_files(
143144
async def get_file_chunks(
144145
knowledge_base_id: str,
145146
rag_file_id: str,
146-
paging_query: PagingQuery = Depends(),
147+
query: ChunkFilterQuery = Depends(),
147148
db: AsyncSession = Depends(get_db),
148149
):
149150
"""获取指定 RAG 文件的分块列表"""
150151
service = UnifiedRetrievalService(db)
151-
result = await service.get_chunks(knowledge_base_id, rag_file_id, paging_query)
152+
result = await service.get_chunks(knowledge_base_id, rag_file_id, query)
152153
return SuccessResponse(data=result)
153154

154155

runtime/datamate-python/app/module/rag/schema/request.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,37 @@ class Config:
330330
}
331331

332332

333+
class ChunkFilterQuery(BaseModel):
    """Query parameters for listing the chunks of a file.

    Carries pagination settings plus an optional Milvus filter expression.
    """

    # 1-based page index; values below 1 are rejected by validation.
    page: int = Field(default=1, ge=1, description="页码(从 1 开始)")

    # Page size, constrained to the range 1..100.
    size: int = Field(default=10, ge=1, le=100, description="每页数量")

    # Raw filter expression supplied by the caller; None means "no extra filter".
    expr: Optional[str] = Field(
        default=None,
        description='Milvus 过滤表达式(如 id > "1" && text like "%keyword%")',
    )

    class Config:
        # Example payload attached to the generated JSON schema.
        json_schema_extra = {
            "example": {"page": 1, "size": 10, "expr": 'id > "1"'},
        }
362+
363+
333364
class QueryRequest(BaseModel):
334365
"""知识图谱查询请求"""
335366
knowledge_base_id: str = Field(..., description="知识库ID")

runtime/datamate-python/app/module/rag/service/strategy/vector_strategy.py

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from app.module.rag.infra.vectorstore.milvus_client import get_milvus_client
1616
from app.module.rag.repository import KnowledgeBaseRepository, RagFileRepository
1717
from app.module.rag.schema.response import PagedResponse, RagChunkResp
18+
from app.module.rag.schema.request import ChunkFilterQuery
1819
from app.module.rag.service.common import TextCleaner, MetadataBuilder, BatchProcessor, get_file_path
1920
from app.module.system.service.common_service import get_model_by_id
2021
from .base import KnowledgeBaseStrategy
@@ -39,18 +40,18 @@ async def query(
3940
knowledge_base_id: 知识库 ID
4041
**kwargs: 额外参数,必须包含:
4142
- rag_file_id: RAG 文件 ID
42-
- paging_query: 分页参数
43+
- chunk_filter_query: 分页和过滤参数
4344
4445
Returns:
4546
分页分块列表
4647
"""
4748
rag_file_id = kwargs.get("rag_file_id")
48-
paging_query = kwargs.get("paging_query")
49+
chunk_filter_query: ChunkFilterQuery = kwargs.get("chunk_filter_query")
4950

50-
if not rag_file_id or not paging_query:
51+
if not rag_file_id or not chunk_filter_query:
5152
raise BusinessError(
5253
ErrorCodes.RAG_INVALID_REQUEST,
53-
"Missing rag_file_id or paging_query parameters"
54+
"Missing rag_file_id or chunk_filter_query parameters"
5455
)
5556

5657
kb_repo = KnowledgeBaseRepository(self.db)
@@ -67,20 +68,22 @@ async def query(
6768
client = get_milvus_client()
6869

6970
try:
70-
count_filter = f'metadata["rag_file_id"] == "{rag_file_id}"'
71+
base_filter = f'metadata["rag_file_id"] == "{rag_file_id}"'
72+
combined_filter = self._build_combined_filter(base_filter, chunk_filter_query.expr)
73+
7174
count_res = client.query(
7275
collection_name=knowledge_base.name,
73-
filter=count_filter,
76+
filter=combined_filter,
7477
output_fields=["id"],
7578
)
7679
total = len(count_res)
7780

78-
offset = (paging_query.page - 1) * paging_query.size
81+
offset = (chunk_filter_query.page - 1) * chunk_filter_query.size
7982
results = client.query(
8083
collection_name=knowledge_base.name,
81-
filter=count_filter,
84+
filter=combined_filter,
8285
output_fields=["id", "text", "metadata"],
83-
limit=paging_query.size,
86+
limit=chunk_filter_query.size,
8487
offset=offset,
8588
)
8689

@@ -103,8 +106,8 @@ async def query(
103106
return PagedResponse.create(
104107
content=chunks,
105108
total_elements=total,
106-
page=paging_query.page,
107-
size=paging_query.size,
109+
page=chunk_filter_query.page,
110+
size=chunk_filter_query.size,
108111
)
109112

110113
except Exception as e:
@@ -117,6 +120,12 @@ async def query(
117120
f"查询文件分块失败: {str(e)}"
118121
) from e
119122

123+
@staticmethod
124+
def _build_combined_filter(base_filter: str, user_expr: Optional[str]) -> str:
125+
if not user_expr:
126+
return base_filter
127+
return f"{base_filter} && {user_expr}"
128+
120129
async def search(
121130
self,
122131
query_text: str,

runtime/datamate-python/app/module/rag/service/unified_retrieval_service.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from app.core.exception import BusinessError, ErrorCodes
1313
from app.db.models.knowledge_gen import KnowledgeBase
1414
from app.module.rag.repository import KnowledgeBaseRepository
15-
from app.module.rag.schema.request import PagingQuery, RetrieveReq
15+
from app.module.rag.schema.request import PagingQuery, ChunkFilterQuery, RetrieveReq
1616
from app.module.rag.schema.response import PagedResponse
1717
from .strategy import KnowledgeBaseStrategyFactory
1818

@@ -86,22 +86,22 @@ async def get_chunks(
8686
self,
8787
knowledge_base_id: str,
8888
rag_file_id: str,
89-
paging_query: PagingQuery,
89+
query: ChunkFilterQuery,
9090
) -> PagedResponse:
9191
"""获取指定 RAG 文件的分块列表(仅向量知识库)
9292
9393
Args:
9494
knowledge_base_id: 知识库 ID
9595
rag_file_id: RAG 文件 ID
96-
paging_query: 分页参数
96+
query: 分页和过滤参数
9797
9898
Returns:
9999
分块列表(分页)
100100
"""
101101
return await self.query(
102102
knowledge_base_id,
103103
rag_file_id=rag_file_id,
104-
paging_query=paging_query,
104+
chunk_filter_query=query,
105105
)
106106

107107
async def _get_knowledge_base(self, knowledge_base_id: str) -> KnowledgeBase:

0 commit comments

Comments
 (0)