Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 72 additions & 3 deletions backend/package/yuxi/agents/toolkits/kbs/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
FindOutputSchema,
OpenInputSchema,
OpenOutputSchema,
QueryKeywordsInputSchema,
SearchInputSchema,
SearchOutputSchema,
)
Expand Down Expand Up @@ -153,6 +154,7 @@ def mindmap_to_text(node, level=0):


# Tool-facing aliases for the input schema classes. The @tool-decorated
# functions in this module pass these names as `args_schema` (for example
# query_keywords uses QueryKeywordsInput, open_kb_document uses OpenKBDocumentInput).
QueryKBInput = SearchInputSchema
QueryKeywordsInput = QueryKeywordsInputSchema
OpenKBDocumentInput = OpenInputSchema
FindKBDocumentInput = FindInputSchema

Expand Down Expand Up @@ -241,6 +243,72 @@ async def query_kb(kb_id: str, query_text: str, file_name: str | None = None, ru
return f"检索失败: {str(e)}"


@tool(category="knowledge", tags=["知识库"], args_schema=QueryKeywordsInput)
async def query_keywords(
    kb_id: str,
    keywords: list[str],
    file_name: str | None = None,
    runtime: ToolRuntime = None,
) -> Any:
    """基于关键词在指定知识库中检索内容

    当用户明确知道要搜索的关键词(如专有名词、技术术语、代码符号、特定指标等)
    时使用此工具。检索采用「精准优先 + BM25 兜底」策略:包含完整关键词短语的 chunk
    排在前面(基于 Milvus PHRASE_MATCH,分词后 token 相邻即算精准命中),精准命中
    不足时由 BM25 模糊命中补齐,结果 metadata 中以 is_precise_match 标记。如果需要
    语义理解型的模糊检索,请使用 query_kb。

    Args:
        kb_id: 知识库资源 ID,也就是 kb_id
        keywords: 关键词列表
        file_name: 可选文件名关键词过滤

    Returns:
        检索结果列表,结构与 query_kb 一致
    """
    # NOTE(review): the docstring above is deliberately left untranslated and
    # byte-identical. It is presumably consumed by the @tool decorator as the
    # model-facing tool description, which would make it runtime text - confirm
    # before editing it.
    #
    # Summary: keyword search inside one knowledge base. The retriever is called
    # in "keyword" search mode with precise (phrase) matching enabled. Returns a
    # plain error string for invalid input, an inaccessible target, or a
    # retriever failure; otherwise returns the search output built below.
    if not kb_id:
        return "请提供 kb_id"

    # Normalise defensively before iterating. A bare string would otherwise be
    # split into single characters, and None or non-string items would raise
    # here, outside the try block, instead of producing the friendly message.
    if isinstance(keywords, str):
        keywords = [keywords]
    keywords = [k.strip() for k in (keywords or []) if isinstance(k, str) and k.strip()]
    if not keywords:
        return "请提供关键词列表"

    knowledge_base = _get_knowledge_base()
    retrievers = knowledge_base.get_retrievers()
    visible_kbs = await _resolve_visible_knowledge_bases_for_query(runtime)
    target_info, target_kb_id, target_error = _find_query_target(
        kb_id=kb_id,
        retrievers=retrievers,
        visible_kbs=visible_kbs,
    )
    if target_error:
        return target_error

    try:
        retriever = target_info["retriever"]
        # Join the keywords into one query text; force keyword/BM25 mode and
        # enable precise matching, passing the individual terms separately.
        query_text = " ".join(keywords)
        kwargs: dict[str, Any] = {
            "search_mode": "keyword",
            "precise_match": True,
            "phrase_match_terms": keywords,
        }
        if file_name:
            kwargs["file_name"] = file_name

        # Retrievers may be sync or async callables; await only coroutine functions.
        if inspect.iscoroutinefunction(retriever):
            result = await retriever(query_text, **kwargs)
        else:
            result = retriever(query_text, **kwargs)

        # A result that is already a search-output dict for this KB is validated
        # through the schema; anything else is wrapped by the knowledge base.
        already_search_output = (
            isinstance(result, dict)
            and result.get("kb_id") == target_kb_id
            and isinstance(result.get("results"), list)
        )
        if already_search_output:
            return SearchOutputSchema(**result).model_dump()
        return KnowledgeBase.build_search_output(target_kb_id, result)

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        logger.error(f"关键词检索失败: {e}")
        return f"关键词检索失败: {str(e)}"


@tool(category="knowledge", tags=["知识库"], args_schema=OpenKBDocumentInput)
async def open_kb_document(
kb_id: str,
Expand Down Expand Up @@ -358,11 +426,12 @@ async def find_kb_document(
def get_common_kb_tools() -> list:
    """Return the list of common knowledge-base tools.

    Six tools are returned, in this order:
    - list_kbs: list the knowledge bases the user can access
    - get_mindmap: get the mind map of a given knowledge base
    - query_kb: semantic retrieval within a given knowledge base
    - query_keywords: keyword-based retrieval within a given knowledge base
    - find_kb_document: locate a keyword or regex pattern inside a given file
    - open_kb_document: open a knowledge-base document in segments by file_id
    """
    # Only the six-tool list is kept: the stale five-tool return that preceded
    # it made this statement unreachable and left query_keywords unregistered.
    return [list_kbs, get_mindmap, query_kb, query_keywords, find_kb_document, open_kb_document]
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def _get_or_create_entity_collection(self, kb_id: str, embedding_info: Any) -> C
max_length=65535,
enable_analyzer=True,
analyzer_params=CONTENT_ANALYZER_PARAMS,
enable_match=True,
),
FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=embedding_info.dimension or 1024),
FieldSchema(name=CONTENT_SPARSE_FIELD, dtype=DataType.SPARSE_FLOAT_VECTOR),
Expand All @@ -221,6 +222,7 @@ def _get_or_create_triple_collection(self, kb_id: str, embedding_info: Any) -> C
max_length=65535,
enable_analyzer=True,
analyzer_params=CONTENT_ANALYZER_PARAMS,
enable_match=True,
),
FieldSchema(name="source_id", dtype=DataType.VARCHAR, max_length=100),
FieldSchema(name="target_id", dtype=DataType.VARCHAR, max_length=100),
Expand Down
Loading