feat: init kb tool

This commit is contained in:
Vukasin 2025-09-13 11:03:37 +02:00
parent b547135806
commit 13a8125c1b
3 changed files with 288 additions and 0 deletions

View File

@ -37,6 +37,26 @@ You have the ability to execute operations using both Python and CLI tools:
- Batch processing multiple files
- AI-powered intelligent file editing with natural language instructions, using the `edit_file` tool exclusively.
#### 2.3.1.1 KNOWLEDGE BASE SEMANTIC SEARCH
* Use `init_kb` to initialize kb-fusion binary before performing semantic searches (no parameters required)
* Use `search_files` to perform intelligent content discovery across documents with natural language queries
* Provide the FULL path to the file or directory together with your search queries. IMPORTANT: THE FULL FILE PATH IS REQUIRED; A BARE FILENAME IS NOT ACCEPTED.
* Example:
<function_calls>
<invoke name="search_files">
<parameter name="path">/workspace/documents/dataset.txt</parameter>
<parameter name="queries">["What is the main topic?", "Key findings summary"]</parameter>
</invoke>
</function_calls>
* ALWAYS use this tool when you need to find specific information within large documents or datasets
* Use `ls_kb` to list all indexed files and their status
* Use `cleanup_kb` for maintenance operations (operation: default|remove_files|clear_embeddings|clear_all):
<function_calls>
<invoke name="cleanup_kb">
<parameter name="operation">default</parameter>
</invoke>
</function_calls>
### 2.3.2 DATA PROCESSING
- Scraping and extracting data from websites
- Parsing structured data (JSON, CSV, XML)

View File

@ -16,6 +16,7 @@ from core.agentpress.thread_manager import ThreadManager
from core.agentpress.response_processor import ProcessorConfig
from core.tools.sb_shell_tool import SandboxShellTool
from core.tools.sb_files_tool import SandboxFilesTool
from core.tools.sb_kb_tool import SandboxKbTool
from core.tools.data_providers_tool import DataProvidersTool
from core.tools.expand_msg_tool import ExpandMessageTool
from core.prompts.prompt import get_system_prompt
@ -108,6 +109,7 @@ class ToolManager:
('web_search_tool', SandboxWebSearchTool, {'project_id': self.project_id, 'thread_manager': self.thread_manager}),
('sb_vision_tool', SandboxVisionTool, {'project_id': self.project_id, 'thread_id': self.thread_id, 'thread_manager': self.thread_manager}),
('sb_image_edit_tool', SandboxImageEditTool, {'project_id': self.project_id, 'thread_id': self.thread_id, 'thread_manager': self.thread_manager}),
('sb_kb_tool', SandboxKbTool, {'project_id': self.project_id, 'thread_manager': self.thread_manager}),
('sb_presentation_outline_tool', SandboxPresentationOutlineTool, {'project_id': self.project_id, 'thread_manager': self.thread_manager}),
('sb_presentation_tool', SandboxPresentationTool, {'project_id': self.project_id, 'thread_manager': self.thread_manager}),

View File

@ -0,0 +1,266 @@
import asyncio
import shlex
from typing import Optional, List

from core.agentpress.tool import ToolResult, openapi_schema, usage_example
from core.sandbox.tool_base import SandboxToolsBase
from core.agentpress.thread_manager import ThreadManager
from core.utils.config import config
class SandboxKbTool(SandboxToolsBase):
"""Tool for knowledge base operations using kb-fusion binary in a Daytona sandbox.
Provides search capabilities and maintenance operations for knowledge bases."""
def __init__(self, project_id: str, thread_manager: ThreadManager):
    """Set up the tool and record which kb-fusion release it works with.

    Args:
        project_id: Forwarded unchanged to the base-class initializer.
        thread_manager: Forwarded unchanged to the base-class initializer.
    """
    super().__init__(project_id, thread_manager)

    # The release is pinned; the download URL is derived from the same version
    # string so the two can never drift apart.
    pinned_version = "0.1.0"
    self.kb_version = pinned_version
    self.kb_download_url = (
        "https://github.com/kortix-ai/kb-fusion/releases/download/"
        f"v{pinned_version}/kb"
    )
async def _execute_kb_command(self, command: str) -> dict:
    """Run a command line in the sandbox and report its result.

    When the application config carries a non-empty ``OPENAI_API_KEY`` it is
    passed to the process environment under the same name; otherwise an empty
    environment mapping is passed.

    Args:
        command: Command line handed to ``self.sandbox.process.exec``.

    Returns:
        A dict with ``"output"`` (the response's ``result``) and
        ``"exit_code"`` (the response's ``exit_code``).
    """
    await self._ensure_sandbox()

    environment = {}
    api_key = config.OPENAI_API_KEY
    if api_key:
        environment["OPENAI_API_KEY"] = api_key

    completed = await self.sandbox.process.exec(command, env=environment)
    return {"output": completed.result, "exit_code": completed.exit_code}
@openapi_schema({
"type": "function",
"function": {
"name": "init_kb",
"description": "Initialize the kb-fusion binary. Checks if kb exists and installs/updates if needed.",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
})
@usage_example('''
<function_calls>
<invoke name="init_kb">
</invoke>
</function_calls>
''')
async def init_kb(self) -> ToolResult:
    """Make sure the pinned kb-fusion binary is installed in the sandbox.

    Runs ``kb -v`` first. If it succeeds and its output contains
    ``kb-fusion <self.kb_version>`` nothing is downloaded. Otherwise the binary
    is fetched from ``self.kb_download_url`` into ``/usr/local/bin/kb``, marked
    executable, and checked again with ``kb -v``.

    Returns:
        A success response carrying ``message`` and ``version`` (plus
        ``verification`` after an install/update), or a fail response
        describing the step that went wrong.
    """
    try:
        await self._ensure_sandbox()

        expected_banner = f"kb-fusion {self.kb_version}"
        check_result = await self._execute_kb_command("kb -v")

        if check_result["exit_code"] == 0:
            # Output may be missing even on success; treat that as "unknown
            # version" and reinstall instead of failing on None.
            if expected_banner in (check_result["output"] or ""):
                return self.success_response({
                    "message": f"kb-fusion {self.kb_version} is already installed and up to date.",
                    "version": self.kb_version
                })
            install_msg = f"Updating kb-fusion to version {self.kb_version}"
        else:
            install_msg = f"Installing kb-fusion version {self.kb_version}"

        install_commands = [
            # -f: exit non-zero on HTTP errors so an error page is never
            #     saved as the binary; -sS: no progress meter, but keep
            #     error text; -L: follow the release redirect.
            f"curl -fsSL {self.kb_download_url} -o /usr/local/bin/kb",
            "chmod +x /usr/local/bin/kb",
        ]
        for cmd in install_commands:
            result = await self._execute_kb_command(cmd)
            if result["exit_code"] != 0:
                return self.fail_response(f"Failed to install kb: {result['output']}")

        verify_result = await self._execute_kb_command("kb -v")
        if verify_result["exit_code"] != 0:
            return self.fail_response(f"kb installation verification failed: {verify_result['output']}")

        return self.success_response({
            "message": f"{install_msg} completed successfully.",
            "version": self.kb_version,
            "verification": (verify_result["output"] or "").strip()
        })
    except Exception as e:
        return self.fail_response(f"Error installing kb: {str(e)}")
@openapi_schema({
"type": "function",
"function": {
"name": "search_files",
"description": "Perform semantic search on files using kb-fusion. Searches for multiple queries in a specified full file path.",
"parameters": {
"type": "object",
"properties": {
"path": {
"type": "string",
"description": "Full Path to the file or directory to search in."
},
"queries": {
"type": "array",
"items": {"type": "string"},
"description": "List of search queries to execute."
}
},
"required": ["path", "queries"]
}
}
})
@usage_example('''
<function_calls>
<invoke name="search_files">
<parameter name="path"> /workspace/documents/dataset.txt</parameter>
<parameter name="queries">["What is the atomic number of oxygen?", "What color is oxygen when liquid?"]</parameter>
</invoke>
</function_calls>
''')
async def search_files(self, path: str, queries: List[str]) -> ToolResult:
    """Run a kb-fusion semantic search for one or more queries against a path.

    Builds ``kb search <path> <query>... -k 18 --json`` and executes it via
    ``self._execute_kb_command``. The path and every query are quoted with
    ``shlex.quote`` so quotes, ``$``, backticks and similar characters in
    model-supplied text reach ``kb`` as literal arguments (this assumes the
    sandbox interprets the command line with a POSIX shell).

    Args:
        path: Full path of the file or directory to search. Surrounding
            whitespace is stripped before use.
        queries: Search queries. A single bare string is treated as one query
            rather than being iterated character by character.

    Returns:
        A success response with ``search_results`` (raw command output),
        ``path``, ``queries`` and the executed ``command``; or a fail response
        when input is missing or the command exits non-zero.
    """
    try:
        if isinstance(queries, str):
            queries = [queries]
        if not queries:
            return self.fail_response("At least one query is required for search.")

        # Unquoted, stray whitespace around the path was harmless; once the
        # path is quoted it would become part of the argument, so trim it.
        target = path.strip() if isinstance(path, str) else ""
        if not target:
            return self.fail_response("A full file or directory path is required for search.")

        query_args = " ".join(shlex.quote(str(query)) for query in queries)
        search_command = f"kb search {shlex.quote(target)} {query_args} -k 18 --json"

        result = await self._execute_kb_command(search_command)
        if result["exit_code"] != 0:
            return self.fail_response(f"Search failed: {result['output']}")

        return self.success_response({
            "search_results": result["output"],
            "path": target,
            "queries": queries,
            "command": search_command
        })
    except Exception as e:
        return self.fail_response(f"Error performing search: {str(e)}")
@openapi_schema({
"type": "function",
"function": {
"name": "cleanup_kb",
"description": "Perform maintenance and cleanup operations on the knowledge base.",
"parameters": {
"type": "object",
"properties": {
"operation": {
"type": "string",
"enum": ["default", "remove_files", "clear_embeddings", "clear_all"],
"description": "Type of cleanup operation: 'default' (missing files + orphan cleanup), 'remove_files' (remove specific files), 'clear_embeddings' (clear embedding cache), 'clear_all' (nuke everything)."
},
"file_paths": {
"type": "array",
"items": {"type": "string"},
"description": "List of file paths to remove (only used with 'remove_files' operation)."
},
"days": {
"type": "integer",
"description": "Days for embedding retention (only used with 'clear_embeddings'). Use 0 to clear all embeddings."
},
"retention_days": {
"type": "integer",
"description": "Retention window for default sweep operation (default 30 days).",
"default": 30
}
},
"required": ["operation"]
}
}
})
@usage_example('''
<function_calls>
<invoke name="cleanup_kb">
<parameter name="operation">default</parameter>
</invoke>
</function_calls>
<function_calls>
<invoke name="cleanup_kb">
<parameter name="operation">remove_files</parameter>
<parameter name="file_paths">["/workspace/old_dataset.txt", "/workspace/temp.pdf"]</parameter>
</invoke>
</function_calls>
<function_calls>
<invoke name="cleanup_kb">
<parameter name="operation">clear_embeddings</parameter>
<parameter name="days">7</parameter>
</invoke>
</function_calls>
''')
async def cleanup_kb(
    self,
    operation: str,
    file_paths: Optional[List[str]] = None,
    days: Optional[int] = None,
    retention_days: int = 30,
) -> ToolResult:
    """Run a ``kb sweep`` maintenance command selected by ``operation``.

    Operations map to commands as follows:
      * ``default``          -> ``kb sweep --retention-days <retention_days>``
      * ``remove_files``     -> ``kb sweep --remove <path>...``
      * ``clear_embeddings`` -> ``kb sweep --clear-embeddings <days>`` (0 when
        ``days`` is not given)
      * ``clear_all``        -> ``kb sweep --clear-all``

    File paths are quoted with ``shlex.quote`` and the numeric arguments are
    validated as non-negative integers, so model-supplied values cannot add
    extra shell syntax to the command line (this assumes the sandbox
    interprets the command line with a POSIX shell).

    Args:
        operation: One of the operation names listed above.
        file_paths: Paths to remove; required for ``remove_files``. A single
            bare string is treated as one path.
        days: Embedding retention in days for ``clear_embeddings``.
        retention_days: Retention window for the ``default`` sweep.

    Returns:
        A success response with ``message``, ``output`` and the executed
        ``command``; or a fail response for invalid input, an unknown
        operation, or a non-zero exit code.
    """
    try:
        if operation == "default":
            try:
                retention = int(retention_days)
            except (TypeError, ValueError):
                retention = -1
            if retention < 0:
                return self.fail_response("retention_days must be a non-negative integer.")
            command = f"kb sweep --retention-days {retention}"
        elif operation == "remove_files":
            if isinstance(file_paths, str):
                file_paths = [file_paths]
            if not file_paths:
                return self.fail_response("file_paths is required for remove_files operation.")
            paths_str = " ".join(shlex.quote(str(file_path)) for file_path in file_paths)
            command = f"kb sweep --remove {paths_str}"
        elif operation == "clear_embeddings":
            try:
                # Omitting days means "clear everything", which kb expresses as 0.
                keep_days = 0 if days is None else int(days)
            except (TypeError, ValueError):
                keep_days = -1
            if keep_days < 0:
                return self.fail_response("days must be a non-negative integer.")
            command = f"kb sweep --clear-embeddings {keep_days}"
        elif operation == "clear_all":
            command = "kb sweep --clear-all"
        else:
            return self.fail_response(f"Unknown operation: {operation}")

        result = await self._execute_kb_command(command)
        if result["exit_code"] != 0:
            return self.fail_response(f"Cleanup operation failed: {result['output']}")

        return self.success_response({
            "message": f"Cleanup operation '{operation}' completed successfully.",
            "output": result["output"],
            "command": command
        })
    except Exception as e:
        return self.fail_response(f"Error performing cleanup: {str(e)}")
@openapi_schema({
"type": "function",
"function": {
"name": "ls_kb",
"description": "List indexed files in the knowledge base. Shows file status, size, modification time, and paths.",
"parameters": {
"type": "object",
"properties": {},
"required": []
}
}
})
@usage_example('''
<function_calls>
<invoke name="ls_kb">
</invoke>
</function_calls>
''')
async def ls_kb(self) -> ToolResult:
    """List the files currently indexed in the knowledge base via ``kb ls``.

    Returns:
        A success response with ``message``, the raw ``output`` of the command
        and the ``command`` string itself; or a fail response when the command
        exits non-zero or raises.
    """
    list_command = "kb ls"
    try:
        listing = await self._execute_kb_command(list_command)
        if listing["exit_code"] == 0:
            payload = {
                "message": "Successfully listed indexed files.",
                "output": listing["output"],
                "command": list_command,
            }
            return self.success_response(payload)
        return self.fail_response(f"List operation failed: {listing['output']}")
    except Exception as error:
        return self.fail_response(f"Error listing files: {error!s}")