Merge pull request #1093 from escapade-mckv/fix-agent-builder-tool-call

improve system prompt for self-config
This commit is contained in:
Bobbie 2025-07-26 21:46:10 +05:30 committed by GitHub
commit 5157765f52
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 82 additions and 140 deletions

View File

@ -691,19 +691,38 @@ You have the ability to configure and enhance yourself! When users ask you to mo
## 🎯 When Users Request Configuration Changes ## 🎯 When Users Request Configuration Changes
**If a user asks you to:** **CRITICAL: ASK CLARIFYING QUESTIONS FIRST**
- "Add Gmail integration" Search for Gmail MCP, create credential profile, guide connection Before implementing any configuration changes, ALWAYS ask detailed questions to understand:
- "Set up daily reports" Create workflow + scheduled trigger - What specific outcome do they want to achieve?
- "Connect to Slack" Find Slack integration, set up credential profile - What platforms/services are they using?
- "Automate [task]" Design appropriate workflow/trigger combination - How often do they need this to happen?
- "Add [service] capabilities" Search for relevant MCP servers - What data or information needs to be processed?
- Do they have existing accounts/credentials for relevant services?
- What should trigger the automation (time, events, manual)?
**Important Guidelines:** **MANDATORY MCP TOOL ADDITION FLOW:**
- Always search for integrations before creating credential profiles 1. **Search** → Use `search_mcp_servers` to find relevant integrations
- Guide users through connection processes step-by-step 2. **Explore** → Use `get_mcp_server_tools` to see available capabilities
3. **AUTOMATICALLY ADD** → Use `configure_mcp_server` to add the MCP server immediately
4. **Create Profile** → Use `create_credential_profile` for authentication
5. **Connect** → Use `connect_credential_profile` to generate connection link
6. **Configure** → Use `configure_profile_for_agent` to add to your capabilities
7. **Verify** → Test the new tools work correctly
**If a user asks you to:**
- "Add Gmail integration" → Ask: What Gmail tasks? Read/send emails? Manage labels? Then SEARCH → ADD → CONFIGURE
- "Set up daily reports" → Ask: What data? What format? Where to send? Then SEARCH for needed tools → ADD → CREATE workflow
- "Connect to Slack" → Ask: What Slack actions? Send messages? Read channels? Then SEARCH → ADD → CONFIGURE
- "Automate [task]" → Ask: What triggers it? What steps? What outputs? Then SEARCH → ADD → BUILD workflow
- "Add [service] capabilities" → Ask: What specific actions? Then SEARCH → ADD immediately
**ABSOLUTE REQUIREMENTS:**
- **NEVER just show MCP servers** - ALWAYS configure and add them immediately after discovery
- **ASK 3-5 SPECIFIC QUESTIONS** before starting any configuration
- **AUTOMATICALLY ADD EVERY RELEVANT MCP SERVER** you discover during search
- Guide users through connection processes step-by-step with clear instructions
- Explain what each integration enables and why it's useful - Explain what each integration enables and why it's useful
- Test connections before finalizing configurations - Test connections before finalizing configurations
- Only enable tools that users specifically request or need
- **ALWAYS ADD THE NECESSARY TOOLS** - If you don't have capabilities for something, automatically search for and add the required MCP integrations - **ALWAYS ADD THE NECESSARY TOOLS** - If you don't have capabilities for something, automatically search for and add the required MCP integrations
## 🌟 Self-Configuration Philosophy ## 🌟 Self-Configuration Philosophy

View File

@ -13,32 +13,17 @@ import chardet
import PyPDF2 import PyPDF2
import docx import docx
import openpyxl
import csv
import json
import yaml
import xml.etree.ElementTree as ET
from PIL import Image
import pytesseract
from utils.logger import logger from utils.logger import logger
from services.supabase import DBConnection from services.supabase import DBConnection
class FileProcessor: class FileProcessor:
"""Handles file upload, content extraction, and processing for agent knowledge bases."""
SUPPORTED_TEXT_EXTENSIONS = { SUPPORTED_TEXT_EXTENSIONS = {
'.txt', '.md', '.py', '.js', '.ts', '.html', '.css', '.json', '.yaml', '.yml', '.txt'
'.xml', '.csv', '.sql', '.sh', '.bat', '.ps1', '.dockerfile', '.gitignore',
'.env', '.ini', '.cfg', '.conf', '.log', '.rst', '.toml', '.lock'
} }
SUPPORTED_DOCUMENT_EXTENSIONS = { SUPPORTED_DOCUMENT_EXTENSIONS = {
'.pdf', '.docx', '.xlsx', '.pptx' '.pdf', '.docx'
}
SUPPORTED_IMAGE_EXTENSIONS = {
'.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff', '.webp'
} }
MAX_FILE_SIZE = 50 * 1024 * 1024 MAX_FILE_SIZE = 50 * 1024 * 1024
@ -247,7 +232,7 @@ class FileProcessor:
"""Clone a Git repository and extract content from supported files.""" """Clone a Git repository and extract content from supported files."""
if include_patterns is None: if include_patterns is None:
include_patterns = ['*.py', '*.js', '*.ts', '*.md', '*.txt', '*.json', '*.yaml', '*.yml'] include_patterns = ['*.txt', '*.pdf', '*.docx']
if exclude_patterns is None: if exclude_patterns is None:
exclude_patterns = ['node_modules/*', '.git/*', '*.pyc', '__pycache__/*', '.env', '*.log'] exclude_patterns = ['node_modules/*', '.git/*', '*.pyc', '__pycache__/*', '.env', '*.log']
@ -394,7 +379,7 @@ class FileProcessor:
shutil.rmtree(temp_dir, ignore_errors=True) shutil.rmtree(temp_dir, ignore_errors=True)
async def _extract_file_content(self, file_content: bytes, filename: str, mime_type: str) -> str: async def _extract_file_content(self, file_content: bytes, filename: str, mime_type: str) -> str:
"""Extract text content from various file types.""" """Extract text content from supported file types."""
file_extension = Path(filename).suffix.lower() file_extension = Path(filename).suffix.lower()
try: try:
@ -410,33 +395,8 @@ class FileProcessor:
elif file_extension == '.docx': elif file_extension == '.docx':
return self._extract_docx_content(file_content) return self._extract_docx_content(file_content)
# Excel files
elif file_extension == '.xlsx':
return self._extract_xlsx_content(file_content)
# Images (OCR)
elif file_extension in self.SUPPORTED_IMAGE_EXTENSIONS:
return self._extract_image_content(file_content)
# JSON files
elif file_extension == '.json':
return self._extract_json_content(file_content)
# YAML files
elif file_extension in {'.yaml', '.yml'}:
return self._extract_yaml_content(file_content)
# XML files
elif file_extension == '.xml':
return self._extract_xml_content(file_content)
# CSV files
elif file_extension == '.csv':
return self._extract_csv_content(file_content)
else: else:
# Try to extract as text if possible raise ValueError(f"Unsupported file format: {file_extension}. Only .txt, .pdf, and .docx files are supported.")
return self._extract_text_content(file_content)
except Exception as e: except Exception as e:
logger.error(f"Error extracting content from {filename}: {str(e)}") logger.error(f"Error extracting content from {filename}: {str(e)}")
@ -479,77 +439,17 @@ class FileProcessor:
raw_text = '\n'.join(text_content) raw_text = '\n'.join(text_content)
return self._sanitize_content(raw_text) return self._sanitize_content(raw_text)
def _extract_xlsx_content(self, file_content: bytes) -> str:
"""Extract text from Excel files."""
workbook = openpyxl.load_workbook(io.BytesIO(file_content))
text_content = []
for sheet_name in workbook.sheetnames:
sheet = workbook[sheet_name]
text_content.append(f"Sheet: {sheet_name}")
for row in sheet.iter_rows(values_only=True):
row_text = [str(cell) if cell is not None else '' for cell in row]
if any(row_text):
text_content.append('\t'.join(row_text))
raw_text = '\n'.join(text_content)
return self._sanitize_content(raw_text)
def _extract_image_content(self, file_content: bytes) -> str:
"""Extract text from images using OCR."""
try:
image = Image.open(io.BytesIO(file_content))
raw_text = pytesseract.image_to_string(image)
return self._sanitize_content(raw_text)
except Exception as e:
return f"OCR extraction failed: {str(e)}"
def _extract_json_content(self, file_content: bytes) -> str:
"""Extract and format JSON content."""
text = self._extract_text_content(file_content)
try:
parsed = json.loads(text)
formatted = json.dumps(parsed, indent=2)
return self._sanitize_content(formatted)
except json.JSONDecodeError:
return self._sanitize_content(text)
def _extract_yaml_content(self, file_content: bytes) -> str:
"""Extract and format YAML content."""
text = self._extract_text_content(file_content)
try:
parsed = yaml.safe_load(text)
formatted = yaml.dump(parsed, default_flow_style=False)
return self._sanitize_content(formatted)
except yaml.YAMLError:
return self._sanitize_content(text)
def _extract_xml_content(self, file_content: bytes) -> str:
"""Extract content from XML files."""
try:
root = ET.fromstring(file_content)
xml_string = ET.tostring(root, encoding='unicode')
return self._sanitize_content(xml_string)
except ET.ParseError:
return self._extract_text_content(file_content)
def _extract_csv_content(self, file_content: bytes) -> str:
"""Extract and format CSV content."""
text = self._extract_text_content(file_content)
try:
reader = csv.reader(io.StringIO(text))
rows = list(reader)
formatted = '\n'.join(['\t'.join(row) for row in rows])
return self._sanitize_content(formatted)
except Exception:
return self._sanitize_content(text)
def _sanitize_content(self, content: str) -> str: def _sanitize_content(self, content: str) -> str:
"""Sanitize extracted content to remove problematic characters for PostgreSQL.""" """Sanitize extracted content to remove problematic characters for PostgreSQL."""
@ -576,18 +476,8 @@ class FileProcessor:
return 'PyPDF2' return 'PyPDF2'
elif file_extension == '.docx': elif file_extension == '.docx':
return 'python-docx' return 'python-docx'
elif file_extension == '.xlsx': elif file_extension == '.txt':
return 'openpyxl' return 'text encoding detection'
elif file_extension in self.SUPPORTED_IMAGE_EXTENSIONS:
return 'pytesseract OCR'
elif file_extension == '.json':
return 'JSON parser'
elif file_extension in {'.yaml', '.yml'}:
return 'YAML parser'
elif file_extension == '.xml':
return 'XML parser'
elif file_extension == '.csv':
return 'CSV parser'
else: else:
return 'text encoding detection' return 'text encoding detection'

View File

@ -427,12 +427,22 @@ export const AgentKnowledgeBaseManager = ({ agentId, agentName }: AgentKnowledge
const zip = new JSZip(); const zip = new JSZip();
const zipContent = await zip.loadAsync(zipFile); const zipContent = await zip.loadAsync(zipFile);
const extractedFiles: UploadedFile[] = []; const extractedFiles: UploadedFile[] = [];
const rejectedFiles: string[] = [];
const supportedExtensions = ['.txt', '.pdf', '.docx'];
for (const [path, file] of Object.entries(zipContent.files)) { for (const [path, file] of Object.entries(zipContent.files)) {
if (!file.dir && !path.startsWith('__MACOSX/') && !path.includes('/.')) { if (!file.dir && !path.startsWith('__MACOSX/') && !path.includes('/.')) {
const fileName = path.split('/').pop() || path;
const fileExtension = fileName.toLowerCase().substring(fileName.lastIndexOf('.'));
// Only process supported file formats
if (!supportedExtensions.includes(fileExtension)) {
rejectedFiles.push(fileName);
continue;
}
try { try {
const blob = await file.async('blob'); const blob = await file.async('blob');
const fileName = path.split('/').pop() || path;
const extractedFile = new File([blob], fileName); const extractedFile = new File([blob], fileName);
extractedFiles.push({ extractedFiles.push({
@ -454,7 +464,12 @@ export const AgentKnowledgeBaseManager = ({ agentId, agentName }: AgentKnowledge
...extractedFiles ...extractedFiles
]); ]);
toast.success(`Extracted ${extractedFiles.length} files from ${zipFile.name}`); let message = `Extracted ${extractedFiles.length} supported files from ${zipFile.name}`;
if (rejectedFiles.length > 0) {
message += `. Skipped ${rejectedFiles.length} unsupported files: ${rejectedFiles.slice(0, 5).join(', ')}${rejectedFiles.length > 5 ? '...' : ''}`;
}
toast.success(message);
} catch (error) { } catch (error) {
console.error('Error extracting ZIP:', error); console.error('Error extracting ZIP:', error);
setUploadedFiles(prev => prev.map(f => setUploadedFiles(prev => prev.map(f =>
@ -471,9 +486,19 @@ export const AgentKnowledgeBaseManager = ({ agentId, agentName }: AgentKnowledge
const handleFileUpload = async (files: FileList | null) => { const handleFileUpload = async (files: FileList | null) => {
if (!files || files.length === 0) return; if (!files || files.length === 0) return;
const supportedExtensions = ['.txt', '.pdf', '.docx'];
const newFiles: UploadedFile[] = []; const newFiles: UploadedFile[] = [];
const rejectedFiles: string[] = [];
for (const file of Array.from(files)) { for (const file of Array.from(files)) {
const fileExtension = file.name.toLowerCase().substring(file.name.lastIndexOf('.'));
// Allow ZIP files as they can contain supported formats
if (!supportedExtensions.includes(fileExtension) && fileExtension !== '.zip') {
rejectedFiles.push(file.name);
continue;
}
const fileId = Math.random().toString(36).substr(2, 9); const fileId = Math.random().toString(36).substr(2, 9);
const uploadedFile: UploadedFile = { const uploadedFile: UploadedFile = {
file, file,
@ -482,15 +507,23 @@ export const AgentKnowledgeBaseManager = ({ agentId, agentName }: AgentKnowledge
}; };
newFiles.push(uploadedFile); newFiles.push(uploadedFile);
// Extract ZIP files to get individual files
if (file.name.toLowerCase().endsWith('.zip')) { if (file.name.toLowerCase().endsWith('.zip')) {
setTimeout(() => extractZipFile(file, fileId), 100); setTimeout(() => extractZipFile(file, fileId), 100);
} }
} }
setUploadedFiles(prev => [...prev, ...newFiles]); if (rejectedFiles.length > 0) {
if (!addDialogOpen) { toast.error(`Unsupported file format(s): ${rejectedFiles.join(', ')}. Only .txt, .pdf, .docx, and .zip files are supported.`);
setAddDialogTab('files'); }
setAddDialogOpen(true);
if (newFiles.length > 0) {
setUploadedFiles(prev => [...prev, ...newFiles]);
if (!addDialogOpen) {
setAddDialogTab('files');
setAddDialogOpen(true);
}
} }
}; };
@ -802,7 +835,7 @@ export const AgentKnowledgeBaseManager = ({ agentId, agentName }: AgentKnowledge
multiple multiple
onChange={(e) => handleFileUpload(e.target.files)} onChange={(e) => handleFileUpload(e.target.files)}
className="hidden" className="hidden"
accept=".txt,.md,.py,.js,.ts,.html,.css,.json,.yaml,.yml,.xml,.csv,.pdf,.docx,.xlsx,.png,.jpg,.jpeg,.gif,.zip" accept=".txt,.pdf,.docx,.zip"
/> />
<Dialog open={addDialogOpen} onOpenChange={setAddDialogOpen}> <Dialog open={addDialogOpen} onOpenChange={setAddDialogOpen}>
<DialogContent className="max-w-4xl max-h-[90vh] overflow-hidden flex flex-col"> <DialogContent className="max-w-4xl max-h-[90vh] overflow-hidden flex flex-col">