From d5c59b1acb93a79d704393cec71ae90fa987016e Mon Sep 17 00:00:00 2001 From: marko-kraemer Date: Mon, 21 Apr 2025 01:06:10 +0100 Subject: [PATCH] tool vie wip --- backend/agent/tools/web_search_tool.py | 64 +++++----- backend/sandbox/sandbox.py | 10 +- .../src/components/sidebar/nav-agents.tsx | 2 +- .../src/components/sidebar/sidebar-left.tsx | 4 +- .../thread/tool-call-side-panel.tsx | 2 +- .../tool-views/FileOperationToolView.tsx | 47 ++++++- .../thread/tool-views/WebSearchToolView.tsx | 60 +++++---- .../src/components/thread/tool-views/utils.ts | 117 +++++++++++++++--- 8 files changed, 214 insertions(+), 92 deletions(-) diff --git a/backend/agent/tools/web_search_tool.py b/backend/agent/tools/web_search_tool.py index aea336cf..fc5ff9f6 100644 --- a/backend/agent/tools/web_search_tool.py +++ b/backend/agent/tools/web_search_tool.py @@ -5,6 +5,7 @@ from datetime import datetime import os from dotenv import load_dotenv from agentpress.tool import Tool, ToolResult, openapi_schema, xml_schema +import json # TODO: add subpages, etc... in filters as sometimes its necessary @@ -92,30 +93,13 @@ class WebSearchTool(Tool): ) -> ToolResult: """ Search the web using the Exa API to find relevant and up-to-date information. - - This function performs a web search based on the provided query and returns a list - of relevant search results. Each result includes metadata about the webpage, such as - title, URL, summary (if requested), publication date, and relevance score. - - The returned data for each result includes: - - Title: The title of the webpage - - URL: The URL of the webpage - - Summary: A brief summary of the webpage content (if summary=True) - - Published Date: When the content was published (if available) - - Score: The relevance score of the result - - Parameters: - - query: The search query to find relevant web pages - - summary: Whether to include a summary of the results (default: True) - - num_results: The number of results to return (default: 20) """ try: # Ensure we have a valid query if not query or not isinstance(query, str): return self.fail_response("A valid search query is required.") - # ---------- Tavily search parameters ---------- - # num_results normalisation (1‑50) + # Normalize num_results if num_results is None: num_results = 20 elif isinstance(num_results, int): @@ -136,30 +120,36 @@ class WebSearchTool(Tool): include_images=False, ) - # `tavily` may return a dict with `results` or a bare list + # Normalize the response format raw_results = ( search_response.get("results") if isinstance(search_response, dict) else search_response ) + # Format results consistently formatted_results = [] for result in raw_results: formatted_result = { - "Title": result.get("title"), - "URL": result.get("url"), + "title": result.get("title", ""), + "url": result.get("url", ""), } if summary: # Prefer full content; fall back to description - if result.get("content"): - formatted_result["Summary"] = result["content"] - elif result.get("description"): - formatted_result["Summary"] = result["description"] + formatted_result["snippet"] = ( + result.get("content") or + result.get("description") or + "" + ) formatted_results.append(formatted_result) - return self.success_response(formatted_results) + # Return a properly formatted ToolResult + return ToolResult( + success=True, + output=json.dumps(formatted_results, ensure_ascii=False) + ) except Exception as e: error_message = str(e) @@ -257,11 +247,8 @@ class WebSearchTool(Tool): ) response.raise_for_status() data = response.json() - print(f"--- Raw Tavily Response ---") - print(data) - print(f"--------------------------") - # Normalise Tavily extract output to a list of dicts + # Normalize Tavily extract output to a list of dicts extracted = [] if isinstance(data, list): extracted = data @@ -273,18 +260,25 @@ class WebSearchTool(Tool): else: extracted = [data] + # Format results consistently formatted_results = [] for item in extracted: formatted_result = { - "Title": item.get("title"), - "URL": item.get("url") or url, - "Text":item.get("raw_content") or item.get("content") or item.get("text") + "title": item.get("title", ""), + "url": item.get("url", url), + "content": item.get("raw_content") or item.get("content") or item.get("text", "") } + if item.get("published_date"): - formatted_result["Published Date"] = item["published_date"] + formatted_result["published_date"] = item["published_date"] + formatted_results.append(formatted_result) - return self.success_response(formatted_results) + # Return a properly formatted ToolResult + return ToolResult( + success=True, + output=json.dumps(formatted_results, ensure_ascii=False) + ) except Exception as e: error_message = str(e) diff --git a/backend/sandbox/sandbox.py b/backend/sandbox/sandbox.py index eae680b6..3a62d424 100644 --- a/backend/sandbox/sandbox.py +++ b/backend/sandbox/sandbox.py @@ -111,14 +111,14 @@ def create_sandbox(password: str): "CHROME_DEBUGGING_HOST": "localhost", "CHROME_CDP": "" }, - # ports=[ - # 7788, # Gradio default port - # 6080, # noVNC web interface + ports=[ + # 7788, # Gradio default port + 6080, # noVNC web interface # 5900, # VNC port # 5901, # VNC port # 9222, # Chrome remote debugging port - # 8080 # HTTP website port - # ] + 8080 # HTTP website port + ] )) logger.info(f"Sandbox created with ID: {sandbox.id}") diff --git a/frontend/src/components/sidebar/nav-agents.tsx b/frontend/src/components/sidebar/nav-agents.tsx index b4eb6d19..c661eedd 100644 --- a/frontend/src/components/sidebar/nav-agents.tsx +++ b/frontend/src/components/sidebar/nav-agents.tsx @@ -193,7 +193,7 @@ export function NavAgents() { ) : null} - + {state === "collapsed" && ( diff --git a/frontend/src/components/sidebar/sidebar-left.tsx b/frontend/src/components/sidebar/sidebar-left.tsx index 9944c963..888e9e63 100644 --- a/frontend/src/components/sidebar/sidebar-left.tsx +++ b/frontend/src/components/sidebar/sidebar-left.tsx @@ -77,7 +77,7 @@ export function SidebarLeft({ }, [state, setOpen]); return ( - +
@@ -100,7 +100,7 @@ export function SidebarLeft({ )}
- + {state !== "collapsed" && ( diff --git a/frontend/src/components/thread/tool-call-side-panel.tsx b/frontend/src/components/thread/tool-call-side-panel.tsx index d1723224..9496ede6 100644 --- a/frontend/src/components/thread/tool-call-side-panel.tsx +++ b/frontend/src/components/thread/tool-call-side-panel.tsx @@ -312,7 +312,7 @@ export function ToolCallSidePanel({ }; return ( -
+
{renderContent()}
diff --git a/frontend/src/components/thread/tool-views/FileOperationToolView.tsx b/frontend/src/components/thread/tool-views/FileOperationToolView.tsx index 8cd38f53..731a7db4 100644 --- a/frontend/src/components/thread/tool-views/FileOperationToolView.tsx +++ b/frontend/src/components/thread/tool-views/FileOperationToolView.tsx @@ -3,6 +3,7 @@ import { FileCode, FileSymlink, FolderPlus, FileX, Replace, CheckCircle, AlertTr import { ToolViewProps } from "./types"; import { extractFilePath, extractFileContent, getFileType, formatTimestamp, getToolTitle } from "./utils"; import { GenericToolView } from "./GenericToolView"; +import { Markdown } from "@/components/ui/markdown"; import { cn } from "@/lib/utils"; // Type for operation type @@ -70,7 +71,7 @@ export function FileOperationToolView({ : undefined; // Add state for view mode toggle (code or preview) - moved before any conditional returns - const [viewMode, setViewMode] = useState<'code' | 'preview'>(isHtml ? 'preview' : 'code'); + const [viewMode, setViewMode] = useState<'code' | 'preview'>(isHtml || isMarkdown ? 'preview' : 'code'); // Fall back to generic view if file path is missing or if content is missing for non-delete operations if ((!filePath && !showDebugInfo) || (operation !== "delete" && !fileContent)) { @@ -152,14 +153,43 @@ export function FileOperationToolView({
)} + {/* View switcher for Markdown files */} + {isMarkdown && isSuccess && ( +
+ + +
+ )} {fileType}
- {/* File Content */} - {(!isHtml || viewMode === 'code' || !htmlPreviewUrl || !isSuccess) && ( + {/* File Content (Code View) */} + {viewMode === 'code' || (!isHtml && !isMarkdown) || !isSuccess ? (
{contentLines.map((line, idx) => ( @@ -175,7 +205,7 @@ export function FileOperationToolView({
- )} + ) : null} {/* HTML Preview with iframe */} {isHtml && viewMode === 'preview' && htmlPreviewUrl && isSuccess && ( @@ -190,6 +220,15 @@ export function FileOperationToolView({ )} + {/* Markdown Preview */} + {isMarkdown && viewMode === 'preview' && isSuccess && ( +
+ + {fileContent} + +
+ )} + {/* External link button for HTML files */} {isHtml && viewMode === 'preview' && htmlPreviewUrl && isSuccess && (
diff --git a/frontend/src/components/thread/tool-views/WebSearchToolView.tsx b/frontend/src/components/thread/tool-views/WebSearchToolView.tsx index e8251e8c..3081b819 100644 --- a/frontend/src/components/thread/tool-views/WebSearchToolView.tsx +++ b/frontend/src/components/thread/tool-views/WebSearchToolView.tsx @@ -13,6 +13,15 @@ export function WebSearchToolView({ isSuccess = true, isStreaming = false }: ToolViewProps) { + console.log({ + name, + assistantContent, + toolContent, + assistantTimestamp, + toolTimestamp, + isSuccess, + isStreaming + }); const query = extractSearchQuery(assistantContent); const searchResults = extractSearchResults(toolContent); const toolTitle = getToolTitle(name); @@ -20,47 +29,47 @@ export function WebSearchToolView({ return (
-
-
+
+
- - Search Results + + Search Results
-
-
-
Query:
-
{query || 'Unknown query'}
+
+
+
Query:
+
{query || 'Unknown query'}
-
+
{isStreaming ? 'Searching...' : searchResults.length > 0 ? `Found ${searchResults.length} results` : 'No results found'}
-
+
{isStreaming ? ( -
- +
+

Searching the web...

This might take a moment

) : searchResults.length > 0 ? (
{searchResults.map((result, idx) => ( -
+
-
+
{cleanUrl(result.url)}
{result.title} - +
{result.snippet && ( @@ -72,36 +81,35 @@ export function WebSearchToolView({ ))}
) : ( -
- -

No results found

-

Try a different search query

+
+ +

No results found

+

Try refining your search query

)}
- {/* Footer */} -
+
{!isStreaming && ( -
+
{isSuccess ? ( ) : ( )} - {isSuccess ? 'Search completed successfully' : 'Search failed'} + {isSuccess ? 'Search completed' : 'Search failed'}
)} {isStreaming && ( -
+
- Searching the web... + Searching...
)} diff --git a/frontend/src/components/thread/tool-views/utils.ts b/frontend/src/components/thread/tool-views/utils.ts index bcb8770a..f5c5131a 100644 --- a/frontend/src/components/thread/tool-views/utils.ts +++ b/frontend/src/components/thread/tool-views/utils.ts @@ -246,8 +246,59 @@ export function extractBrowserOperation(toolName: string | undefined): string { // Helper to extract search query export function extractSearchQuery(content: string | undefined): string | null { if (!content) return null; - const queryMatch = content.match(/query=["']([\s\S]*?)["']/); - return queryMatch ? queryMatch[1] : null; + + let contentToSearch = content; // Start with the original content + + // 3. Try parsing as JSON first, as the relevant content might be nested + try { + const parsedOuter = JSON.parse(content); + if (typeof parsedOuter.content === 'string') { + // If the outer content is JSON and has a 'content' string field, + // use that inner content for searching the query. + contentToSearch = parsedOuter.content; + + // Also check common JSON structures within the outer parsed object itself + if (typeof parsedOuter.query === 'string') { + return parsedOuter.query; + } + if (typeof parsedOuter.arguments === 'object' && parsedOuter.arguments !== null && typeof parsedOuter.arguments.query === 'string') { + return parsedOuter.arguments.query; + } + if (Array.isArray(parsedOuter.tool_calls) && parsedOuter.tool_calls.length > 0) { + const toolCall = parsedOuter.tool_calls[0]; + if (typeof toolCall.arguments === 'object' && toolCall.arguments !== null && typeof toolCall.arguments.query === 'string') { + return toolCall.arguments.query; + } + if (typeof toolCall.arguments === 'string') { + try { + const argsParsed = JSON.parse(toolCall.arguments); + if (typeof argsParsed.query === 'string') { + return argsParsed.query; + } + } catch {} + } + } + } + } catch (e) { + // If parsing fails, continue with the original content string + } + + // Now search within contentToSearch (either original or nested content) + + // 1. Try regex for attribute within tag + const xmlQueryMatch = contentToSearch.match(/]*query=[\"']([^\"']*)["'][^>]*>/i); + if (xmlQueryMatch && xmlQueryMatch[1]) { + return xmlQueryMatch[1].trim(); + } + + // 2. Try simple attribute regex (fallback, less specific) + const simpleAttrMatch = contentToSearch.match(/query=[\"']([\s\S]*?)["']/i); + if (simpleAttrMatch && simpleAttrMatch[1]) { + return simpleAttrMatch[1].split(/[\"']/)[0].trim(); + } + + // 4. If nothing found after checking original/nested content and JSON structure, return null + return null; } // Helper to extract search results from tool response @@ -308,24 +359,54 @@ export function extractSearchResults(content: string | undefined): Array<{ title export function extractUrlsAndTitles(content: string): Array<{ title: string, url: string, snippet?: string }> { const results: Array<{ title: string, url: string, snippet?: string }> = []; - // Match URL and title pairs - const urlMatches = content.match(/https?:\/\/[^\s"]+/g) || []; - urlMatches.forEach(url => { - // Try to find a title near this URL - const urlIndex = content.indexOf(url); - const surroundingText = content.substring(Math.max(0, urlIndex - 100), urlIndex + url.length + 100); + // Regex to find URLs, attempting to exclude common trailing unwanted characters/tags + const urlRegex = /https?:\/\/[^\s"<]+/g; + let match; + + while ((match = urlRegex.exec(content)) !== null) { + let url = match[0]; - // Look for "Title:" or similar patterns - const titleMatch = surroundingText.match(/Title[:\s]+([^\n]+)/i) || + // Basic cleaning: remove common tags or artifacts if they are directly appended + url = url.replace(/<\/?url>$/, '') + .replace(/<\/?content>$/, '') + .replace(/%3C$/, ''); // Remove trailing %3C (less than sign) + + // Decode URI components to handle % sequences, but catch errors + try { + url = decodeURIComponent(url); + } catch (e) { + // If decoding fails, use the URL as is, potentially still needs cleaning + console.warn("Failed to decode URL component:", url, e); + } + + // Final cleaning for specific problematic sequences like ellipsis + url = url.replace(/\u2026$/, ''); // Remove trailing ellipsis (…) + + // Try to find a title near this URL - simplified logic + const urlIndex = match.index; + const surroundingText = content.substring(Math.max(0, urlIndex - 100), urlIndex + url.length + 150); // Increased lookahead for content + + // Look for title patterns more robustly + const contentMatch = surroundingText.match(/([^<]+)<\/content>/i); + const titleMatch = surroundingText.match(/Title[:\s]+([^\n<]+)/i) || surroundingText.match(/\"(.*?)\"[\s\n]*?https?:\/\//); - - const title = titleMatch ? titleMatch[1] : cleanUrl(url); - - results.push({ - title: title, - url: url - }); - }); + + let title = cleanUrl(url); // Default to cleaned URL hostname/path + if (contentMatch && contentMatch[1].trim()) { + title = contentMatch[1].trim(); + } else if (titleMatch && titleMatch[1].trim()) { + title = titleMatch[1].trim(); + } + + // Avoid adding duplicates if the cleaning resulted in the same URL + if (!results.some(r => r.url === url)) { + results.push({ + title: title, + url: url + // Snippet extraction could be added here if needed + }); + } + } return results; }