simplify web search tool

marko-kraemer 2025-04-12 16:10:29 +01:00
parent 42e294b8ff
commit 364268eaa7
2 changed files with 80 additions and 42 deletions


@@ -334,8 +334,8 @@ Your approach is deliberately methodical and persistent:
 - Use prose and paragraphs by default; only employ lists when explicitly requested by users
 - All writing must be highly detailed with a minimum length of several thousand words, unless user explicitly specifies length or format requirements
 - When writing based on references, actively cite original text with sources and provide a reference list with URLs at the end
-- For lengthy documents, first save each section as separate draft files, then append them sequentially to create the final document
-- During final compilation, no content should be reduced or summarized; the final length must exceed the sum of all individual draft files
+- Focus on creating high-quality, cohesive documents directly rather than producing multiple intermediate files
+- Prioritize efficiency and document quality over quantity of files created
 - Use flowing paragraphs rather than lists; provide detailed content with proper citations
 - Strictly follow requirements in writing rules, and avoid using list formats in any files except todo.md


@@ -164,53 +164,71 @@ class WebSearchTool(Tool):
         - num_results: The number of results to return (default: 20)
         """
         try:
-            # Handle string to boolean conversion for the summary parameter
-            if isinstance(summary, str):
-                summary = summary.lower() == "true"
-            # Handle string to integer conversion for the num_results parameter
-            if isinstance(num_results, str):
-                num_results = int(num_results)
-            # Prepare parameters, only including non-None values
-            params = {"query": query, "summary": summary, "num_results": num_results, "type": "auto"}
-            if start_published_date:
-                params["start_published_date"] = start_published_date
-            if end_published_date:
-                params["end_published_date"] = end_published_date
-            if start_crawl_date:
-                params["start_crawl_date"] = start_crawl_date
-            if end_crawl_date:
-                params["end_crawl_date"] = end_crawl_date
-            if include_text:
-                params["include_text"] = include_text
-            if exclude_text:
-                params["exclude_text"] = exclude_text
-            # Execute the search
+            # Ensure we have a valid query
+            if not query or not isinstance(query, str):
+                return self.fail_response("A valid search query is required.")
+            # Basic parameters - use only the minimum required to avoid API errors
+            params = {
+                "query": query,
+                "type": "auto",
+                "livecrawl": "auto"
+            }
+            # Handle summary parameter (boolean conversion)
+            if summary is None:
+                params["summary"] = True
+            elif isinstance(summary, bool):
+                params["summary"] = summary
+            elif isinstance(summary, str):
+                params["summary"] = summary.lower() == "true"
+            else:
+                params["summary"] = True
+            # Handle num_results parameter (integer conversion)
+            if num_results is None:
+                params["num_results"] = 20
+            elif isinstance(num_results, int):
+                params["num_results"] = max(1, min(num_results, 50))
+            elif isinstance(num_results, str):
+                try:
+                    params["num_results"] = max(1, min(int(num_results), 50))
+                except ValueError:
+                    params["num_results"] = 20
+            else:
+                params["num_results"] = 20
+            # Execute the search with minimal parameters
             search_response = self.exa.search_and_contents(**params)
-            # print(search_response)
-            # Format the results to include only specified fields
+            # Format the results
             formatted_results = []
             for result in search_response.results:
                 formatted_result = {
                     "Title": result.title,
-                    "URL": result.url,
-                    "Summary": result.summary if hasattr(result, 'summary') else None,
-                    "Published Date": result.published_date,
-                    "Score": result.score
+                    "URL": result.url
                 }
+                # Add optional fields if they exist
+                if hasattr(result, 'summary') and result.summary:
+                    formatted_result["Summary"] = result.summary
+                if hasattr(result, 'published_date') and result.published_date:
+                    formatted_result["Published Date"] = result.published_date
+                if hasattr(result, 'score'):
+                    formatted_result["Score"] = result.score
                 formatted_results.append(formatted_result)
-            # print(formatted_results)
             return self.success_response(formatted_results)
         except Exception as e:
-            return self.fail_response(f"Error performing web search: {str(e)}")
+            error_message = str(e)
+            simplified_message = f"Error performing web search: {error_message[:200]}"
+            if len(error_message) > 200:
+                simplified_message += "..."
+            return self.fail_response(simplified_message)
 
     @openapi_schema({
         "type": "function",
@@ -290,29 +308,49 @@ class WebSearchTool(Tool):
         - url: The URL of the webpage to crawl
         """
         try:
-            # Execute the content extraction
+            # Parse the URL parameter exactly as it would appear in XML
+            if not url:
+                return self.fail_response("A valid URL is required.")
+            # Handle url parameter (as it would appear in XML)
+            if isinstance(url, str):
+                # Add protocol if missing
+                if not (url.startswith('http://') or url.startswith('https://')):
+                    url = 'https://' + url
+            else:
+                return self.fail_response("URL must be a string.")
+            # Execute the crawl with the parsed URL
             result = self.exa.get_contents(
                 [url],
-                text=True
+                text=True,
+                livecrawl="auto"
             )
-            # print(result)
-            # Format the results to include only specified fields
+            # Format the results to include all available fields
             formatted_results = []
             for content in result.results:
                 formatted_result = {
                     "Title": content.title,
                     "URL": content.url,
-                    "Published Date": content.published_date,
                     "Text": content.text
                 }
+                # Add optional fields if they exist
+                if hasattr(content, 'published_date') and content.published_date:
+                    formatted_result["Published Date"] = content.published_date
                 formatted_results.append(formatted_result)
             return self.success_response(formatted_results)
         except Exception as e:
-            return self.fail_response(f"Error crawling webpage: {str(e)}")
+            error_message = str(e)
+            # Truncate very long error messages
+            simplified_message = f"Error crawling webpage: {error_message[:200]}"
+            if len(error_message) > 200:
+                simplified_message += "..."
+            return self.fail_response(simplified_message)
 
 if __name__ == "__main__":
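The crawl hunk applies the same two defensive patterns as the search hunk: normalize loose input before calling Exa (here, prefixing a missing URL scheme) and truncate raw exception text to 200 characters before surfacing it. A minimal sketch of both as hypothetical helpers - these names do not exist in the repo and are for illustration only:

def ensure_scheme(url: str) -> str:
    """Prefix bare domains with https:// so Exa receives a full URL."""
    return url if url.startswith(("http://", "https://")) else "https://" + url

def truncate_error(prefix: str, exc: Exception, limit: int = 200) -> str:
    """Cap raw exception text so tool output stays readable."""
    message = str(exc)
    suffix = "..." if len(message) > limit else ""
    return f"{prefix}: {message[:limit]}{suffix}"

assert ensure_scheme("example.com") == "https://example.com"
assert truncate_error("Error crawling webpage", ValueError("x" * 300)).endswith("...")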