mirror of https://github.com/kortix-ai/suna.git
simplify web search tool
parent 42e294b8ff
commit 364268eaa7

@@ -334,8 +334,8 @@ Your approach is deliberately methodical and persistent:
 - Use prose and paragraphs by default; only employ lists when explicitly requested by users
 - All writing must be highly detailed with a minimum length of several thousand words, unless user explicitly specifies length or format requirements
 - When writing based on references, actively cite original text with sources and provide a reference list with URLs at the end
-- For lengthy documents, first save each section as separate draft files, then append them sequentially to create the final document
-- During final compilation, no content should be reduced or summarized; the final length must exceed the sum of all individual draft files
+- Focus on creating high-quality, cohesive documents directly rather than producing multiple intermediate files
+- Prioritize efficiency and document quality over quantity of files created
 - Use flowing paragraphs rather than lists; provide detailed content with proper citations
 - Strictly follow requirements in writing rules, and avoid using list formats in any files except todo.md
 

@@ -164,53 +164,71 @@ class WebSearchTool(Tool):
         - num_results: The number of results to return (default: 20)
         """
         try:
-            # Handle string to boolean conversion for the summary parameter
-            if isinstance(summary, str):
-                summary = summary.lower() == "true"
+            # Ensure we have a valid query
+            if not query or not isinstance(query, str):
+                return self.fail_response("A valid search query is required.")
 
-            # Handle string to integer conversion for the num_results parameter
-            if isinstance(num_results, str):
-                num_results = int(num_results)
+            # Basic parameters - use only the minimum required to avoid API errors
+            params = {
+                "query": query,
+                "type": "auto",
+                "livecrawl": "auto"
+            }
 
-            # Prepare parameters, only including non-None values
-            params = {"query": query, "summary": summary, "num_results": num_results, "type": "auto"}
+            # Handle summary parameter (boolean conversion)
+            if summary is None:
+                params["summary"] = True
+            elif isinstance(summary, bool):
+                params["summary"] = summary
+            elif isinstance(summary, str):
+                params["summary"] = summary.lower() == "true"
+            else:
+                params["summary"] = True
 
-            if start_published_date:
-                params["start_published_date"] = start_published_date
-            if end_published_date:
-                params["end_published_date"] = end_published_date
-            if start_crawl_date:
-                params["start_crawl_date"] = start_crawl_date
-            if end_crawl_date:
-                params["end_crawl_date"] = end_crawl_date
-            if include_text:
-                params["include_text"] = include_text
-            if exclude_text:
-                params["exclude_text"] = exclude_text
+            # Handle num_results parameter (integer conversion)
+            if num_results is None:
+                params["num_results"] = 20
+            elif isinstance(num_results, int):
+                params["num_results"] = max(1, min(num_results, 50))
+            elif isinstance(num_results, str):
+                try:
+                    params["num_results"] = max(1, min(int(num_results), 50))
+                except ValueError:
+                    params["num_results"] = 20
+            else:
+                params["num_results"] = 20
 
-            # Execute the search
+            # Execute the search with minimal parameters
             search_response = self.exa.search_and_contents(**params)
 
-            # print(search_response)
-
-            # Format the results to include only specified fields
+            # Format the results
             formatted_results = []
             for result in search_response.results:
                 formatted_result = {
                     "Title": result.title,
-                    "URL": result.url,
-                    "Summary": result.summary if hasattr(result, 'summary') else None,
-                    "Published Date": result.published_date,
-                    "Score": result.score
+                    "URL": result.url
                 }
-                formatted_results.append(formatted_result)
 
-            # print(formatted_results)
+                # Add optional fields if they exist
+                if hasattr(result, 'summary') and result.summary:
+                    formatted_result["Summary"] = result.summary
+
+                if hasattr(result, 'published_date') and result.published_date:
+                    formatted_result["Published Date"] = result.published_date
+
+                if hasattr(result, 'score'):
+                    formatted_result["Score"] = result.score
+
+                formatted_results.append(formatted_result)
 
             return self.success_response(formatted_results)
 
         except Exception as e:
-            return self.fail_response(f"Error performing web search: {str(e)}")
+            error_message = str(e)
+            simplified_message = f"Error performing web search: {error_message[:200]}"
+            if len(error_message) > 200:
+                simplified_message += "..."
+            return self.fail_response(simplified_message)
 
     @openapi_schema({
         "type": "function",
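
The rewritten web_search body above funnels loosely typed tool arguments (summary may arrive as a bool or a string, num_results as an int or a string) into a fixed parameter dict before calling exa.search_and_contents. As a quick way to sanity-check the defaults and the 1-50 clamp without an Exa API key, here is a minimal sketch, not part of the commit, that restates the same coercion rules as a standalone helper (normalize_search_params is a hypothetical name):

def normalize_search_params(query, summary=None, num_results=None):
    """Sketch: mirrors the coercion rules from the web_search hunk above."""
    if not query or not isinstance(query, str):
        raise ValueError("A valid search query is required.")

    params = {"query": query, "type": "auto", "livecrawl": "auto"}

    # summary: default to True; accept bools and "true"/"false" strings
    if summary is None:
        params["summary"] = True
    elif isinstance(summary, bool):
        params["summary"] = summary
    elif isinstance(summary, str):
        params["summary"] = summary.lower() == "true"
    else:
        params["summary"] = True

    # num_results: default to 20; clamp to 1..50; fall back to 20 on bad input
    if num_results is None:
        params["num_results"] = 20
    elif isinstance(num_results, int):
        params["num_results"] = max(1, min(num_results, 50))
    elif isinstance(num_results, str):
        try:
            params["num_results"] = max(1, min(int(num_results), 50))
        except ValueError:
            params["num_results"] = 20
    else:
        params["num_results"] = 20

    return params

# Missing values fall back to the documented defaults ...
assert normalize_search_params("rust async runtimes")["num_results"] == 20
assert normalize_search_params("rust async runtimes")["summary"] is True
# ... and string inputs are coerced and clamped rather than rejected
assert normalize_search_params("rust async runtimes", summary="False", num_results="100") == {
    "query": "rust async runtimes",
    "type": "auto",
    "livecrawl": "auto",
    "summary": False,
    "num_results": 50,
}
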

@@ -290,29 +308,49 @@ class WebSearchTool(Tool):
         - url: The URL of the webpage to crawl
         """
         try:
-            # Execute the content extraction
+            # Parse the URL parameter exactly as it would appear in XML
+            if not url:
+                return self.fail_response("A valid URL is required.")
+
+            # Handle url parameter (as it would appear in XML)
+            if isinstance(url, str):
+                # Add protocol if missing
+                if not (url.startswith('http://') or url.startswith('https://')):
+                    url = 'https://' + url
+            else:
+                return self.fail_response("URL must be a string.")
+
+            # Execute the crawl with the parsed URL
             result = self.exa.get_contents(
                 [url],
-                text=True
+                text=True,
+                livecrawl="auto"
             )
 
-            # print(result)
-
-            # Format the results to include only specified fields
+            # Format the results to include all available fields
             formatted_results = []
             for content in result.results:
                 formatted_result = {
                     "Title": content.title,
                     "URL": content.url,
-                    "Published Date": content.published_date,
                     "Text": content.text
                 }
+
+                # Add optional fields if they exist
+                if hasattr(content, 'published_date') and content.published_date:
+                    formatted_result["Published Date"] = content.published_date
+
                 formatted_results.append(formatted_result)
 
             return self.success_response(formatted_results)
 
         except Exception as e:
-            return self.fail_response(f"Error crawling webpage: {str(e)}")
+            error_message = str(e)
+            # Truncate very long error messages
+            simplified_message = f"Error crawling webpage: {error_message[:200]}"
+            if len(error_message) > 200:
+                simplified_message += "..."
+            return self.fail_response(simplified_message)
 
 
 if __name__ == "__main__":
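
Both except blocks in the commit follow the same truncation pattern (cap the raw exception text at 200 characters and append an ellipsis), and crawl_webpage now prefixes https:// when the caller omits a scheme. The sketch below restates those two rules as standalone helpers; normalize_url and truncate_error are hypothetical names used for illustration, not code from the repository:

def normalize_url(url):
    """Sketch of the URL handling in crawl_webpage: require a string, default to https."""
    if not url:
        raise ValueError("A valid URL is required.")
    if not isinstance(url, str):
        raise TypeError("URL must be a string.")
    # Prepend a scheme when the caller passed a bare host or path
    if not (url.startswith("http://") or url.startswith("https://")):
        url = "https://" + url
    return url

def truncate_error(prefix, exc, limit=200):
    """Sketch of the shared error handling: cap raw exception text so tool output stays short."""
    message = str(exc)
    out = f"{prefix}: {message[:limit]}"
    if len(message) > limit:
        out += "..."
    return out

assert normalize_url("example.com/docs") == "https://example.com/docs"
assert normalize_url("http://example.com") == "http://example.com"
assert truncate_error("Error crawling webpage", RuntimeError("x" * 300)).endswith("...")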