simplify web search tool

This commit is contained in:
marko-kraemer 2025-04-12 16:13:38 +01:00
parent 364268eaa7
commit 708ee68414
1 changed file with 12 additions and 91 deletions

View File

@ -35,36 +35,6 @@ class WebSearchTool(Tool):
"description": "Whether to include a summary of each search result. Summaries provide key context about each page without requiring full content extraction. Set to true to get concise descriptions of each result.", "description": "Whether to include a summary of each search result. Summaries provide key context about each page without requiring full content extraction. Set to true to get concise descriptions of each result.",
"default": True "default": True
}, },
"start_published_date": {
"type": "string",
"description": "Optional start date to filter results by publication date (ISO format YYYY-MM-DDTHH:MM:SS.sssZ). Use this to find content published after a specific date, useful for recent news or updated information."
},
"end_published_date": {
"type": "string",
"description": "Optional end date to filter results by publication date (ISO format YYYY-MM-DDTHH:MM:SS.sssZ). Use this to limit results to content published before a specific date, helpful for historical information."
},
"start_crawl_date": {
"type": "string",
"description": "Optional start date to filter results by when they were crawled (ISO format YYYY-MM-DDTHH:MM:SS.sssZ). This can be useful for finding content that was recently indexed by search engines."
},
"end_crawl_date": {
"type": "string",
"description": "Optional end date to filter results by when they were crawled (ISO format YYYY-MM-DDTHH:MM:SS.sssZ). This can help filter out potentially outdated content."
},
"include_text": {
"type": "array",
"items": {
"type": "string"
},
"description": "A list of terms that must be included in the search results. Use this to ensure results contain specific keywords, making them more relevant to the query."
},
"exclude_text": {
"type": "array",
"items": {
"type": "string"
},
"description": "A list of terms that must be excluded from the search results. Use this to filter out irrelevant content or focus the search away from certain topics."
},
"num_results": { "num_results": {
"type": "integer", "type": "integer",
"description": "The number of search results to return. Increase for more comprehensive research or decrease for focused, high-relevance results.", "description": "The number of search results to return. Increase for more comprehensive research or decrease for focused, high-relevance results.",
@ -80,46 +50,32 @@ class WebSearchTool(Tool):
mappings=[ mappings=[
{"param_name": "query", "node_type": "attribute", "path": "."}, {"param_name": "query", "node_type": "attribute", "path": "."},
{"param_name": "summary", "node_type": "attribute", "path": "."}, {"param_name": "summary", "node_type": "attribute", "path": "."},
{"param_name": "start_published_date", "node_type": "attribute", "path": "."},
{"param_name": "end_published_date", "node_type": "attribute", "path": "."},
{"param_name": "start_crawl_date", "node_type": "attribute", "path": "."},
{"param_name": "end_crawl_date", "node_type": "attribute", "path": "."},
{"param_name": "include_text", "node_type": "attribute", "path": "."},
{"param_name": "exclude_text", "node_type": "attribute", "path": "."},
{"param_name": "num_results", "node_type": "attribute", "path": "."} {"param_name": "num_results", "node_type": "attribute", "path": "."}
], ],
example=''' example='''
<!-- <!--
The web-search tool allows you to search the internet for real-time information. The web-search tool allows you to search the internet for real-time information.
It's ideal for: Use this tool when you need to find current information, research topics, or verify facts.
- Finding current information not in your training data
- Researching specific topics, events, or entities
- Validating facts and claims
- Discovering recent developments and news
You can refine searches using: The tool returns information including:
- Date filters (start_published_date, end_published_date) for time-specific content - Titles of relevant web pages
- Text inclusion/exclusion to narrow results to relevant content - URLs for accessing the pages
- Number of results to control search breadth - Summaries of page content (if summary=true)
- Published dates (when available)
The tool returns organized information including titles, URLs, summaries, and publication dates.
--> -->
<!-- Basic search example --> <!-- Simple search example -->
<web-search <web-search
query="latest developments in artificial intelligence" query="current weather in New York City"
summary="true" summary="true"
num_results="10"> num_results="10">
</web-search> </web-search>
<!-- Advanced search with filters --> <!-- Another search example -->
<web-search <web-search
query="renewable energy technology" query="healthy breakfast recipes"
summary="true" summary="true"
start_published_date="2023-01-01T00:00:00.000Z" num_results="20">
include_text="solar,wind"
exclude_text="fossil"
num_results="15">
</web-search> </web-search>
''' '''
) )
@ -127,12 +83,6 @@ class WebSearchTool(Tool):
self, self,
query: str, query: str,
summary: bool = True, summary: bool = True,
start_published_date: Optional[str] = None,
end_published_date: Optional[str] = None,
start_crawl_date: Optional[str] = None,
end_crawl_date: Optional[str] = None,
include_text: Optional[List[str]] = None,
exclude_text: Optional[List[str]] = None,
num_results: int = 20 num_results: int = 20
) -> ToolResult: ) -> ToolResult:
""" """
@ -149,18 +99,9 @@ class WebSearchTool(Tool):
- Published Date: When the content was published (if available) - Published Date: When the content was published (if available)
- Score: The relevance score of the result - Score: The relevance score of the result
Use this function to discover relevant web pages before potentially
using crawl_webpage to extract their complete content.
Parameters: Parameters:
- query: The search query to find relevant web pages - query: The search query to find relevant web pages
- summary: Whether to include a summary of the results (default: True) - summary: Whether to include a summary of the results (default: True)
- start_published_date: Optional start date for published results (ISO format)
- end_published_date: Optional end date for published results (ISO format)
- start_crawl_date: Optional start date for crawled results (ISO format)
- end_crawl_date: Optional end date for crawled results (ISO format)
- include_text: List of terms that must be included in the results
- exclude_text: List of terms that must be excluded from the results
- num_results: The number of results to return (default: 20) - num_results: The number of results to return (default: 20)
""" """
try: try:
@ -255,27 +196,7 @@ class WebSearchTool(Tool):
example=''' example='''
<!-- <!--
The crawl-webpage tool extracts the complete text content from web pages. The crawl-webpage tool extracts the complete text content from web pages.
Use this tool when you need: Use this tool when you need detailed information from specific web pages.
- Detailed information from specific articles or pages
- The full text content rather than just summaries
- To analyze or process the contents of a webpage
- To extract information for further processing
This tool returns:
- The webpage title
- The original URL
- Publication date (when available)
- The complete text content of the page
Common use cases:
- Reading articles, blog posts, or news stories
- Extracting documentation or technical guides
- Gathering detailed product information
- Researching academic or scientific content
Note: Some content may be inaccessible due to paywalls, access restrictions,
or dynamic content loading mechanisms.
--> -->
<!-- Basic webpage crawl example --> <!-- Basic webpage crawl example -->