diff --git a/packages/ai/package.json b/packages/ai/package.json
index a18892b53..670a63da6 100644
--- a/packages/ai/package.json
+++ b/packages/ai/package.json
@@ -45,6 +45,7 @@
     "@buster/test-utils": "workspace:*",
     "@buster/typescript-config": "workspace:*",
     "@buster/vitest-config": "workspace:*",
+    "@buster-tools/web-tools": "workspace:*",
     "@mastra/core": "catalog:",
     "@mastra/loggers": "^0.10.3",
     "ai": "catalog:",
diff --git a/packages/ai/src/tools/index.ts b/packages/ai/src/tools/index.ts
index 94b27312c..0ab353fd4 100644
--- a/packages/ai/src/tools/index.ts
+++ b/packages/ai/src/tools/index.ts
@@ -19,3 +19,4 @@ export { bashExecute } from './file-tools';
 export { deleteFiles } from './file-tools/delete-files-tool/delete-files-tool';
 export { checkOffTodoList } from './planning-thinking-tools/check-off-todo-list-tool/check-off-todo-list-tool';
 export { updateClarificationsFile } from './planning-thinking-tools/update-clarifications-file-tool/update-clarifications-file-tool';
+export { webSearch } from './web-tools/web-search-tool';
diff --git a/packages/ai/src/tools/web-tools/web-search-tool.int.test.ts b/packages/ai/src/tools/web-tools/web-search-tool.int.test.ts
new file mode 100644
index 000000000..1d3546516
--- /dev/null
+++ b/packages/ai/src/tools/web-tools/web-search-tool.int.test.ts
@@ -0,0 +1,60 @@
+import { describe, expect, it } from 'vitest';
+import { webSearch } from './web-search-tool';
+
+// Evaluated once at collection time; the live tests only run when a key is configured.
+const hasFirecrawlKey = Boolean(process.env.FIRECRAWL_API_KEY);
+
+describe('webSearch tool integration', () => {
+  if (!hasFirecrawlKey) {
+    // Warn while the suite is being collected: hooks such as beforeEach do not
+    // run for skipped tests, so a warning placed there would never be printed.
+    console.warn('Skipping integration tests - FIRECRAWL_API_KEY not set');
+  }
+
+  it.skipIf(!hasFirecrawlKey)(
+    'should perform actual web search and return results',
+    async () => {
+      const result = await webSearch.execute({
+        context: {
+          query: 'Buster Data',
+          limit: 10,
+          scrapeContent: true,
+          formats: ['markdown'],
+        },
+        runtimeContext: {} as any,
+      });
+
+      expect(result.success).toBe(true);
+      expect(result.results).toBeDefined();
+      expect(Array.isArray(result.results)).toBe(true);
+
+      if (result.results.length > 0) {
+        const firstResult = result.results[0]!;
+        expect(firstResult).toHaveProperty('title');
+        expect(firstResult).toHaveProperty('url');
+        expect(firstResult).toHaveProperty('description');
+        expect(typeof firstResult.title).toBe('string');
+        expect(typeof firstResult.url).toBe('string');
+        expect(typeof firstResult.description).toBe('string');
+      }
+    },
+    30000
+  );
+
+  it.skipIf(!hasFirecrawlKey)(
+    'should handle search with minimal options',
+    async () => {
+      const result = await webSearch.execute({
+        context: {
+          query: 'TypeScript',
+        },
+        runtimeContext: {} as any,
+      });
+
+      expect(result.success).toBe(true);
+      expect(result.results).toBeDefined();
+      expect(Array.isArray(result.results)).toBe(true);
+    },
+    30000
+  );
+});
diff --git a/packages/ai/src/tools/web-tools/web-search-tool.test.ts b/packages/ai/src/tools/web-tools/web-search-tool.test.ts
new file mode 100644
index 000000000..606dbfa01
--- /dev/null
+++ b/packages/ai/src/tools/web-tools/web-search-tool.test.ts
@@ -0,0 +1,138 @@
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import { webSearch } from './web-search-tool';
+
+vi.mock('@buster-tools/web-tools', () => {
+  const mockFirecrawlService = {
+    webSearch: vi.fn(),
+  };
+
+  return {
+    FirecrawlService: vi.fn().mockImplementation(() => mockFirecrawlService),
+    mockFirecrawlService,
+  };
+});
+
+describe('webSearch tool', () => {
+  let mockFirecrawlService: any;
+
+  beforeEach(async () => {
+    vi.clearAllMocks();
+    const { mockFirecrawlService: mock } = vi.mocked(
+      await import('@buster-tools/web-tools')
+    ) as any;
+    mockFirecrawlService = mock;
+  });
+
+  it('should transform search results correctly', async () => {
+    const mockResponse = {
+      success: true,
+      results: [
+        {
+          title: 'Test Result',
+          url: 'https://example.com',
+          description: 'Test description',
+          content: 'Test content',
+        },
+      ],
+    };
+
+    mockFirecrawlService.webSearch.mockResolvedValue(mockResponse);
+
+    const result = await webSearch.execute({
+      context: {
+        query: 'test query',
+        limit: 5,
+        scrapeContent: true,
+        formats: ['markdown'],
+      },
+      runtimeContext: {} as any,
+    });
+
+    expect(result.success).toBe(true);
+    expect(result.results).toHaveLength(1);
+    expect(result.results[0]).toEqual({
+      title: 'Test Result',
+      url: 'https://example.com',
+      description: 'Test description',
+      content: 'Test content',
+    });
+  });
+
+  it('should pass correct options to webSearch method', async () => {
+    const mockResponse = {
+      success: true,
+      results: [],
+    };
+
+    mockFirecrawlService.webSearch.mockResolvedValue(mockResponse);
+
+    await webSearch.execute({
+      context: {
+        query: 'test query',
+        limit: 3,
+        scrapeContent: true,
+        formats: ['html', 'markdown'],
+      },
+      runtimeContext: {} as any,
+    });
+
+    expect(mockFirecrawlService.webSearch).toHaveBeenCalledWith('test query', {
+      limit: 3,
+      scrapeOptions: {
+        formats: ['html', 'markdown'],
+      },
+    });
+  });
+
+  it('should handle errors gracefully', async () => {
+    mockFirecrawlService.webSearch.mockRejectedValue(new Error('Search failed'));
+
+    const result = await webSearch.execute({
+      context: {
+        query: 'test query',
+      },
+      runtimeContext: {} as any,
+    });
+
+    expect(result.success).toBe(false);
+    expect(result.results).toEqual([]);
+    expect(result.error).toBe('Search failed');
+  });
+
+  it('should use default values when options are not provided', async () => {
+    const mockResponse = {
+      success: true,
+      results: [],
+    };
+
+    mockFirecrawlService.webSearch.mockResolvedValue(mockResponse);
+
+    await webSearch.execute({
+      context: {
+        query: 'test query',
+      },
+      runtimeContext: {} as any,
+    });
+
+    expect(mockFirecrawlService.webSearch).toHaveBeenCalledWith('test query', {
+      limit: 5,
+      scrapeOptions: {
+        formats: ['markdown'],
+      },
+    });
+  });
+
+  it('should omit scrapeOptions when scrapeContent is false', async () => {
+    mockFirecrawlService.webSearch.mockResolvedValue({ success: true, results: [] });
+
+    await webSearch.execute({
+      context: {
+        query: 'test query',
+        scrapeContent: false,
+      },
+      runtimeContext: {} as any,
+    });
+
+    expect(mockFirecrawlService.webSearch).toHaveBeenCalledWith('test query', { limit: 5 });
+  });
+});
diff --git a/packages/ai/src/tools/web-tools/web-search-tool.ts
b/packages/ai/src/tools/web-tools/web-search-tool.ts
new file mode 100644
index 000000000..ca8b29dfe
--- /dev/null
+++ b/packages/ai/src/tools/web-tools/web-search-tool.ts
@@ -0,0 +1,127 @@
+import {
+  FirecrawlService,
+  type WebSearchOptions,
+  type WebSearchResult,
+} from '@buster-tools/web-tools';
+import type { RuntimeContext } from '@mastra/core/runtime-context';
+import { createTool } from '@mastra/core/tools';
+import { wrapTraced } from 'braintrust';
+import { z } from 'zod';
+
+/** Input accepted by the web-search tool; only `query` is required. */
+const inputSchema = z.object({
+  query: z.string().min(1, 'Search query is required').describe('The search query to execute'),
+  limit: z
+    .number()
+    .int()
+    .min(1)
+    .max(20)
+    .optional()
+    .describe('Maximum number of search results to return (default: 5)'),
+  scrapeContent: z
+    .boolean()
+    .optional()
+    .describe('Whether to scrape content from search results (default: true)'),
+  formats: z
+    .array(z.enum(['markdown', 'html', 'rawHtml', 'links', 'screenshot']))
+    .optional()
+    .describe('Content formats to scrape (default: ["markdown"])'),
+});
+
+/** Result shape returned by the web-search tool for both success and failure. */
+const outputSchema = z.object({
+  success: z.boolean().describe('Whether the search was successful'),
+  results: z
+    .array(
+      z.object({
+        title: z.string().describe('Title of the search result'),
+        url: z.string().describe('URL of the search result'),
+        description: z.string().describe('Description of the search result'),
+        content: z.string().optional().describe('Scraped content from the result (if available)'),
+      })
+    )
+    .describe('Array of search results'),
+  error: z.string().optional().describe('Error message if the search failed'),
+});
+
+type WebSearchInput = z.infer<typeof inputSchema>;
+type WebSearchOutput = z.infer<typeof outputSchema>;
+
+/**
+ * Runs a Firecrawl web search for the tool and maps the service response onto
+ * the tool's output schema.
+ *
+ * Failures never propagate: any thrown error is converted into
+ * `{ success: false, results: [], error }` so callers always receive a
+ * structured result.
+ *
+ * NOTE(review): when `scrapeContent` is false no `scrapeOptions` are sent, but
+ * `FirecrawlService.webSearch` fills in `formats: ['markdown']` whenever they
+ * are absent, so scraping is likely still requested downstream - confirm intent.
+ *
+ * @param input - Tool input matching `inputSchema`.
+ * @param _context - Runtime context supplied by the tool runner; unused here.
+ * @returns The search outcome in the shape described by `outputSchema`.
+ */
+async function executeWebSearch(
+  input: WebSearchInput,
+  _context: RuntimeContext
+): Promise<WebSearchOutput> {
+  try {
+    const firecrawlService = new FirecrawlService();
+
+    const searchOptions: WebSearchOptions = {
+      limit: input.limit ?? 5,
+      // Scraping is opt-out: only an explicit `false` drops the scrape options.
+      ...(input.scrapeContent !== false && {
+        scrapeOptions: {
+          formats: input.formats ?? ['markdown'],
+        },
+      }),
+    };
+
+    const response = await firecrawlService.webSearch(input.query, searchOptions);
+
+    const transformedResults = response.results.map((result: WebSearchResult) => ({
+      title: result.title,
+      url: result.url,
+      description: result.description,
+      ...(result.content && { content: result.content }),
+    }));
+
+    return {
+      success: response.success,
+      results: transformedResults,
+      // Surface a service-reported error instead of silently dropping it.
+      ...(response.error !== undefined && { error: response.error }),
+    };
+  } catch (error) {
+    return {
+      success: false,
+      results: [],
+      error: error instanceof Error ? error.message : 'Unknown error occurred',
+    };
+  }
+}
+
+const executeWebSearchTraced = wrapTraced(executeWebSearch, { name: 'web-search-tool' });
+
+/** Tool definition exposing the traced web search under the `web-search` id. */
+export const webSearch = createTool({
+  id: 'web-search',
+  description:
+    'Search the web for information using Firecrawl. Returns search results with titles, URLs, descriptions, and optionally scraped content. Useful for finding current information, research, and web content.',
+  inputSchema,
+  outputSchema,
+  execute: async ({
+    context,
+    runtimeContext,
+  }: {
+    context: WebSearchInput;
+    runtimeContext: RuntimeContext;
+  }) => {
+    return await executeWebSearchTraced(context, runtimeContext);
+  },
+});
+
+export default webSearch;
diff --git a/packages/web-tools/package.json b/packages/web-tools/package.json
index 5694aa18e..49d661eaa 100644
--- a/packages/web-tools/package.json
+++ b/packages/web-tools/package.json
@@ -3,7 +3,15 @@
   "version": "0.1.0",
   "description": "Web scraping and research tools using Firecrawl and other services",
   "type": "module",
-  "module": "src/index.ts",
+  "main": "dist/index.js",
+  "module": "dist/index.js",
+  "types": "dist/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js"
+    }
+  },
   "scripts": {
     "build": "tsc --build",
     "lint": "biome check --write",
diff --git a/packages/web-tools/src/index.ts b/packages/web-tools/src/index.ts
index efe39dfa4..e1f62e7da 100644
--- a/packages/web-tools/src/index.ts
+++ b/packages/web-tools/src/index.ts
@@ -10,6 +10,11 @@ export type {
   CompanyResearchError,
 } from './deep-research/types';
 
-export type { FirecrawlConfig } from './services/firecrawl';
+export type {
+  FirecrawlConfig,
+  WebSearchOptions,
+  WebSearchResult,
+  WebSearchResponse,
+} from './services/firecrawl';
 
 export type { PollingOptions } from './utils/polling';
diff --git a/packages/web-tools/src/services/firecrawl.test.ts b/packages/web-tools/src/services/firecrawl.test.ts
index 65a08f30a..e4a498090 100644
--- a/packages/web-tools/src/services/firecrawl.test.ts
+++ b/packages/web-tools/src/services/firecrawl.test.ts
@@ -6,6 +6,7 @@ interface MockFirecrawlApp {
   deepResearch: ReturnType<typeof vi.fn>;
   checkDeepResearchStatus: ReturnType<typeof vi.fn>;
   scrapeUrl: ReturnType<typeof vi.fn>;
+  search: ReturnType<typeof vi.fn>;
 }
 
 // Mock the FirecrawlApp
@@ -16,6 +17,7 @@ vi.mock('@mendable/firecrawl-js', () => {
       deepResearch: vi.fn(),
       checkDeepResearchStatus: vi.fn(),
       scrapeUrl: vi.fn(),
+      search: vi.fn(),
     })
   ),
 };
@@ -180,4 +182,93 @@ describe('FirecrawlService', () => {
       expect(isValid).toBe(false);
     });
   });
+
+  describe('webSearch', () => {
+    beforeEach(() => {
+      service = new FirecrawlService();
+    });
+
+    it('should perform web search with default options', async () => {
+      const mockResponse = {
+        success: true,
+        data: [
+          {
+            title: 'Test Result',
+            url: 'https://example.com',
+            description: 'Test description',
+            content: 'Test content',
+          },
+        ],
+      };
+      const mockApp = (service as unknown as { app: MockFirecrawlApp }).app;
+      mockApp.search.mockResolvedValue(mockResponse);
+
+      const result = await service.webSearch('test query');
+
+      expect(result.success).toBe(true);
+      expect(result.results).toHaveLength(1);
+      expect(result.results[0]).toEqual({
+        title: 'Test Result',
+        url: 'https://example.com',
+        description: 'Test description',
+        content: 'Test content',
+      });
+      expect(mockApp.search).toHaveBeenCalledWith('test query', {
+        limit: 5,
+        scrapeOptions: {
+          formats: ['markdown'],
+        },
+      });
+    });
+
+    it('should perform web search with content scraping options', async () => {
+      const mockResponse = {
+        success: true,
+        data: [
+          {
+            title: 'Test Result',
+            url: 'https://example.com',
+            description: 'Test description',
+            content: 'Test content',
+          },
+        ],
+      };
+      const mockApp = (service as unknown as { app: MockFirecrawlApp }).app;
+      mockApp.search.mockResolvedValue(mockResponse);
+
+      const result = await service.webSearch('test query', {
+        limit: 3,
+        location: 'US',
+        scrapeOptions: {
+          formats: ['html', 'markdown'],
+          onlyMainContent: false,
+        },
+      });
+
+      expect(result.success).toBe(true);
+      expect(result.results).toHaveLength(1);
+      expect(mockApp.search).toHaveBeenCalledWith('test query', {
+        limit: 3,
+        location: 'US',
+        scrapeOptions: {
+          formats: ['html', 'markdown'],
+          onlyMainContent: false,
+        },
+      });
+    });
+
+    it('should handle search errors', async () => {
+      const mockApp = (service as unknown as { app: MockFirecrawlApp }).app;
+      mockApp.search.mockRejectedValue(new Error('Search failed'));
+
+      await expect(service.webSearch('test query')).rejects.toThrow(CompanyResearchError);
+    });
+
+    it('should reject when the API reports an unsuccessful search', async () => {
+      const mockApp = (service as unknown as { app: MockFirecrawlApp }).app;
+      mockApp.search.mockResolvedValue({ success: false });
+
+      await expect(service.webSearch('test query')).rejects.toThrow(CompanyResearchError);
+    });
+  });
 });
diff --git a/packages/web-tools/src/services/firecrawl.ts b/packages/web-tools/src/services/firecrawl.ts
index 44389aabe..afa9f1551 100644
--- a/packages/web-tools/src/services/firecrawl.ts
+++ b/packages/web-tools/src/services/firecrawl.ts
@@ -46,6 +46,30 @@ interface ScrapeResponse {
   error?: string;
 }
 
+export interface WebSearchOptions {
+  limit?: number;
+  location?: string;
+  tbs?: string;
+  timeout?: number;
+  scrapeOptions?: {
+    formats?: ('markdown' | 'html' | 'rawHtml' | 'links' | 'screenshot')[];
+    onlyMainContent?: boolean;
+  };
+}
+
+export interface WebSearchResult {
+  title: string;
+  url: string;
+  description: string;
+  content?: string;
+}
+
+export interface WebSearchResponse {
+  success: boolean;
+  results: WebSearchResult[];
+  error?: string;
+}
+
 export class FirecrawlService {
   private app: FirecrawlApp;
 
@@ -168,4 +192,77 @@ export class FirecrawlService {
       return false;
     }
   }
+
+  /**
+   * Search the web using Firecrawl's search endpoint.
+   *
+   * Defaults to 5 results and markdown scraping when the corresponding options
+   * are not supplied.
+   *
+   * @param query - Search query passed to Firecrawl.
+   * @param options - Optional limit, location, `tbs`, timeout and scrape settings.
+   * @returns Normalized results; missing title/url/description become empty strings.
+   * @throws CompanyResearchError (`API_ERROR`) when the API reports failure or the call throws.
+   */
+  async webSearch(query: string, options?: WebSearchOptions): Promise<WebSearchResponse> {
+    try {
+      const searchOptions = {
+        limit: options?.limit || 5,
+        ...(options?.location && { location: options.location }),
+        ...(options?.tbs && { tbs: options.tbs }),
+        ...(options?.timeout && { timeout: options.timeout }),
+        scrapeOptions: {
+          formats: options?.scrapeOptions?.formats ?? ['markdown'],
+          ...(options?.scrapeOptions?.onlyMainContent !== undefined && {
+            onlyMainContent: options.scrapeOptions.onlyMainContent,
+          }),
+        },
+      };
+
+      const response = await this.app.search(query, searchOptions);
+
+      const searchResponse = response as {
+        success?: boolean;
+        data?: Array<{
+          title?: string;
+          url?: string;
+          description?: string;
+          content?: string;
+        }>;
+        error?: string;
+      };
+
+      const { success, error: apiError } = searchResponse;
+      // An explicit `success: false` is a failure even without error text.
+      if (success === false || (!success && apiError)) {
+        const reason = apiError ?? 'Unknown error';
+        throw new CompanyResearchError(`Search failed: ${reason}`, 'API_ERROR', reason);
+      }
+
+      // NOTE(review): scraped output may arrive under format-specific keys (e.g.
+      // `markdown`) rather than `content` - confirm against the Firecrawl SDK types.
+      const results: WebSearchResult[] = (searchResponse.data || []).map((item) => ({
+        title: item.title || '',
+        url: item.url || '',
+        description: item.description || '',
+        ...(item.content && { content: item.content }),
+      }));
+
+      return {
+        success: true,
+        results,
+      };
+    } catch (error) {
+      // Errors raised above are already in their final form; wrapping them again
+      // would only nest the message ("Failed to perform web search: Search failed: ...").
+      if (error instanceof CompanyResearchError) {
+        throw error;
+      }
+      throw new CompanyResearchError(
+        `Failed to perform web search: ${error instanceof Error ? error.message : 'Unknown error'}`,
+        'API_ERROR',
+        error instanceof Error ? error : String(error)
+      );
+    }
+  }
 }
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 93309bd80..8110ff617 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -681,6 +681,9 @@ importers:
       '@ai-sdk/provider':
         specifier: ^1.1.3
         version: 1.1.3
+      '@buster-tools/web-tools':
+        specifier: workspace:*
+        version: link:../web-tools
       '@buster/access-controls':
         specifier: workspace:*
         version: link:../access-controls
@@ -6117,7 +6120,6 @@ packages:
 
   bun@1.2.18:
     resolution: {integrity: sha512-OR+EpNckoJN4tHMVZPaTPxDj2RgpJgJwLruTIFYbO3bQMguLd0YrmkWKYqsiihcLgm2ehIjF/H1RLfZiRa7+qQ==}
-    cpu: [arm64, x64, aarch64]
     os: [darwin, linux, win32]
     hasBin: true
 
@@ -17421,14 +17423,14 @@ snapshots:
       msw: 2.10.4(@types/node@20.19.4)(typescript@5.8.3)
       vite: 6.3.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0)
 
-  '@vitest/mocker@3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.8.3))(vite@6.3.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0))':
+  '@vitest/mocker@3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.8.3))(vite@6.3.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0))':
     dependencies:
       '@vitest/spy': 3.2.4
       estree-walker: 3.0.3
       magic-string: 0.30.17
     optionalDependencies:
       msw: 2.10.4(@types/node@24.0.10)(typescript@5.8.3)
-      vite: 6.3.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0)
+      vite: 6.3.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0)
 
   '@vitest/pretty-format@2.0.5':
     dependencies:
@@ -17471,7 +17473,7 @@ snapshots:
       sirv: 3.0.1
       tinyglobby: 0.2.14
       tinyrainbow: 2.0.0
-      vitest:
3.2.4(@edge-runtime/vm@3.2.0)(@types/debug@4.1.12)(@types/node@24.0.10)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(msw@2.10.4(@types/node@24.0.10)(typescript@5.8.3))(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0) + vitest: 3.2.4(@edge-runtime/vm@3.2.0)(@types/debug@4.1.12)(@types/node@20.19.4)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(msw@2.10.4(@types/node@20.19.4)(typescript@5.8.3))(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0) '@vitest/utils@2.0.5': dependencies: @@ -24053,7 +24055,7 @@ snapshots: dependencies: '@types/chai': 5.2.2 '@vitest/expect': 3.2.4 - '@vitest/mocker': 3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.8.3))(vite@6.3.5(@types/node@24.0.10)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0)) + '@vitest/mocker': 3.2.4(msw@2.10.4(@types/node@24.0.10)(typescript@5.8.3))(vite@6.3.5(@types/node@20.19.4)(jiti@2.4.2)(lightningcss@1.30.1)(sass@1.89.2)(terser@5.43.1)(tsx@4.20.3)(yaml@2.8.0)) '@vitest/pretty-format': 3.2.4 '@vitest/runner': 3.2.4 '@vitest/snapshot': 3.2.4