mirror of https://github.com/buster-so/buster.git
172 lines
4.5 KiB
TypeScript
172 lines
4.5 KiB
TypeScript
|
import FirecrawlApp from '@mendable/firecrawl-js';
|
||
|
import { CompanyResearchError } from '../deep-research/types';
|
||
|
|
||
|
export interface FirecrawlConfig {
|
||
|
apiKey?: string;
|
||
|
apiUrl?: string;
|
||
|
}
|
||
|
|
||
|
interface FirecrawlResponse {
|
||
|
id?: string;
|
||
|
data?: {
|
||
|
id?: string;
|
||
|
};
|
||
|
jobId?: string;
|
||
|
}
|
||
|
|
||
|
interface Source {
|
||
|
url: string;
|
||
|
title: string;
|
||
|
description: string;
|
||
|
}
|
||
|
|
||
|
interface JobStatusResponse {
|
||
|
status?: string;
|
||
|
data?: {
|
||
|
status?: string;
|
||
|
currentStep?: string;
|
||
|
totalSteps?: number;
|
||
|
progress?: number;
|
||
|
finalAnalysis?: string;
|
||
|
sources?: Source[];
|
||
|
error?: string;
|
||
|
};
|
||
|
error?: string;
|
||
|
}
|
||
|
|
||
|
interface ScrapeResponse {
|
||
|
success: boolean;
|
||
|
data?: {
|
||
|
markdown?: string;
|
||
|
html?: string;
|
||
|
rawHtml?: string;
|
||
|
links?: string[];
|
||
|
screenshot?: string;
|
||
|
};
|
||
|
error?: string;
|
||
|
}
|
||
|
|
||
|
export class FirecrawlService {
|
||
|
private app: FirecrawlApp;
|
||
|
|
||
|
constructor(config?: FirecrawlConfig) {
|
||
|
const apiKey = config?.apiKey || process.env.FIRECRAWL_API_KEY;
|
||
|
|
||
|
if (!apiKey) {
|
||
|
throw new CompanyResearchError(
|
||
|
'Firecrawl API key is required. Set FIRECRAWL_API_KEY environment variable or pass it in config.',
|
||
|
'API_ERROR'
|
||
|
);
|
||
|
}
|
||
|
|
||
|
this.app = new FirecrawlApp({
|
||
|
apiKey,
|
||
|
...(config?.apiUrl && { apiUrl: config.apiUrl }),
|
||
|
});
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Start a deep research job using Firecrawl's native method
|
||
|
*/
|
||
|
async startDeepResearch(
|
||
|
query: string,
|
||
|
options?: {
|
||
|
systemPrompt?: string;
|
||
|
analysisPrompt?: string;
|
||
|
}
|
||
|
): Promise<string | JobStatusResponse> {
|
||
|
try {
|
||
|
// Use the SDK's native deep research method
|
||
|
const response = await this.app.deepResearch(query, {
|
||
|
...(options?.systemPrompt && { systemPrompt: options.systemPrompt }),
|
||
|
...(options?.analysisPrompt && { analysisPrompt: options.analysisPrompt }),
|
||
|
});
|
||
|
|
||
|
const firecrawlResponse = response as FirecrawlResponse & JobStatusResponse;
|
||
|
|
||
|
// Check if this is an immediate result (has finalAnalysis)
|
||
|
if (firecrawlResponse.data?.finalAnalysis || firecrawlResponse.status === 'completed') {
|
||
|
// Return the complete response for immediate processing
|
||
|
return firecrawlResponse as JobStatusResponse;
|
||
|
}
|
||
|
|
||
|
// Extract job ID for polling-based responses
|
||
|
const jobId =
|
||
|
firecrawlResponse?.id || firecrawlResponse?.data?.id || firecrawlResponse?.jobId;
|
||
|
|
||
|
if (!jobId) {
|
||
|
throw new CompanyResearchError(
|
||
|
'Failed to start deep research job - no job ID returned and no immediate results',
|
||
|
'API_ERROR',
|
||
|
response
|
||
|
);
|
||
|
}
|
||
|
|
||
|
return jobId;
|
||
|
} catch (error) {
|
||
|
throw new CompanyResearchError(
|
||
|
`Failed to start deep research job: ${error instanceof Error ? error.message : 'Unknown error'}`,
|
||
|
'API_ERROR',
|
||
|
error instanceof Error ? error : String(error)
|
||
|
);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Check the status of a deep research job using Firecrawl's native method
|
||
|
*/
|
||
|
async getJobStatus(jobId: string): Promise<JobStatusResponse> {
|
||
|
try {
|
||
|
// Use the SDK's native status check method
|
||
|
const response = await (
|
||
|
this.app as unknown as { checkDeepResearchStatus: (id: string) => Promise<unknown> }
|
||
|
).checkDeepResearchStatus(jobId);
|
||
|
return response as JobStatusResponse;
|
||
|
} catch (error) {
|
||
|
throw new CompanyResearchError(
|
||
|
`Failed to get job status: ${error instanceof Error ? error.message : 'Unknown error'}`,
|
||
|
'API_ERROR',
|
||
|
error instanceof Error ? error : String(error)
|
||
|
);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Scrape a single URL using Firecrawl's native method
|
||
|
*/
|
||
|
async scrapeUrl(
|
||
|
url: string,
|
||
|
options?: {
|
||
|
formats?: ('markdown' | 'html' | 'rawHtml' | 'links' | 'screenshot')[];
|
||
|
onlyMainContent?: boolean;
|
||
|
}
|
||
|
): Promise<ScrapeResponse> {
|
||
|
try {
|
||
|
const response = await this.app.scrapeUrl(url, {
|
||
|
formats: options?.formats || ['markdown'],
|
||
|
onlyMainContent: options?.onlyMainContent ?? true,
|
||
|
});
|
||
|
|
||
|
return response as ScrapeResponse;
|
||
|
} catch (error) {
|
||
|
throw new CompanyResearchError(
|
||
|
`Failed to scrape URL: ${error instanceof Error ? error.message : 'Unknown error'}`,
|
||
|
'API_ERROR',
|
||
|
error instanceof Error ? error : String(error)
|
||
|
);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* Validate that a URL is accessible and scrapeable
|
||
|
*/
|
||
|
async validateUrl(url: string): Promise<boolean> {
|
||
|
try {
|
||
|
await this.scrapeUrl(url, { formats: ['markdown'], onlyMainContent: true });
|
||
|
return true;
|
||
|
} catch {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
}
|