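Enhance UI: persistent VNC browser preview in the tool side panel; simplify Stagehand browser startup and crash recovery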

This commit is contained in:
Krishav Raj Singh 2025-08-06 23:47:48 +05:30
parent 5e79d47875
commit aea04d0cd5
4 changed files with 215 additions and 225 deletions

View File

@ -8,7 +8,7 @@ import base64
import os
import traceback
from datetime import datetime
import random
# Import Stagehand
from stagehand import Stagehand, StagehandConfig
@ -73,13 +73,10 @@ class StagehandBrowserAutomation:
def __init__(self):
self.router = APIRouter()
self.logger = logging.getLogger("stagehand_browser_automation")
self.screenshot_dir = os.path.join(os.getcwd(), "screenshots")
os.makedirs(self.screenshot_dir, exist_ok=True)
# Stagehand session management
self.stagehand: Optional[Stagehand] = None
self.browser_initialized = False
self._startup_called = False # Prevent double initialization
self.current_api_key: Optional[str] = None # Store current API key
# Core Stagehand endpoints
@ -88,7 +85,7 @@ class StagehandBrowserAutomation:
self.router.post("/stagehand/extract")(self.extract)
self.router.post("/stagehand/screenshot")(self.screenshot)
self.router.post("/stagehand/observe")(self.observe)
async def ensure_initialized(self, model_api_key: str):
"""Ensure Stagehand is initialized (simple browser_api pattern)"""
if self.browser_initialized and self.current_api_key == model_api_key:
@ -106,7 +103,6 @@ class StagehandBrowserAutomation:
try:
print("Starting Stagehand browser initialization...")
self._startup_called = True
# API key must be provided via request parameter
if not model_api_key:
@ -119,57 +115,33 @@ class StagehandBrowserAutomation:
# Use same pattern as working browser_api (no X server dependencies)
print("Creating StagehandConfig using browser_api pattern...")
# Try browser launch strategies like browser_api does
browser_configs = [
# First attempt: non-headless (like browser_api)
{
"headless": False,
"timeout": 60000,
"accept_downloads": True,
"downloads_path": "/workspace"
},
# Fallback: minimal options (like browser_api)
{
"timeout": 90000,
"accept_downloads": True,
"downloads_path": "/workspace"
}
]
# Single browser launch configuration (no fallback attempts):
# non-headless, with downloads accepted into /workspace
browser_options = {
"headless": False,
"timeout": 60000,
"accept_downloads": True,
"downloads_path": "/workspace",
}
try:
print("Initializing browser (headless=False)...")
config = StagehandConfig(
env="LOCAL",
model_name="anthropic/claude-3-5-sonnet-20241022",
model_api_key=self.current_api_key,
enable_caching=True,
dom_settle_timeout_ms=30000,
verbose=2,
local_browser_launch_options=browser_options,
)
config = None
last_error = None
for i, browser_options in enumerate(browser_configs):
try:
print(f"Trying browser config {i+1}/{len(browser_configs)}... ({'headless' if browser_options.get('headless') else 'non-headless'})")
config = StagehandConfig(
env="LOCAL",
model_name="anthropic/claude-3-5-sonnet-20241022",
model_api_key=self.current_api_key,
enable_caching=True,
dom_settle_timeout_ms=30000,
verbose=2,
local_browser_launch_options=browser_options,
)
# Test the config by trying to create Stagehand instance
test_stagehand = Stagehand(config)
await test_stagehand.init()
# If we get here, the config works
print(f"✅ Browser config {i+1} succeeded!")
await test_stagehand.close() # Clean up test instance
break
except Exception as e:
last_error = e
error_str = str(e)
print(f"❌ Browser config {i+1} failed: {error_str}")
config = None # Reset config for next iteration
continue
if not config:
raise RuntimeError(f"All browser configurations failed. Last error: {last_error}")
# Validate configuration by launching once
test_stagehand = Stagehand(config)
await test_stagehand.init()
await test_stagehand.close()
except Exception as e:
raise RuntimeError(f"Browser configuration failed: {e}")
# Initialize the actual Stagehand instance (config was already tested)
print("Creating final Stagehand instance...")
@ -209,7 +181,6 @@ class StagehandBrowserAutomation:
except Exception as e:
print(f"❌ Stagehand startup error: {str(e)}")
traceback.print_exc()
self._startup_called = False # Reset flag on error
self.browser_initialized = False
raise RuntimeError(f"Stagehand initialization failed: {str(e)}")
@ -233,7 +204,6 @@ class StagehandBrowserAutomation:
# Reset state
self.stagehand = None
self.browser_initialized = False
self._startup_called = False
self.current_api_key = None
print("Stagehand session state reset")
@ -300,8 +270,30 @@ class StagehandBrowserAutomation:
if "Page crashed" in str(action_error) or "Target closed" in str(action_error):
print(f"Detected browser crash during {action_type}, attempting recovery...")
try:
# Try to get current URL before shutdown for recovery
current_url = None
try:
if self.stagehand and hasattr(self.stagehand, 'page'):
current_url = self.stagehand.page.url
print(f"Preserving URL for recovery: {current_url}")
except:
print("Could not get current URL before shutdown")
# Reinitialize the browser
# Close existing (crashed) browser and start fresh
await self.shutdown()
await self.ensure_initialized(model_api_key)
# Navigate back to the original page if we have it
if current_url and current_url != "https://www.google.com" and action_type != "navigate":
try:
print(f"Recovering original page: {current_url}")
await self.stagehand.page.goto(current_url, wait_until="domcontentloaded", timeout=30000)
await asyncio.sleep(1) # Brief pause to let page settle
except Exception as nav_error:
print(f"Could not recover original URL {current_url}: {nav_error}")
# Continue anyway; the retry will run on the fresh browser's start page (presumably Google.com)
# Retry the action once
if action_type == "navigate":
return await self._navigate_stagehand(params['url'])
@ -397,23 +389,8 @@ class StagehandBrowserAutomation:
if "Page crashed" in error_msg:
error_msg += ". Browser page crashed - this may be due to browser configuration issues in the Docker environment. Consider using the regular browser_api on port 8003 instead."
# Try to get some state info even after error (like browser_api)
try:
screenshot, page_info = await self._get_stagehand_state("navigate_error_recovery")
return BrowserActionResult(
success=False,
message=error_msg,
url=page_info['url'],
title=page_info['title'],
screenshot_base64=screenshot,
error=f"Navigation failed: {error_msg}"
)
except:
# Complete fallback
return BrowserActionResult(
success=False,
error=f"Navigation failed: {error_msg}"
)
# Propagate exception so outer logic can restart the browser and retry
raise RuntimeError(f"Navigation failed due to page crash: {error_msg}")
async def _act_stagehand(self, action: str) -> BrowserActionResult:
"""Execute an action using Stagehand"""
@ -448,9 +425,9 @@ class StagehandBrowserAutomation:
error_msg = str(e)
print(f"Stagehand action error: {error_msg}")
# Check if it's a page crash
if "Page crashed" in error_msg:
error_msg += ". Browser page crashed during action execution."
# Check if it's a page crash - re-raise so recovery logic can handle it
if "Page crashed" in error_msg or "Target closed" in error_msg:
raise RuntimeError(f"Action failed due to page crash: {error_msg}")
return BrowserActionResult(
success=False,
@ -499,9 +476,9 @@ class StagehandBrowserAutomation:
error_msg = str(e)
print(f"Stagehand extraction error: {error_msg}")
# Check if it's a page crash
if "Page crashed" in error_msg:
error_msg += ". Browser page crashed during content extraction."
# Check if it's a page crash - re-raise so recovery logic can handle it
if "Page crashed" in error_msg or "Target closed" in error_msg:
raise RuntimeError(f"Extraction failed due to page crash: {error_msg}")
return BrowserActionResult(
success=False,
@ -544,9 +521,9 @@ class StagehandBrowserAutomation:
error_msg = str(e)
print(f"Stagehand screenshot error: {error_msg}")
# Check if it's a page crash
if "Page crashed" in error_msg:
error_msg += ". Browser page crashed during screenshot."
# Check if it's a page crash - re-raise so recovery logic can handle it
if "Page crashed" in error_msg or "Target closed" in error_msg:
raise RuntimeError(f"Screenshot failed due to page crash: {error_msg}")
return BrowserActionResult(
success=False,
@ -572,6 +549,7 @@ class StagehandBrowserAutomation:
)
# Format observations for response
observation_content = []
if observations:
for i, obs in enumerate(observations, 1):
@ -583,7 +561,7 @@ class StagehandBrowserAutomation:
else:
# Convert to dict if possible
obs_dict = obs.__dict__ if hasattr(obs, '__dict__') else {"observation": str(obs)}
observation_content.append(f"Observation {i}: {json.dumps(obs_dict, indent=2)}")
formatted_content = "\n\n".join(observation_content) if observation_content else "No observations found"
@ -597,8 +575,7 @@ class StagehandBrowserAutomation:
url=page_info['url'],
title=page_info['title'],
screenshot_base64=screenshot,
content=formatted_content,
element_count=len(observations) if observations else 0
content=formatted_content
)
except asyncio.TimeoutError:
@ -612,9 +589,9 @@ class StagehandBrowserAutomation:
error_msg = str(e)
print(f"Stagehand observation error: {error_msg}")
# Check if it's a page crash
if "Page crashed" in error_msg:
error_msg += ". Browser page crashed during observation."
# Check if it's a page crash - re-raise so recovery logic can handle it
if "Page crashed" in error_msg or "Target closed" in error_msg:
raise RuntimeError(f"Observation failed due to page crash: {error_msg}")
return BrowserActionResult(
success=False,

View File

@ -6,13 +6,14 @@ import React from 'react';
import { Slider } from '@/components/ui/slider';
import { Skeleton } from '@/components/ui/skeleton';
import { ApiMessageType } from '@/components/thread/types';
import { CircleDashed, X, ChevronLeft, ChevronRight, Computer, Radio, Maximize2, Minimize2, Copy, Check } from 'lucide-react';
import { CircleDashed, X, ChevronLeft, ChevronRight, Computer, Minimize2 } from 'lucide-react';
import { cn } from '@/lib/utils';
import { useIsMobile } from '@/hooks/use-mobile';
import { Button } from '@/components/ui/button';
import { ToolView } from './tool-views/wrapper';
import { motion, AnimatePresence } from 'framer-motion';
import { toast } from 'sonner';
import { useVncPreloader } from '@/hooks/useVncPreloader';
export interface ToolCallInput {
assistantCall: {
@ -88,6 +89,78 @@ export function ToolCallSidePanel({
const isMobile = useIsMobile();
// Use VNC preloader hook for connection management
const { isPreloaded: isVncReady, preloadedIframe } = useVncPreloader(project);
const [lastProjectSandboxId, setLastProjectSandboxId] = React.useState<string | null>(null);
// Track the current sandbox id so a change of project/sandbox can be detected
React.useEffect(() => {
const currentSandboxId = project?.sandbox?.id;
if (currentSandboxId && currentSandboxId !== lastProjectSandboxId) {
setLastProjectSandboxId(currentSandboxId);
}
}, [project?.sandbox?.id, lastProjectSandboxId]);
// Reuse the preloaded iframe instead of creating a new one
const persistentVncIframe = React.useMemo(() => {
const sandbox = project?.sandbox;
if (!sandbox?.vnc_preview || !sandbox?.pass) return null;
return (
<div className="w-full h-full overflow-hidden relative">
{/* Reuse preloaded iframe if available */}
{isVncReady && preloadedIframe ? (
<div
ref={(container) => {
if (container && preloadedIframe && preloadedIframe.parentNode !== container) {
// Move the preloaded iframe to this container and style it for display
preloadedIframe.style.position = 'static';
preloadedIframe.style.left = '0';
preloadedIframe.style.top = '0';
preloadedIframe.style.width = 'calc(100% + 10px)';
preloadedIframe.style.height = 'calc(100% + 100px)';
preloadedIframe.style.marginTop = '-70px';
preloadedIframe.style.marginLeft = '-10px';
preloadedIframe.style.marginBottom = '-30px';
preloadedIframe.style.marginRight = '-10px';
preloadedIframe.style.border = '0';
preloadedIframe.className = 'w-full border-0';
preloadedIframe.title = 'Persistent Browser Preview';
// Move iframe to the display container
container.appendChild(preloadedIframe);
}
}}
className="w-full h-full"
/>
) : (
/* Show loading state while VNC is connecting */
<div className="absolute inset-0 bg-black/50 flex items-center justify-center">
<div className="bg-white dark:bg-zinc-900 rounded-lg p-4 shadow-lg max-w-sm mx-4">
<div className="flex items-center gap-3 mb-2">
<div className="w-4 h-4 border-2 border-blue-500 border-t-transparent rounded-full animate-spin"></div>
<h3 className="font-medium text-zinc-900 dark:text-zinc-100">
Connecting to Browser...
</h3>
</div>
<p className="text-sm text-zinc-600 dark:text-zinc-400">
Establishing VNC connection, please wait...
</p>
</div>
</div>
)}
</div>
);
}, [project?.sandbox, isVncReady, preloadedIframe]);
const handleClose = React.useCallback(() => {
onClose();
}, [onClose]);
@ -622,6 +695,27 @@ export function ToolCallSidePanel({
);
}
// Check if this is a browser tool to show/hide persistent iframe
const toolName = displayToolCall.assistantCall.name?.toLowerCase() || '';
const isBrowserTool = ['browser-navigate-to', 'browser-act', 'browser-extract-content', 'browser-observe', 'browser-screenshot'].includes(toolName);
// Determine if this is the last tool call in the displayed sequence
const isLastToolCall = displayIndex === displayTotalCalls - 1;
// Check if the agent is currently streaming or running
const isRunning = isStreaming || agentStatus === 'running';
// Check if tool has screenshot content
const hasScreenshot = displayToolCall?.toolResult?.content &&
typeof displayToolCall.toolResult.content === 'string' &&
(displayToolCall.toolResult.content.includes('image_url') ||
displayToolCall.toolResult.content.includes('screenshot_base64'));
// Show iframe when: browser tool + last tool call + (running OR no screenshot)
const shouldShowIframe = isBrowserTool && isLastToolCall && (isRunning || !hasScreenshot);
const toolView = (
<ToolView
name={displayToolCall.assistantCall.name}
@ -637,6 +731,7 @@ export function ToolCallSidePanel({
currentIndex={displayIndex}
totalCalls={displayTotalCalls}
onFileClick={onFileClick}
shouldShowParentIframe={shouldShowIframe}
/>
);
@ -699,8 +794,20 @@ export function ToolCallSidePanel({
</div>
</motion.div>
<div className="flex-1 overflow-auto scrollbar-thin scrollbar-thumb-zinc-300 dark:scrollbar-thumb-zinc-700 scrollbar-track-transparent">
<div className="flex-1 overflow-auto scrollbar-thin scrollbar-thumb-zinc-300 dark:scrollbar-thumb-zinc-700 scrollbar-track-transparent relative">
{toolView}
{/* Persistent VNC iframe - stays mounted whenever a VNC preview is available; visibility is toggled via CSS classes */}
{persistentVncIframe && (
<div
className={`absolute inset-0 z-10 ${shouldShowIframe ? 'block pointer-events-auto' : 'hidden pointer-events-none'}`}
style={{
top: '56px',
}}
>
{persistentVncIframe}
</div>
)}
</div>
</div>
);

View File

@ -1,11 +1,9 @@
import React, { useMemo } from 'react';
import React from 'react';
import {
Globe,
MonitorPlay,
ExternalLink,
CheckCircle,
AlertTriangle,
CircleDashed,
} from 'lucide-react';
import { ToolViewProps } from './types';
import {
@ -34,6 +32,7 @@ export function BrowserToolView({
messages = [],
currentIndex = 0,
totalCalls = 1,
shouldShowParentIframe = false,
}: ToolViewProps) {
// Try to extract data using the new parser first
const assistantToolData = extractToolData(assistantContent);
@ -151,45 +150,10 @@ export function BrowserToolView({
}
}
const vncPreviewUrl = project?.sandbox?.vnc_preview
? `${project.sandbox.vnc_preview}/vnc_lite.html?password=${project?.sandbox?.pass}&autoconnect=true&scale=local&width=1024&height=768`
: undefined;
const isRunning = isStreaming || agentStatus === 'running';
const isLastToolCall = currentIndex === totalCalls - 1;
const vncIframe = useMemo(() => {
if (!vncPreviewUrl) return null;
return (
<iframe
src={vncPreviewUrl}
title="Browser preview"
className="w-full h-full border-0 min-h-[600px]"
style={{ width: '100%', height: '100%', minHeight: '600px' }}
/>
);
}, [vncPreviewUrl]);
const [progress, setProgress] = React.useState(100);
React.useEffect(() => {
if (isRunning) {
setProgress(0);
const timer = setInterval(() => {
setProgress((prevProgress) => {
if (prevProgress >= 95) {
clearInterval(timer);
return prevProgress;
}
return prevProgress + 2;
});
}, 500);
return () => clearInterval(timer);
} else {
setProgress(100);
}
}, [isRunning]);
// Reset loading state when screenshot changes
React.useEffect(() => {
@ -217,7 +181,7 @@ export function BrowserToolView({
{imageLoading && (
<ImageLoader />
)}
<Card className={`p-0 overflow-hidden border ${imageLoading ? 'hidden' : 'block'}`}>
<Card className={`p-0 relative mb-16 overflow-hidden border ${imageLoading ? 'hidden' : 'block'}`}>
<img
src={screenshotUrl}
alt="Browser Screenshot"
@ -299,108 +263,49 @@ export function BrowserToolView({
)}
{isRunning && (
<Badge className="bg-gradient-to-b from-blue-200 to-blue-100 text-blue-700 dark:from-blue-800/50 dark:to-blue-900/60 dark:text-blue-300">
<CircleDashed className="h-3.5 w-3.5 animate-spin" />
Executing browser action
</Badge>
<Badge
variant="outline"
className="px-3 py-1.5 rounded-lg shadow-lg backdrop-blur-md "
>
<div className="w-2 h-2 rounded-full bg-emerald-500 animate-pulse" />
Live Preview
</Badge>
)}
</div>
</CardHeader>
<CardContent className="p-0 flex-1 overflow-hidden relative" style={{ height: 'calc(100vh - 150px)', minHeight: '600px' }}>
<div className="flex-1 flex h-full items-stretch bg-white dark:bg-black">
{isLastToolCall ? (
isRunning && vncIframe ? (
<div className="flex flex-col items-center justify-center w-full h-full min-h-[600px]" style={{ minHeight: '600px' }}>
<div className="relative w-full h-full min-h-[600px]" style={{ minHeight: '600px' }}>
{vncIframe}
<div className="absolute top-4 right-4 z-10">
<Badge className="bg-blue-500/90 text-white border-none shadow-lg animate-pulse">
<CircleDashed className="h-3 w-3 animate-spin" />
{operation} in progress
</Badge>
</div>
</div>
{shouldShowParentIframe ? (
/* Parent iframe should be visible, show transparent background */
<div className="w-full h-full min-h-[600px] bg-transparent" />
) : (screenshotUrl || screenshotBase64) ? (
/* Show screenshot when parent iframe shouldn't be visible */
renderScreenshot()
) : isLastToolCall ? (
/* Last tool call with no screenshot and no iframe: Show fallback */
<div className="p-8 flex flex-col items-center justify-center w-full bg-gradient-to-b from-white to-zinc-50 dark:from-zinc-950 dark:to-zinc-900 text-zinc-700 dark:text-zinc-400">
<div className="w-20 h-20 rounded-full flex items-center justify-center mb-6 bg-gradient-to-b from-purple-100 to-purple-50 shadow-inner dark:from-purple-800/40 dark:to-purple-900/60">
<MonitorPlay className="h-10 w-10 text-purple-400 dark:text-purple-600" />
</div>
) : (screenshotUrl || screenshotBase64) ? (
renderScreenshot()
) : vncIframe ? (
// Use the memoized iframe
<div className="flex flex-col items-center justify-center w-full h-full min-h-[600px]" style={{ minHeight: '600px' }}>
{vncIframe}
<h3 className="text-xl font-semibold mb-2 text-zinc-900 dark:text-zinc-100">
Browser preview not available
</h3>
</div>
) : (
/* Previous tool calls with no screenshot: Show fallback message */
<div className="p-8 flex flex-col items-center justify-center w-full bg-gradient-to-b from-white to-zinc-50 dark:from-zinc-950 dark:to-zinc-900 text-zinc-700 dark:text-zinc-400">
<div className="w-20 h-20 rounded-full flex items-center justify-center mb-6 bg-gradient-to-b from-zinc-100 to-zinc-50 shadow-inner dark:from-zinc-800/40 dark:to-zinc-900/60">
<MonitorPlay className="h-10 w-10 text-zinc-400 dark:text-zinc-600" />
</div>
) : (
<div className="p-8 flex flex-col items-center justify-center w-full bg-gradient-to-b from-white to-zinc-50 dark:from-zinc-950 dark:to-zinc-900 text-zinc-700 dark:text-zinc-400">
<div className="w-20 h-20 rounded-full flex items-center justify-center mb-6 bg-gradient-to-b from-purple-100 to-purple-50 shadow-inner dark:from-purple-800/40 dark:to-purple-900/60">
<MonitorPlay className="h-10 w-10 text-purple-400 dark:text-purple-600" />
</div>
<h3 className="text-xl font-semibold mb-2 text-zinc-900 dark:text-zinc-100">
Browser preview not available
</h3>
{url && (
<div className="mt-4">
<Button
variant="outline"
size="sm"
className="bg-white dark:bg-zinc-900 border-zinc-200 dark:border-zinc-700 shadow-sm hover:shadow-md transition-shadow"
asChild
>
<a href={url} target="_blank" rel="noopener noreferrer">
<ExternalLink className="h-3.5 w-3.5 mr-2" />
Visit URL
</a>
</Button>
</div>
)}
</div>
)
) :
(screenshotUrl || screenshotBase64) ? (
<div className="flex items-center justify-center w-full h-full overflow-auto relative p-4">
{imageLoading && (
<ImageLoader />
)}
<Card className={`p-0 overflow-hidden border ${imageLoading ? 'hidden' : 'block'}`}>
{screenshotUrl ? (
<img
src={screenshotUrl}
alt="Browser Screenshot"
className="max-w-full max-h-full object-contain"
onLoad={handleImageLoad}
onError={handleImageError}
/>
) : (
<img
src={`data:image/jpeg;base64,${screenshotBase64}`}
alt="Browser Screenshot"
className="max-w-full max-h-full object-contain"
onLoad={handleImageLoad}
onError={handleImageError}
/>
)}
</Card>
{imageError && !imageLoading && (
<div className="absolute inset-0 flex items-center justify-center bg-zinc-50 dark:bg-zinc-900">
<div className="text-center text-zinc-500 dark:text-zinc-400">
<AlertTriangle className="h-8 w-8 mx-auto mb-2" />
<p>Failed to load screenshot</p>
</div>
</div>
)}
</div>
) : (
<div className="p-8 h-full flex flex-col items-center justify-center w-full bg-gradient-to-b from-white to-zinc-50 dark:from-zinc-950 dark:to-zinc-900 text-zinc-700 dark:text-zinc-400">
<div className="w-20 h-20 rounded-full flex items-center justify-center mb-6 bg-gradient-to-b from-zinc-100 to-zinc-50 shadow-inner dark:from-zinc-800/40 dark:to-zinc-900/60">
<MonitorPlay className="h-10 w-10 text-zinc-400 dark:text-zinc-600" />
</div>
<h3 className="text-xl font-semibold mb-2 text-zinc-900 dark:text-zinc-100">
No Browser State Available
</h3>
<p className="text-sm text-zinc-500 dark:text-zinc-400">
Browser state image not found for this action
</p>
</div>
)}
<h3 className="text-xl font-semibold mb-2 text-zinc-900 dark:text-zinc-100">
No Browser State Available
</h3>
<p className="text-sm text-zinc-500 dark:text-zinc-400">
Browser state image not found for this action
</p>
</div>
)}
</div>
</CardContent>

View File

@ -14,6 +14,7 @@ export interface ToolViewProps {
currentIndex?: number;
totalCalls?: number;
onFileClick?: (filePath: string) => void;
shouldShowParentIframe?: boolean;
}
export interface BrowserToolViewProps extends ToolViewProps {