Merge pull request #899 from kortix-ai/cleanup-improvements

improvements
This commit is contained in:
Marko Kraemer 2025-07-05 20:14:58 +02:00 committed by GitHub
commit dba4e6e739
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
24 changed files with 442 additions and 1199 deletions

View File

@ -124,16 +124,6 @@ class ThreadAgentResponse(BaseModel):
source: str # "thread", "default", "none", "missing"
message: str
class AgentBuilderChatRequest(BaseModel):
message: str
conversation_history: List[Dict[str, str]] = []
partial_config: Optional[Dict[str, Any]] = None
class AgentBuilderChatResponse(BaseModel):
response: str
suggested_config: Optional[Dict[str, Any]] = None
next_step: Optional[str] = None
def initialize(
_db: DBConnection,
_instance_id: Optional[str] = None
@ -299,71 +289,6 @@ async def get_agent_run_with_access_check(client, agent_run_id: str, user_id: st
await verify_thread_access(client, thread_id, user_id)
return agent_run_data
async def enhance_system_prompt(agent_name: str, description: str, user_system_prompt: str) -> str:
"""
Enhance a basic system prompt using GPT-4o to create a more comprehensive and effective system prompt.
Args:
agent_name: Name of the agent
description: Description of the agent
user_system_prompt: User's basic system prompt/instructions
Returns:
Enhanced system prompt generated by GPT-4o
"""
try:
system_message = """You are an expert at creating comprehensive system prompts for AI agents. Your task is to take basic agent information and transform it into a detailed, effective system prompt that will help the agent perform optimally.
Guidelines for creating system prompts:
1. Be specific about the agent's role, expertise, and capabilities
2. Include clear behavioral guidelines and interaction style
3. Specify the agent's knowledge domains and areas of expertise
4. Include guidance on how to handle different types of requests
5. Set appropriate boundaries and limitations
6. Make the prompt engaging and easy to understand
7. Ensure the prompt is comprehensive but not overly verbose
8. Include relevant context about tools and capabilities the agent might have
The enhanced prompt should be professional, clear, and actionable."""
user_message = f"""Please create an enhanced system prompt for an AI agent with the following details:
Agent Name: {agent_name}
Agent Description: {description}
User's Instructions: {user_system_prompt}
Transform this basic information into a comprehensive, effective system prompt that will help the agent perform at its best. The prompt should be detailed enough to guide the agent's behavior while remaining clear and actionable."""
messages = [
{"role": "system", "content": system_message},
{"role": "user", "content": user_message}
]
logger.info(f"Enhancing system prompt for agent: {agent_name}")
response = await make_llm_api_call(
messages=messages,
model_name="openai/gpt-4o",
max_tokens=2000,
temperature=0.7
)
if response and response.get('choices') and response['choices'][0].get('message'):
enhanced_prompt = response['choices'][0]['message'].get('content', '').strip()
if enhanced_prompt:
logger.info(f"Successfully enhanced system prompt for agent: {agent_name}")
return enhanced_prompt
else:
logger.warning(f"GPT-4o returned empty enhanced prompt for agent: {agent_name}")
return user_system_prompt
else:
logger.warning(f"Failed to get valid response from GPT-4o for agent: {agent_name}")
return user_system_prompt
except Exception as e:
logger.error(f"Error enhancing system prompt for agent {agent_name}: {str(e)}")
# Return the original prompt if enhancement fails
return user_system_prompt
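For reference, the removed helper depends on the dict-shaped response returned by make_llm_api_call; a minimal standalone sketch of the extraction it performed, using a made-up response:

# Illustrative sketch, not part of the diff.
response = {
    "choices": [
        {"message": {"content": "You are a focused research assistant ..."}}
    ]
}
enhanced_prompt = ""
if response and response.get('choices') and response['choices'][0].get('message'):
    enhanced_prompt = response['choices'][0]['message'].get('content', '').strip()
print(enhanced_prompt or "<falls back to the user's original prompt>")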
@router.post("/thread/{thread_id}/agent/start")
async def start_agent(
thread_id: str,
@ -1210,11 +1135,8 @@ async def initiate_agent_with_files(
raise HTTPException(status_code=500, detail=f"Failed to initiate agent session: {str(e)}")
# Custom agents
@router.get("/agents", response_model=AgentsResponse)
async def get_agents(
user_id: str = Depends(get_current_user_id_from_jwt),

View File

@ -58,7 +58,7 @@ async def run_agent(
if not trace:
trace = langfuse.trace(name="run_agent", session_id=thread_id, metadata={"project_id": project_id})
thread_manager = ThreadManager(trace=trace, is_agent_builder=is_agent_builder, target_agent_id=target_agent_id, agent_config=agent_config)
thread_manager = ThreadManager(trace=trace, is_agent_builder=is_agent_builder or False, target_agent_id=target_agent_id, agent_config=agent_config)
client = await thread_manager.db.client

View File

@ -8,7 +8,7 @@ server tool calls through dynamically generated individual function methods.
import json
from typing import Any, Dict, List, Optional
from agentpress.tool import Tool, ToolResult, openapi_schema, xml_schema, ToolSchema, SchemaType
from mcp_local.client import MCPManager
from mcp_service.client import MCPManager
from utils.logger import logger
import inspect
from mcp import ClientSession

View File

@ -6,9 +6,10 @@ reaching the context window limitations of LLM models.
"""
import json
from typing import List, Dict, Any, Optional
from typing import List, Dict, Any, Optional, Union
from litellm import token_counter, completion_cost
from litellm.utils import token_counter
from litellm.cost_calculator import completion_cost
from services.supabase import DBConnection
from services.llm import make_llm_api_call
from utils.logger import logger
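The relocated imports point at the modules where litellm defines these helpers; a minimal sketch of how the context manager uses them (model name and messages are illustrative):

# Illustrative sketch, not part of the diff.
from litellm.utils import token_counter
from litellm.cost_calculator import completion_cost

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Summarize our conversation so far."},
]
count = token_counter(model="gpt-4o-mini", messages=messages)   # model-aware token count for a message list
cost = completion_cost(model="gpt-4o-mini", prompt="", completion="A short summary ...")
print(count, f"${cost:.6f}")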
@ -29,270 +30,291 @@ class ContextManager:
"""
self.db = DBConnection()
self.token_threshold = token_threshold
def is_tool_result_message(self, msg: Dict[str, Any]) -> bool:
"""Check if a message is a tool result message."""
if not ("content" in msg and msg['content']):
return False
content = msg['content']
if isinstance(content, str) and "ToolResult" in content:
return True
if isinstance(content, dict) and "tool_execution" in content:
return True
if isinstance(content, dict) and "interactive_elements" in content:
return True
if isinstance(content, str):
try:
parsed_content = json.loads(content)
if isinstance(parsed_content, dict) and "tool_execution" in parsed_content:
return True
if isinstance(parsed_content, dict) and "interactive_elements" in content:
return True
except (json.JSONDecodeError, TypeError):
pass
return False
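To make the check above concrete, these are the payload shapes it treats as tool results (values are made up); string content is parsed as JSON before the key lookup:

# Illustrative sketch, not part of the diff.
import json

tool_result = {"content": {"tool_execution": {"name": "web_search", "arguments": {}}}}   # matched: dict payload
serialized = {"content": json.dumps({"tool_execution": {"name": "web_search"}})}         # matched after json.loads
legacy = {"content": "ToolResult(success=True, output='...')"}                           # matched: legacy repr string
chat = {"role": "user", "content": "hello"}                                              # not matched: ordinary text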
async def get_thread_token_count(self, thread_id: str) -> int:
"""Get the current token count for a thread using LiteLLM.
Args:
thread_id: ID of the thread to analyze
Returns:
The total token count for relevant messages in the thread
"""
logger.debug(f"Getting token count for thread {thread_id}")
try:
# Get messages for the thread
messages = await self.get_messages_for_summarization(thread_id)
if not messages:
logger.debug(f"No messages found for thread {thread_id}")
return 0
# Use litellm's token_counter for accurate model-specific counting
# This is much more accurate than the SQL-based estimation
token_count = token_counter(model="gpt-4", messages=messages)
logger.info(f"Thread {thread_id} has {token_count} tokens (calculated with litellm)")
return token_count
except Exception as e:
logger.error(f"Error getting token count: {str(e)}")
return 0
async def get_messages_for_summarization(self, thread_id: str) -> List[Dict[str, Any]]:
"""Get all LLM messages from the thread that need to be summarized.
This gets messages after the most recent summary or all messages if
no summary exists. Unlike get_llm_messages, this includes ALL messages
since the last summary, even if we're generating a new summary.
Args:
thread_id: ID of the thread to get messages from
Returns:
List of message objects to summarize
"""
logger.debug(f"Getting messages for summarization for thread {thread_id}")
client = await self.db.client
try:
# Find the most recent summary message
summary_result = await client.table('messages').select('created_at') \
.eq('thread_id', thread_id) \
.eq('type', 'summary') \
.eq('is_llm_message', True) \
.order('created_at', desc=True) \
.limit(1) \
.execute()
# Get messages after the most recent summary or all messages if no summary
if summary_result.data and len(summary_result.data) > 0:
last_summary_time = summary_result.data[0]['created_at']
logger.debug(f"Found last summary at {last_summary_time}")
# Get all messages after the summary, but NOT including the summary itself
messages_result = await client.table('messages').select('*') \
.eq('thread_id', thread_id) \
.eq('is_llm_message', True) \
.gt('created_at', last_summary_time) \
.order('created_at') \
.execute()
def compress_message(self, msg_content: Union[str, dict], message_id: Optional[str] = None, max_length: int = 3000) -> Union[str, dict]:
"""Compress the message content."""
if isinstance(msg_content, str):
if len(msg_content) > max_length:
return msg_content[:max_length] + "... (truncated)" + f"\n\nmessage_id \"{message_id}\"\nUse expand-message tool to see contents"
else:
logger.debug("No previous summary found, getting all messages")
# Get all messages
messages_result = await client.table('messages').select('*') \
.eq('thread_id', thread_id) \
.eq('is_llm_message', True) \
.order('created_at') \
.execute()
# Parse the message content if needed
messages = []
for msg in messages_result.data:
# Skip existing summary messages - we don't want to summarize summaries
if msg.get('type') == 'summary':
logger.debug(f"Skipping summary message from {msg.get('created_at')}")
continue
# Parse content if it's a string
content = msg['content']
if isinstance(content, str):
try:
content = json.loads(content)
except json.JSONDecodeError:
pass # Keep as string if not valid JSON
return msg_content
elif isinstance(msg_content, dict):
if len(json.dumps(msg_content)) > max_length:
return json.dumps(msg_content)[:max_length] + "... (truncated)" + f"\n\nmessage_id \"{message_id}\"\nUse expand-message tool to see contents"
else:
return msg_content
def safe_truncate(self, msg_content: Union[str, dict], max_length: int = 100000) -> Union[str, dict]:
"""Truncate the message content safely by removing the middle portion."""
max_length = min(max_length, 100000)
if isinstance(msg_content, str):
if len(msg_content) > max_length:
# Calculate how much to keep from start and end
keep_length = max_length - 150 # Reserve space for truncation message
start_length = keep_length // 2
end_length = keep_length - start_length
# Ensure we have the proper format for the LLM
if 'role' not in content and 'type' in msg:
# Convert message type to role if needed
role = msg['type']
if role == 'assistant' or role == 'user' or role == 'system' or role == 'tool':
content = {'role': role, 'content': content}
start_part = msg_content[:start_length]
end_part = msg_content[-end_length:] if end_length > 0 else ""
messages.append(content)
logger.info(f"Got {len(messages)} messages to summarize for thread {thread_id}")
return messages
except Exception as e:
logger.error(f"Error getting messages for summarization: {str(e)}", exc_info=True)
return []
async def create_summary(
self,
thread_id: str,
messages: List[Dict[str, Any]],
model: str = "gpt-4o-mini"
) -> Optional[Dict[str, Any]]:
"""Generate a summary of conversation messages.
return start_part + f"\n\n... (middle truncated) ...\n\n" + end_part + f"\n\nThis message is too long, repeat relevant information in your response to remember it"
else:
return msg_content
elif isinstance(msg_content, dict):
json_str = json.dumps(msg_content)
if len(json_str) > max_length:
# Calculate how much to keep from start and end
keep_length = max_length - 150 # Reserve space for truncation message
start_length = keep_length // 2
end_length = keep_length - start_length
start_part = json_str[:start_length]
end_part = json_str[-end_length:] if end_length > 0 else ""
return start_part + f"\n\n... (middle truncated) ...\n\n" + end_part + f"\n\nThis message is too long, repeat relevant information in your response to remember it"
else:
return msg_content
def compress_tool_result_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000) -> List[Dict[str, Any]]:
"""Compress the tool result messages except the most recent one."""
uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
max_tokens_value = max_tokens or (100 * 1000)
if uncompressed_total_token_count > max_tokens_value:
_i = 0 # Count the number of ToolResult messages
for msg in reversed(messages): # Start from the end and work backwards
if self.is_tool_result_message(msg): # Only compress ToolResult messages
_i += 1 # Count the number of ToolResult messages
msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message
if msg_token_count > token_threshold: # If the message is too long
if _i > 1: # If this is not the most recent ToolResult message
message_id = msg.get('message_id') # Get the message_id
if message_id:
msg["content"] = self.compress_message(msg["content"], message_id, token_threshold * 3)
else:
logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
else:
msg["content"] = self.safe_truncate(msg["content"], int(max_tokens_value * 2))
return messages
def compress_user_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000) -> List[Dict[str, Any]]:
"""Compress the user messages except the most recent one."""
uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
max_tokens_value = max_tokens or (100 * 1000)
if uncompressed_total_token_count > max_tokens_value:
_i = 0 # Count the number of User messages
for msg in reversed(messages): # Start from the end and work backwards
if msg.get('role') == 'user': # Only compress User messages
_i += 1 # Count the number of User messages
msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message
if msg_token_count > token_threshold: # If the message is too long
if _i > 1: # If this is not the most recent User message
message_id = msg.get('message_id') # Get the message_id
if message_id:
msg["content"] = self.compress_message(msg["content"], message_id, token_threshold * 3)
else:
logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
else:
msg["content"] = self.safe_truncate(msg["content"], int(max_tokens_value * 2))
return messages
def compress_assistant_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000) -> List[Dict[str, Any]]:
"""Compress the assistant messages except the most recent one."""
uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
max_tokens_value = max_tokens or (100 * 1000)
if uncompressed_total_token_count > max_tokens_value:
_i = 0 # Count the number of Assistant messages
for msg in reversed(messages): # Start from the end and work backwards
if msg.get('role') == 'assistant': # Only compress Assistant messages
_i += 1 # Count the number of Assistant messages
msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message
if msg_token_count > token_threshold: # If the message is too long
if _i > 1: # If this is not the most recent Assistant message
message_id = msg.get('message_id') # Get the message_id
if message_id:
msg["content"] = self.compress_message(msg["content"], message_id, token_threshold * 3)
else:
logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
else:
msg["content"] = self.safe_truncate(msg["content"], int(max_tokens_value * 2))
return messages
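The three role-specific passes above share one rule: walk the history backwards, leave the most recent message of each role untouched, and replace every other oversized body with a truncated stub that points at the expand-message tool. A standalone sketch of the stub format (max_length shortened for readability):

# Illustrative sketch, not part of the diff.
def compress_stub(msg_content: str, message_id: str, max_length: int = 40) -> str:
    # Mirrors compress_message for string content.
    if len(msg_content) <= max_length:
        return msg_content
    return (msg_content[:max_length] + "... (truncated)"
            + f"\n\nmessage_id \"{message_id}\"\nUse expand-message tool to see contents")

print(compress_stub("x" * 200, message_id="<some-message-id>"))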
def remove_meta_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Remove meta messages from the messages."""
result: List[Dict[str, Any]] = []
for msg in messages:
msg_content = msg.get('content')
# Try to parse msg_content as JSON if it's a string
if isinstance(msg_content, str):
try:
msg_content = json.loads(msg_content)
except json.JSONDecodeError:
pass
if isinstance(msg_content, dict):
# Create a copy to avoid modifying the original
msg_content_copy = msg_content.copy()
if "tool_execution" in msg_content_copy:
tool_execution = msg_content_copy["tool_execution"].copy()
if "arguments" in tool_execution:
del tool_execution["arguments"]
msg_content_copy["tool_execution"] = tool_execution
# Create a new message dict with the modified content
new_msg = msg.copy()
new_msg["content"] = json.dumps(msg_content_copy)
result.append(new_msg)
else:
result.append(msg)
return result
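To make the intent of remove_meta_messages concrete, here is a standalone sketch of the same transformation on a made-up message: the bulky tool_execution arguments are dropped and the content is re-serialized, while all other messages pass through unchanged.

# Illustrative sketch, not part of the diff.
import json

msg = {
    "role": "assistant",
    "content": {"tool_execution": {"name": "web_search", "arguments": {"query": "..."}, "result": "top hits ..."}},
}
content = msg["content"].copy()
tool_execution = content["tool_execution"].copy()
tool_execution.pop("arguments", None)              # drop the arguments block
content["tool_execution"] = tool_execution
cleaned = {**msg, "content": json.dumps(content)}  # content is stored back as a JSON string
print(cleaned["content"])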
def compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int] = 41000, token_threshold: int = 4096, max_iterations: int = 5) -> List[Dict[str, Any]]:
"""Compress the messages.
Args:
thread_id: ID of the thread to summarize
messages: Messages to summarize
model: LLM model to use for summarization
Returns:
Summary message object or None if summarization failed
messages: List of messages to compress
llm_model: Model name for token counting
max_tokens: Maximum allowed tokens
token_threshold: Token threshold for individual message compression (must be a power of 2)
max_iterations: Maximum number of compression iterations
"""
# Set model-specific token limits
if 'sonnet' in llm_model.lower():
max_tokens = 200 * 1000 - 64000 - 28000
elif 'gpt' in llm_model.lower():
max_tokens = 128 * 1000 - 28000
elif 'gemini' in llm_model.lower():
max_tokens = 1000 * 1000 - 300000
elif 'deepseek' in llm_model.lower():
max_tokens = 128 * 1000 - 28000
else:
max_tokens = 41 * 1000 - 10000
result = messages
result = self.remove_meta_messages(result)
uncompressed_total_token_count = token_counter(model=llm_model, messages=result)
result = self.compress_tool_result_messages(result, llm_model, max_tokens, token_threshold)
result = self.compress_user_messages(result, llm_model, max_tokens, token_threshold)
result = self.compress_assistant_messages(result, llm_model, max_tokens, token_threshold)
compressed_token_count = token_counter(model=llm_model, messages=result)
logger.info(f"compress_messages: {uncompressed_total_token_count} -> {compressed_token_count}") # Log the token compression for debugging later
if max_iterations <= 0:
logger.warning(f"compress_messages: Max iterations reached, omitting messages")
result = self.compress_messages_by_omitting_messages(messages, llm_model, max_tokens)
return result
if compressed_token_count > max_tokens:
logger.warning(f"Further token compression is needed: {compressed_token_count} > {max_tokens}")
result = self.compress_messages(messages, llm_model, max_tokens, token_threshold // 2, max_iterations - 1)
return self.middle_out_messages(result)
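The model-specific budgets at the top of compress_messages amount to a small lookup table, which reads like context window minus reserved output and overhead tokens; restated standalone with the numbers copied from the code above:

# Illustrative sketch, not part of the diff.
def context_budget(llm_model: str) -> int:
    name = llm_model.lower()
    if 'sonnet' in name:
        return 200_000 - 64_000 - 28_000   # 108000
    if 'gpt' in name or 'deepseek' in name:
        return 128_000 - 28_000            # 100000
    if 'gemini' in name:
        return 1_000_000 - 300_000         # 700000
    return 41_000 - 10_000                 # 31000

print(context_budget("anthropic/claude-3-5-sonnet-latest"))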
def compress_messages_by_omitting_messages(
self,
messages: List[Dict[str, Any]],
llm_model: str,
max_tokens: Optional[int] = 41000,
removal_batch_size: int = 10,
min_messages_to_keep: int = 10
) -> List[Dict[str, Any]]:
"""Compress the messages by omitting messages from the middle.
Args:
messages: List of messages to compress
llm_model: Model name for token counting
max_tokens: Maximum allowed tokens
removal_batch_size: Number of messages to remove per iteration
min_messages_to_keep: Minimum number of messages to preserve
"""
if not messages:
logger.warning("No messages to summarize")
return None
logger.info(f"Creating summary for thread {thread_id} with {len(messages)} messages")
# Create system message with summarization instructions
system_message = {
"role": "system",
"content": f"""You are a specialized summarization assistant. Your task is to create a concise but comprehensive summary of the conversation history.
The summary should:
1. Preserve all key information including decisions, conclusions, and important context
2. Include any tools that were used and their results
3. Maintain chronological order of events
4. Be presented as a narrated list of key points with section headers
5. Include only factual information from the conversation (no new information)
6. Be concise but detailed enough that the conversation can continue with this summary as context
VERY IMPORTANT: This summary will replace older parts of the conversation in the LLM's context window, so ensure it contains ALL key information and LATEST STATE OF THE CONVERSATION - SO WE WILL KNOW HOW TO PICK UP WHERE WE LEFT OFF.
THE CONVERSATION HISTORY TO SUMMARIZE IS AS FOLLOWS:
===============================================================
==================== CONVERSATION HISTORY ====================
{messages}
==================== END OF CONVERSATION HISTORY ====================
===============================================================
"""
}
try:
# Call LLM to generate summary
response = await make_llm_api_call(
model_name=model,
messages=[system_message, {"role": "user", "content": "PLEASE PROVIDE THE SUMMARY NOW."}],
temperature=0,
max_tokens=SUMMARY_TARGET_TOKENS,
stream=False
)
return messages
if response and hasattr(response, 'choices') and response.choices:
summary_content = response.choices[0].message.content
# Track token usage
try:
token_count = token_counter(model=model, messages=[{"role": "user", "content": summary_content}])
cost = completion_cost(model=model, prompt="", completion=summary_content)
logger.info(f"Summary generated with {token_count} tokens at cost ${cost:.6f}")
except Exception as e:
logger.error(f"Error calculating token usage: {str(e)}")
# Format the summary message with clear beginning and end markers
formatted_summary = f"""
======== CONVERSATION HISTORY SUMMARY ========
result = messages
result = self.remove_meta_messages(result)
{summary_content}
# Early exit if no compression needed
initial_token_count = token_counter(model=llm_model, messages=result)
max_allowed_tokens = max_tokens or (100 * 1000)
if initial_token_count <= max_allowed_tokens:
return result
======== END OF SUMMARY ========
# Separate system message (assumed to be first) from conversation messages
system_message = messages[0] if messages and messages[0].get('role') == 'system' else None
conversation_messages = result[1:] if system_message else result
safety_limit = 500
current_token_count = initial_token_count
while current_token_count > max_allowed_tokens and safety_limit > 0:
safety_limit -= 1
if len(conversation_messages) <= min_messages_to_keep:
logger.warning(f"Cannot compress further: only {len(conversation_messages)} messages remain (min: {min_messages_to_keep})")
break
The above is a summary of the conversation history. The conversation continues below.
"""
# Format the summary message
summary_message = {
"role": "user",
"content": formatted_summary
}
return summary_message
# Calculate removal strategy based on current message count
if len(conversation_messages) > (removal_batch_size * 2):
# Remove from middle, keeping recent and early context
middle_start = len(conversation_messages) // 2 - (removal_batch_size // 2)
middle_end = middle_start + removal_batch_size
conversation_messages = conversation_messages[:middle_start] + conversation_messages[middle_end:]
else:
logger.error("Failed to generate summary: Invalid response")
return None
except Exception as e:
logger.error(f"Error creating summary: {str(e)}", exc_info=True)
return None
# Remove from earlier messages, preserving recent context
messages_to_remove = min(removal_batch_size, len(conversation_messages) // 2)
if messages_to_remove > 0:
conversation_messages = conversation_messages[messages_to_remove:]
else:
# Can't remove any more messages
break
# Recalculate token count
messages_to_count = ([system_message] + conversation_messages) if system_message else conversation_messages
current_token_count = token_counter(model=llm_model, messages=messages_to_count)
# Prepare final result
final_messages = ([system_message] + conversation_messages) if system_message else conversation_messages
final_token_count = token_counter(model=llm_model, messages=final_messages)
async def check_and_summarize_if_needed(
self,
thread_id: str,
add_message_callback,
model: str = "gpt-4o-mini",
force: bool = False
) -> bool:
"""Check if thread needs summarization and summarize if so.
logger.info(f"compress_messages_by_omitting_messages: {initial_token_count} -> {final_token_count} tokens ({len(messages)} -> {len(final_messages)} messages)")
return final_messages
def middle_out_messages(self, messages: List[Dict[str, Any]], max_messages: int = 320) -> List[Dict[str, Any]]:
"""Remove messages from the middle of the list, keeping max_messages total."""
if len(messages) <= max_messages:
return messages
Args:
thread_id: ID of the thread to check
add_message_callback: Callback to add the summary message to the thread
model: LLM model to use for summarization
force: Whether to force summarization regardless of token count
Returns:
True if summarization was performed, False otherwise
"""
try:
# Get token count using LiteLLM (accurate model-specific counting)
token_count = await self.get_thread_token_count(thread_id)
# If token count is below threshold and not forcing, no summarization needed
if token_count < self.token_threshold and not force:
logger.debug(f"Thread {thread_id} has {token_count} tokens, below threshold {self.token_threshold}")
return False
# Log reason for summarization
if force:
logger.info(f"Forced summarization of thread {thread_id} with {token_count} tokens")
else:
logger.info(f"Thread {thread_id} exceeds token threshold ({token_count} >= {self.token_threshold}), summarizing...")
# Get messages to summarize
messages = await self.get_messages_for_summarization(thread_id)
# If there are too few messages, don't summarize
if len(messages) < 3:
logger.info(f"Thread {thread_id} has too few messages ({len(messages)}) to summarize")
return False
# Create summary
summary = await self.create_summary(thread_id, messages, model)
if summary:
# Add summary message to thread
await add_message_callback(
thread_id=thread_id,
type="summary",
content=summary,
is_llm_message=True,
metadata={"token_count": token_count}
)
logger.info(f"Successfully added summary to thread {thread_id}")
return True
else:
logger.error(f"Failed to create summary for thread {thread_id}")
return False
except Exception as e:
logger.error(f"Error in check_and_summarize_if_needed: {str(e)}", exc_info=True)
return False
# Keep half from the beginning and half from the end
keep_start = max_messages // 2
keep_end = max_messages - keep_start
return messages[:keep_start] + messages[-keep_end:]
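middle_out_messages, the final guard in the pipeline, caps the list length by keeping the oldest and newest halves; a quick standalone check of the split with the cap lowered to 6:

# Illustrative sketch, not part of the diff.
msgs = [{"role": "user", "content": f"m{i}"} for i in range(10)]
max_messages = 6
keep_start = max_messages // 2            # 3 from the beginning
keep_end = max_messages - keep_start      # 3 from the end
kept = msgs[:keep_start] + msgs[-keep_end:]
print([m["content"] for m in kept])       # ['m0', 'm1', 'm2', 'm7', 'm8', 'm9']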

View File

@ -1602,51 +1602,6 @@ class ResponseProcessor:
)
return message_obj # Return the full message object
# Check if this is an MCP tool (function_name starts with "call_mcp_tool")
function_name = tool_call.get("function_name", "")
# Check if this is an MCP tool - either the old call_mcp_tool or a dynamically registered MCP tool
is_mcp_tool = False
if function_name == "call_mcp_tool":
is_mcp_tool = True
else:
# Check if the result indicates it's an MCP tool by looking for MCP metadata
if hasattr(result, 'output') and isinstance(result.output, str):
# Check for MCP metadata pattern in the output
if "MCP Tool Result from" in result.output and "Tool Metadata:" in result.output:
is_mcp_tool = True
# Also check for MCP metadata in JSON format
elif "mcp_metadata" in result.output:
is_mcp_tool = True
if is_mcp_tool:
# Special handling for MCP tools - make content prominent and LLM-friendly
result_role = "user" if strategy == "user_message" else "assistant"
# Extract the actual content from the ToolResult
if hasattr(result, 'output'):
mcp_content = str(result.output)
else:
mcp_content = str(result)
# Create a simple, LLM-friendly message format that puts content first
simple_message = {
"role": result_role,
"content": mcp_content # Direct content, no complex nesting
}
logger.info(f"Adding MCP tool result with simplified format for LLM visibility")
self.trace.event(name="adding_mcp_tool_result_simplified", level="DEFAULT", status_message="Adding MCP tool result with simplified format for LLM visibility")
message_obj = await self.add_message(
thread_id=thread_id,
type="tool",
content=simple_message,
is_llm_message=True,
metadata=metadata
)
return message_obj
# For XML and other non-native tools, use the new structured format
# Determine message role based on strategy
result_role = "user" if strategy == "user_message" else "assistant"
@ -1781,28 +1736,6 @@ class ResponseProcessor:
return structured_result_v1
def _format_xml_tool_result(self, tool_call: Dict[str, Any], result: ToolResult) -> str:
"""Format a tool result wrapped in a <tool_result> tag.
DEPRECATED: This method is kept for backwards compatibility.
New implementations should use _create_structured_tool_result instead.
Args:
tool_call: The tool call that was executed
result: The result of the tool execution
Returns:
String containing the formatted result wrapped in <tool_result> tag
"""
# Always use xml_tag_name if it exists
if "xml_tag_name" in tool_call:
xml_tag_name = tool_call["xml_tag_name"]
return f"<tool_result> <{xml_tag_name}> {str(result)} </{xml_tag_name}> </tool_result>"
# Non-XML tool, just return the function result
function_name = tool_call["function_name"]
return f"Result for {function_name}: {str(result)}"
def _create_tool_context(self, tool_call: Dict[str, Any], tool_index: int, assistant_message_id: Optional[str] = None, parsing_details: Optional[Dict[str, Any]] = None) -> ToolExecutionContext:
"""Create a tool execution context with display name and parsing details populated."""
context = ToolExecutionContext(

View File

@ -25,7 +25,7 @@ from utils.logger import logger
from langfuse.client import StatefulGenerationClient, StatefulTraceClient
from services.langfuse import langfuse
import datetime
from litellm import token_counter
from litellm.utils import token_counter
# Type alias for tool choice
ToolChoice = Literal["auto", "required", "none"]
@ -65,279 +65,6 @@ class ThreadManager:
)
self.context_manager = ContextManager()
def _is_tool_result_message(self, msg: Dict[str, Any]) -> bool:
if not ("content" in msg and msg['content']):
return False
content = msg['content']
if isinstance(content, str) and "ToolResult" in content: return True
if isinstance(content, dict) and "tool_execution" in content: return True
if isinstance(content, dict) and "interactive_elements" in content: return True
if isinstance(content, str):
try:
parsed_content = json.loads(content)
if isinstance(parsed_content, dict) and "tool_execution" in parsed_content: return True
if isinstance(parsed_content, dict) and "interactive_elements" in content: return True
except (json.JSONDecodeError, TypeError):
pass
return False
def _compress_message(self, msg_content: Union[str, dict], message_id: Optional[str] = None, max_length: int = 3000) -> Union[str, dict]:
"""Compress the message content."""
# print("max_length", max_length)
if isinstance(msg_content, str):
if len(msg_content) > max_length:
return msg_content[:max_length] + "... (truncated)" + f"\n\nmessage_id \"{message_id}\"\nUse expand-message tool to see contents"
else:
return msg_content
elif isinstance(msg_content, dict):
if len(json.dumps(msg_content)) > max_length:
return json.dumps(msg_content)[:max_length] + "... (truncated)" + f"\n\nmessage_id \"{message_id}\"\nUse expand-message tool to see contents"
else:
return msg_content
def _safe_truncate(self, msg_content: Union[str, dict], max_length: int = 100000) -> Union[str, dict]:
"""Truncate the message content safely by removing the middle portion."""
max_length = min(max_length, 100000)
if isinstance(msg_content, str):
if len(msg_content) > max_length:
# Calculate how much to keep from start and end
keep_length = max_length - 150 # Reserve space for truncation message
start_length = keep_length // 2
end_length = keep_length - start_length
start_part = msg_content[:start_length]
end_part = msg_content[-end_length:] if end_length > 0 else ""
return start_part + f"\n\n... (middle truncated) ...\n\n" + end_part + f"\n\nThis message is too long, repeat relevant information in your response to remember it"
else:
return msg_content
elif isinstance(msg_content, dict):
json_str = json.dumps(msg_content)
if len(json_str) > max_length:
# Calculate how much to keep from start and end
keep_length = max_length - 150 # Reserve space for truncation message
start_length = keep_length // 2
end_length = keep_length - start_length
start_part = json_str[:start_length]
end_part = json_str[-end_length:] if end_length > 0 else ""
return start_part + f"\n\n... (middle truncated) ...\n\n" + end_part + f"\n\nThis message is too long, repeat relevant information in your response to remember it"
else:
return msg_content
def _compress_tool_result_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]:
"""Compress the tool result messages except the most recent one."""
uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
if uncompressed_total_token_count > (max_tokens or (100 * 1000)):
_i = 0 # Count the number of ToolResult messages
for msg in reversed(messages): # Start from the end and work backwards
if self._is_tool_result_message(msg): # Only compress ToolResult messages
_i += 1 # Count the number of ToolResult messages
msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message
if msg_token_count > token_threshold: # If the message is too long
if _i > 1: # If this is not the most recent ToolResult message
message_id = msg.get('message_id') # Get the message_id
if message_id:
msg["content"] = self._compress_message(msg["content"], message_id, token_threshold * 3)
else:
logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
else:
msg["content"] = self._safe_truncate(msg["content"], int(max_tokens * 2))
return messages
def _compress_user_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]:
"""Compress the user messages except the most recent one."""
uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
if uncompressed_total_token_count > (max_tokens or (100 * 1000)):
_i = 0 # Count the number of User messages
for msg in reversed(messages): # Start from the end and work backwards
if msg.get('role') == 'user': # Only compress User messages
_i += 1 # Count the number of User messages
msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message
if msg_token_count > token_threshold: # If the message is too long
if _i > 1: # If this is not the most recent User message
message_id = msg.get('message_id') # Get the message_id
if message_id:
msg["content"] = self._compress_message(msg["content"], message_id, token_threshold * 3)
else:
logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
else:
msg["content"] = self._safe_truncate(msg["content"], int(max_tokens * 2))
return messages
def _compress_assistant_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: Optional[int] = 1000) -> List[Dict[str, Any]]:
"""Compress the assistant messages except the most recent one."""
uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
if uncompressed_total_token_count > (max_tokens or (100 * 1000)):
_i = 0 # Count the number of Assistant messages
for msg in reversed(messages): # Start from the end and work backwards
if msg.get('role') == 'assistant': # Only compress Assistant messages
_i += 1 # Count the number of Assistant messages
msg_token_count = token_counter(messages=[msg]) # Count the number of tokens in the message
if msg_token_count > token_threshold: # If the message is too long
if _i > 1: # If this is not the most recent Assistant message
message_id = msg.get('message_id') # Get the message_id
if message_id:
msg["content"] = self._compress_message(msg["content"], message_id, token_threshold * 3)
else:
logger.warning(f"UNEXPECTED: Message has no message_id {str(msg)[:100]}")
else:
msg["content"] = self._safe_truncate(msg["content"], int(max_tokens * 2))
return messages
def _remove_meta_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Remove meta messages from the messages."""
result: List[Dict[str, Any]] = []
for msg in messages:
msg_content = msg.get('content')
# Try to parse msg_content as JSON if it's a string
if isinstance(msg_content, str):
try: msg_content = json.loads(msg_content)
except json.JSONDecodeError: pass
if isinstance(msg_content, dict):
# Create a copy to avoid modifying the original
msg_content_copy = msg_content.copy()
if "tool_execution" in msg_content_copy:
tool_execution = msg_content_copy["tool_execution"].copy()
if "arguments" in tool_execution:
del tool_execution["arguments"]
msg_content_copy["tool_execution"] = tool_execution
# Create a new message dict with the modified content
new_msg = msg.copy()
new_msg["content"] = json.dumps(msg_content_copy)
result.append(new_msg)
else:
result.append(msg)
return result
def _compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int] = 41000, token_threshold: Optional[int] = 4096, max_iterations: int = 5) -> List[Dict[str, Any]]:
"""Compress the messages.
token_threshold: must be a power of 2
"""
if 'sonnet' in llm_model.lower():
max_tokens = 200 * 1000 - 64000 - 28000
elif 'gpt' in llm_model.lower():
max_tokens = 128 * 1000 - 28000
elif 'gemini' in llm_model.lower():
max_tokens = 1000 * 1000 - 300000
elif 'deepseek' in llm_model.lower():
max_tokens = 128 * 1000 - 28000
else:
max_tokens = 41 * 1000 - 10000
result = messages
result = self._remove_meta_messages(result)
uncompressed_total_token_count = token_counter(model=llm_model, messages=result)
result = self._compress_tool_result_messages(result, llm_model, max_tokens, token_threshold)
result = self._compress_user_messages(result, llm_model, max_tokens, token_threshold)
result = self._compress_assistant_messages(result, llm_model, max_tokens, token_threshold)
compressed_token_count = token_counter(model=llm_model, messages=result)
logger.info(f"_compress_messages: {uncompressed_total_token_count} -> {compressed_token_count}") # Log the token compression for debugging later
if max_iterations <= 0:
logger.warning(f"_compress_messages: Max iterations reached, omitting messages")
result = self._compress_messages_by_omitting_messages(messages, llm_model, max_tokens)
return result
if (compressed_token_count > max_tokens):
logger.warning(f"Further token compression is needed: {compressed_token_count} > {max_tokens}")
result = self._compress_messages(messages, llm_model, max_tokens, int(token_threshold / 2), max_iterations - 1)
return self._middle_out_messages(result)
def _compress_messages_by_omitting_messages(
self,
messages: List[Dict[str, Any]],
llm_model: str,
max_tokens: Optional[int] = 41000,
removal_batch_size: int = 10,
min_messages_to_keep: int = 10
) -> List[Dict[str, Any]]:
"""Compress the messages by omitting messages from the middle.
Args:
messages: List of messages to compress
llm_model: Model name for token counting
max_tokens: Maximum allowed tokens
removal_batch_size: Number of messages to remove per iteration
min_messages_to_keep: Minimum number of messages to preserve
"""
if not messages:
return messages
result = messages
result = self._remove_meta_messages(result)
# Early exit if no compression needed
initial_token_count = token_counter(model=llm_model, messages=result)
max_allowed_tokens = max_tokens or (100 * 1000)
if initial_token_count <= max_allowed_tokens:
return result
# Separate system message (assumed to be first) from conversation messages
system_message = messages[0] if messages and messages[0].get('role') == 'system' else None
conversation_messages = result[1:] if system_message else result
safety_limit = 500
current_token_count = initial_token_count
while current_token_count > max_allowed_tokens and safety_limit > 0:
safety_limit -= 1
if len(conversation_messages) <= min_messages_to_keep:
logger.warning(f"Cannot compress further: only {len(conversation_messages)} messages remain (min: {min_messages_to_keep})")
break
# Calculate removal strategy based on current message count
if len(conversation_messages) > (removal_batch_size * 2):
# Remove from middle, keeping recent and early context
middle_start = len(conversation_messages) // 2 - (removal_batch_size // 2)
middle_end = middle_start + removal_batch_size
conversation_messages = conversation_messages[:middle_start] + conversation_messages[middle_end:]
else:
# Remove from earlier messages, preserving recent context
messages_to_remove = min(removal_batch_size, len(conversation_messages) // 2)
if messages_to_remove > 0:
conversation_messages = conversation_messages[messages_to_remove:]
else:
# Can't remove any more messages
break
# Recalculate token count
messages_to_count = ([system_message] + conversation_messages) if system_message else conversation_messages
current_token_count = token_counter(model=llm_model, messages=messages_to_count)
# Prepare final result
final_messages = ([system_message] + conversation_messages) if system_message else conversation_messages
final_token_count = token_counter(model=llm_model, messages=final_messages)
logger.info(f"_compress_messages_by_omitting_messages: {initial_token_count} -> {final_token_count} tokens ({len(messages)} -> {len(final_messages)} messages)")
return final_messages
def _middle_out_messages(self, messages: List[Dict[str, Any]], max_messages: int = 320) -> List[Dict[str, Any]]:
"""Remove messages from the middle of the list, keeping max_messages total."""
if len(messages) <= max_messages:
return messages
# Keep half from the beginning and half from the end
keep_start = max_messages // 2
keep_end = max_messages - keep_start
return messages[:keep_start] + messages[-keep_end:]
def add_tool(self, tool_class: Type[Tool], function_names: Optional[List[str]] = None, **kwargs):
"""Add a tool to the ThreadManager."""
self.tool_registry.register_tool(tool_class, function_names, **kwargs)
@ -637,7 +364,7 @@ Here are the XML tools available with examples:
openapi_tool_schemas = self.tool_registry.get_openapi_schemas()
logger.debug(f"Retrieved {len(openapi_tool_schemas) if openapi_tool_schemas else 0} OpenAPI tool schemas")
prepared_messages = self._compress_messages(prepared_messages, llm_model)
prepared_messages = self.context_manager.compress_messages(prepared_messages, llm_model)
# 5. Make LLM API call
logger.debug("Making LLM API call")

View File

@ -22,7 +22,7 @@ from sandbox import api as sandbox_api
from services import billing as billing_api
from flags import api as feature_flags_api
from services import transcription as transcription_api
from services.mcp_custom import discover_custom_tools
from mcp_service.mcp_custom import discover_custom_tools
import sys
from services import email_api
from triggers import api as triggers_api
@ -151,8 +151,8 @@ app.include_router(billing_api.router, prefix="/api")
app.include_router(feature_flags_api.router, prefix="/api")
from mcp_local import api as mcp_api
from mcp_local import secure_api as secure_mcp_api
from mcp_service import api as mcp_api
from mcp_service import secure_api as secure_mcp_api
app.include_router(mcp_api.router, prefix="/api")
app.include_router(secure_mcp_api.router, prefix="/api/secure-mcp")

View File

@ -81,8 +81,8 @@ services:
redis:
image: redis:8-alpine
# ports:
# - "127.0.0.1:6379:6379"
ports:
- "127.0.0.1:6379:6379"
volumes:
- redis_data:/data
- ./services/docker/redis.conf:/usr/local/etc/redis/redis.conf:ro
@ -104,8 +104,8 @@ services:
rabbitmq:
image: rabbitmq
# ports:
# - "127.0.0.1:5672:5672"
ports:
- "127.0.0.1:5672:5672"
volumes:
- rabbitmq_data:/var/lib/rabbitmq
restart: unless-stopped
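With the loopback port mappings re-enabled, Redis and RabbitMQ become reachable from the host at 127.0.0.1. A hedged connectivity check from the host (assumes the redis and pika client libraries are installed and that the compose defaults require no extra credentials):

# Illustrative sketch, not part of the diff.
import redis
import pika

r = redis.Redis(host="127.0.0.1", port=6379)
print(r.ping())  # True if the redis container is up

conn = pika.BlockingConnection(pika.ConnectionParameters(host="127.0.0.1", port=5672))
print(conn.is_open)  # True if rabbitmq accepted the AMQP connection
conn.close()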

View File

@ -1,299 +0,0 @@
import os
import sys
import json
import asyncio
import subprocess
from typing import Dict, Any
from concurrent.futures import ThreadPoolExecutor
from fastapi import HTTPException # type: ignore
from utils.logger import logger
from mcp import ClientSession
from mcp.client.sse import sse_client # type: ignore
from mcp.client.streamable_http import streamablehttp_client # type: ignore
windows_executor = ThreadPoolExecutor(max_workers=4)
# def run_mcp_stdio_sync(command, args, env_vars, timeout=30):
# try:
# env = os.environ.copy()
# env.update(env_vars)
# full_command = [command] + args
# process = subprocess.Popen(
# full_command,
# stdin=subprocess.PIPE,
# stdout=subprocess.PIPE,
# stderr=subprocess.PIPE,
# env=env,
# text=True,
# bufsize=0,
# creationflags=subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
# )
# init_request = {
# "jsonrpc": "2.0",
# "id": 1,
# "method": "initialize",
# "params": {
# "protocolVersion": "2024-11-05",
# "capabilities": {},
# "clientInfo": {"name": "mcp-client", "version": "1.0.0"}
# }
# }
# process.stdin.write(json.dumps(init_request) + "\n")
# process.stdin.flush()
# init_response_line = process.stdout.readline().strip()
# if not init_response_line:
# raise Exception("No response from MCP server during initialization")
# init_response = json.loads(init_response_line)
# init_notification = {
# "jsonrpc": "2.0",
# "method": "notifications/initialized"
# }
# process.stdin.write(json.dumps(init_notification) + "\n")
# process.stdin.flush()
# tools_request = {
# "jsonrpc": "2.0",
# "id": 2,
# "method": "tools/list",
# "params": {}
# }
# process.stdin.write(json.dumps(tools_request) + "\n")
# process.stdin.flush()
# tools_response_line = process.stdout.readline().strip()
# if not tools_response_line:
# raise Exception("No response from MCP server for tools list")
# tools_response = json.loads(tools_response_line)
# tools_info = []
# if "result" in tools_response and "tools" in tools_response["result"]:
# for tool in tools_response["result"]["tools"]:
# tool_info = {
# "name": tool["name"],
# "description": tool.get("description", ""),
# "input_schema": tool.get("inputSchema", {})
# }
# tools_info.append(tool_info)
# return {
# "status": "connected",
# "transport": "stdio",
# "tools": tools_info
# }
# except subprocess.TimeoutExpired:
# return {
# "status": "error",
# "error": f"Process timeout after {timeout} seconds",
# "tools": []
# }
# except json.JSONDecodeError as e:
# return {
# "status": "error",
# "error": f"Invalid JSON response: {str(e)}",
# "tools": []
# }
# except Exception as e:
# return {
# "status": "error",
# "error": str(e),
# "tools": []
# }
# finally:
# try:
# if 'process' in locals():
# process.terminate()
# process.wait(timeout=5)
# except:
# pass
# async def connect_stdio_server_windows(server_name, server_config, all_tools, timeout):
# """Windows-compatible stdio connection using subprocess"""
# logger.info(f"Connecting to {server_name} using Windows subprocess method")
# command = server_config["command"]
# args = server_config.get("args", [])
# env_vars = server_config.get("env", {})
# loop = asyncio.get_event_loop()
# result = await loop.run_in_executor(
# windows_executor,
# run_mcp_stdio_sync,
# command,
# args,
# env_vars,
# timeout
# )
# all_tools[server_name] = result
# if result["status"] == "connected":
# logger.info(f" {server_name}: Connected via Windows subprocess ({len(result['tools'])} tools)")
# else:
# logger.error(f" {server_name}: Error - {result['error']}")
# async def list_mcp_tools_mixed_windows(config, timeout=15):
# all_tools = {}
# if "mcpServers" not in config:
# return all_tools
# mcp_servers = config["mcpServers"]
# for server_name, server_config in mcp_servers.items():
# logger.info(f"Connecting to MCP server: {server_name}")
# if server_config.get("disabled", False):
# all_tools[server_name] = {"status": "disabled", "tools": []}
# logger.info(f" {server_name}: Disabled")
# continue
# try:
# await connect_stdio_server_windows(server_name, server_config, all_tools, timeout)
# except asyncio.TimeoutError:
# all_tools[server_name] = {
# "status": "error",
# "error": f"Connection timeout after {timeout} seconds",
# "tools": []
# }
# logger.error(f" {server_name}: Timeout after {timeout} seconds")
# except Exception as e:
# error_msg = str(e)
# all_tools[server_name] = {
# "status": "error",
# "error": error_msg,
# "tools": []
# }
# logger.error(f" {server_name}: Error - {error_msg}")
# import traceback
# logger.debug(f"Full traceback for {server_name}: {traceback.format_exc()}")
# return all_tools
async def discover_custom_tools(request_type: str, config: Dict[str, Any]):
logger.info(f"Received custom MCP discovery request: type={request_type}")
logger.debug(f"Request config: {config}")
tools = []
server_name = None
# if request_type == 'json':
# try:
# all_tools = await list_mcp_tools_mixed_windows(config, timeout=30)
# if "mcpServers" in config and config["mcpServers"]:
# server_name = list(config["mcpServers"].keys())[0]
# if server_name in all_tools:
# server_info = all_tools[server_name]
# if server_info["status"] == "connected":
# tools = server_info["tools"]
# logger.info(f"Found {len(tools)} tools for server {server_name}")
# else:
# error_msg = server_info.get("error", "Unknown error")
# logger.error(f"Server {server_name} failed: {error_msg}")
# raise HTTPException(
# status_code=400,
# detail=f"Failed to connect to MCP server '{server_name}': {error_msg}"
# )
# else:
# logger.error(f"Server {server_name} not found in results")
# raise HTTPException(status_code=400, detail=f"Server '{server_name}' not found in results")
# else:
# logger.error("No MCP servers configured")
# raise HTTPException(status_code=400, detail="No MCP servers configured")
# except HTTPException:
# raise
# except Exception as e:
# logger.error(f"Error connecting to stdio MCP server: {e}")
# import traceback
# logger.error(f"Full traceback: {traceback.format_exc()}")
# raise HTTPException(status_code=400, detail=f"Failed to connect to MCP server: {str(e)}")
# if request_type == 'http':
# if 'url' not in config:
# raise HTTPException(status_code=400, detail="HTTP configuration must include 'url' field")
# url = config['url']
# await connect_streamable_http_server(url)
# tools = await connect_streamable_http_server(url)
# elif request_type == 'sse':
# if 'url' not in config:
# raise HTTPException(status_code=400, detail="SSE configuration must include 'url' field")
# url = config['url']
# headers = config.get('headers', {})
# try:
# async with asyncio.timeout(15):
# try:
# async with sse_client(url, headers=headers) as (read, write):
# async with ClientSession(read, write) as session:
# await session.initialize()
# tools_result = await session.list_tools()
# tools_info = []
# for tool in tools_result.tools:
# tool_info = {
# "name": tool.name,
# "description": tool.description,
# "input_schema": tool.inputSchema
# }
# tools_info.append(tool_info)
# for tool_info in tools_info:
# tools.append({
# "name": tool_info["name"],
# "description": tool_info["description"],
# "inputSchema": tool_info["input_schema"]
# })
# except TypeError as e:
# if "unexpected keyword argument" in str(e):
# async with sse_client(url) as (read, write):
# async with ClientSession(read, write) as session:
# await session.initialize()
# tools_result = await session.list_tools()
# tools_info = []
# for tool in tools_result.tools:
# tool_info = {
# "name": tool.name,
# "description": tool.description,
# "input_schema": tool.inputSchema
# }
# tools_info.append(tool_info)
# for tool_info in tools_info:
# tools.append({
# "name": tool_info["name"],
# "description": tool_info["description"],
# "inputSchema": tool_info["input_schema"]
# })
# else:
# raise
# except asyncio.TimeoutError:
# raise HTTPException(status_code=408, detail="Connection timeout - server took too long to respond")
# except Exception as e:
# logger.error(f"Error connecting to SSE MCP server: {e}")
# raise HTTPException(status_code=400, detail=f"Failed to connect to MCP server: {str(e)}")
# else:
# raise HTTPException(status_code=400, detail="Invalid server type. Must be 'json' or 'sse'")
# response_data = {"tools": tools, "count": len(tools)}
# if server_name:
# response_data["serverName"] = server_name
# logger.info(f"Returning {len(tools)} tools for server {server_name}")
# return response_data

View File

@ -136,10 +136,10 @@ export function FooterSection() {
className="block w-full h-48 md:h-64 relative mt-24 z-0 cursor-pointer"
>
<div className="absolute inset-0 bg-gradient-to-t from-transparent to-background z-10 from-40%" />
<div className="absolute inset-0 mx-6">
<div className="absolute inset-0 ">
<FlickeringGrid
text={tablet ? 'Agents Agents Agents' : 'Agents Agents Agents'}
fontSize={tablet ? 70 : 90}
text={tablet ? 'Agents' : 'Agents Agents Agents'}
fontSize={tablet ? 60 : 90}
className="h-full w-full"
squareSize={2}
gridGap={tablet ? 2 : 3}

View File

@ -7,7 +7,6 @@ import {
Link as LinkIcon,
MoreHorizontal,
Trash2,
Plus,
MessagesSquare,
Loader2,
Share2,
@ -383,97 +382,37 @@ export function NavAgents() {
<Trash2 className="h-4 w-4" />
</Button>
</>
) : (
<Tooltip>
<TooltipTrigger asChild>
<div>
<Link
href="/dashboard"
className="text-muted-foreground hover:text-foreground h-7 w-7 flex items-center justify-center rounded-md"
>
<Plus className="h-4 w-4" />
<span className="sr-only">New Agent</span>
</Link>
</div>
</TooltipTrigger>
<TooltipContent>New Agent</TooltipContent>
</Tooltip>
)}
) : null}
</div>
) : null}
</div>
<SidebarMenu className="overflow-y-auto max-h-[calc(100vh-200px)] [&::-webkit-scrollbar]:hidden [-ms-overflow-style:'none'] [scrollbar-width:'none']">
{state === 'collapsed' && (
<SidebarMenuItem>
<Tooltip>
<TooltipTrigger asChild>
<div>
<SidebarMenuButton asChild>
<Link href="/dashboard" className="flex items-center">
<Plus className="h-4 w-4" />
<span>New Agent</span>
</Link>
</SidebarMenuButton>
</div>
</TooltipTrigger>
<TooltipContent>New Agent</TooltipContent>
</Tooltip>
</SidebarMenuItem>
)}
{isLoading ? (
// Show skeleton loaders while loading
Array.from({ length: 3 }).map((_, index) => (
<SidebarMenuItem key={`skeleton-${index}`}>
<SidebarMenuButton>
<div className="h-4 w-4 bg-sidebar-foreground/10 rounded-md animate-pulse"></div>
<div className="h-3 bg-sidebar-foreground/10 rounded w-3/4 animate-pulse"></div>
</SidebarMenuButton>
</SidebarMenuItem>
))
) : combinedThreads.length > 0 ? (
// Show all threads with project info
{state !== 'collapsed' && (
<>
{combinedThreads.map((thread) => {
// Check if this thread is currently active
const isActive = pathname?.includes(thread.threadId) || false;
const isThreadLoading = loadingThreadId === thread.threadId;
const isSelected = selectedThreads.has(thread.threadId);
{isLoading ? (
// Show skeleton loaders while loading
Array.from({ length: 3 }).map((_, index) => (
<SidebarMenuItem key={`skeleton-${index}`}>
<SidebarMenuButton>
<div className="h-4 w-4 bg-sidebar-foreground/10 rounded-md animate-pulse"></div>
<div className="h-3 bg-sidebar-foreground/10 rounded w-3/4 animate-pulse"></div>
</SidebarMenuButton>
</SidebarMenuItem>
))
) : combinedThreads.length > 0 ? (
// Show all threads with project info
<>
{combinedThreads.map((thread) => {
// Check if this thread is currently active
const isActive = pathname?.includes(thread.threadId) || false;
const isThreadLoading = loadingThreadId === thread.threadId;
const isSelected = selectedThreads.has(thread.threadId);
return (
<SidebarMenuItem key={`thread-${thread.threadId}`} className="group">
{state === 'collapsed' ? (
<Tooltip>
<TooltipTrigger asChild>
<div>
<SidebarMenuButton
asChild
className={
isActive ? 'bg-accent text-accent-foreground' :
isSelected ? 'bg-primary/10' : ''
}
>
<Link
href={thread.url}
onClick={(e) =>
handleThreadClick(e, thread.threadId, thread.url)
}
>
{isThreadLoading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<MessagesSquare className="h-4 w-4" />
)}
<span>{thread.projectName}</span>
</Link>
</SidebarMenuButton>
</div>
</TooltipTrigger>
<TooltipContent>{thread.projectName}</TooltipContent>
</Tooltip>
) : (
<div className="relative">
return (
<SidebarMenuItem key={`thread-${thread.threadId}`} className="group/row">
<SidebarMenuButton
asChild
className={`relative ${isActive
@ -483,113 +422,101 @@ export function NavAgents() {
: ''
}`}
>
<Link
href={thread.url}
onClick={(e) =>
handleThreadClick(e, thread.threadId, thread.url)
}
className="flex items-center"
>
<div className="flex items-center group/icon relative">
{/* Show checkbox on hover or when selected, otherwise show MessagesSquare */}
{isThreadLoading ? (
<Loader2 className="h-4 w-4 animate-spin" />
) : (
<>
{/* MessagesSquare icon - hidden on hover if not selected */}
<MessagesSquare
className={`h-4 w-4 transition-opacity duration-150 ${isSelected ? 'opacity-0' : 'opacity-100 group-hover/icon:opacity-0'
}`}
/>
{/* Checkbox - appears on hover or when selected */}
<div
className={`absolute inset-0 flex items-center justify-center transition-opacity duration-150 ${isSelected
? 'opacity-100'
: 'opacity-0 group-hover/icon:opacity-100'
}`}
onClick={(e) => toggleThreadSelection(thread.threadId, e)}
>
<div
className={`h-4 w-4 border rounded cursor-pointer hover:bg-muted/50 transition-colors flex items-center justify-center ${isSelected
? 'bg-primary border-primary'
: 'border-muted-foreground/30 bg-background'
}`}
>
{isSelected && <Check className="h-3 w-3 text-primary-foreground" />}
</div>
</div>
</>
)}
</div>
<span className="ml-2">{thread.projectName}</span>
</Link>
</SidebarMenuButton>
</div>
)}
{state !== 'collapsed' && !isSelected && (
<DropdownMenu>
<DropdownMenuTrigger asChild>
<SidebarMenuAction
showOnHover
className="group-hover:opacity-100"
onClick={() => {
// Ensure pointer events are enabled when dropdown opens
document.body.style.pointerEvents = 'auto';
}}
>
<MoreHorizontal />
<span className="sr-only">More</span>
</SidebarMenuAction>
</DropdownMenuTrigger>
<DropdownMenuContent
className="w-56 rounded-lg"
side={isMobile ? 'bottom' : 'right'}
align={isMobile ? 'end' : 'start'}
>
<DropdownMenuItem onClick={() => {
setSelectedItem({ threadId: thread?.threadId, projectId: thread?.projectId })
setShowShareModal(true)
}}>
<Share2 className="text-muted-foreground" />
<span>Share Chat</span>
</DropdownMenuItem>
<DropdownMenuItem asChild>
<a
<div className="flex items-center w-full">
<Link
href={thread.url}
target="_blank"
rel="noopener noreferrer"
onClick={(e) =>
handleThreadClick(e, thread.threadId, thread.url)
}
className="flex items-center flex-1 min-w-0"
>
<ArrowUpRight className="text-muted-foreground" />
<span>Open in New Tab</span>
</a>
</DropdownMenuItem>
<DropdownMenuSeparator />
<DropdownMenuItem
onClick={() =>
handleDeleteThread(
thread.threadId,
thread.projectName,
)
}
>
<Trash2 className="text-muted-foreground" />
<span>Delete</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
)}
</SidebarMenuItem>
);
})}
{isThreadLoading ? (
<Loader2 className="h-4 w-4 animate-spin mr-2 flex-shrink-0" />
) : null}
<span className="truncate">{thread.projectName}</span>
</Link>
{/* Checkbox - only visible on hover of this specific area */}
<div
className="mr-1 flex-shrink-0 w-4 h-4 flex items-center justify-center group/checkbox"
onClick={(e) => toggleThreadSelection(thread.threadId, e)}
>
<div
className={`h-4 w-4 border rounded cursor-pointer transition-all duration-150 flex items-center justify-center ${isSelected
? 'opacity-100 bg-primary border-primary hover:bg-primary/90'
: 'opacity-0 group-hover/checkbox:opacity-100 border-muted-foreground/30 bg-background hover:bg-muted/50'
}`}
>
{isSelected && <Check className="h-3 w-3 text-primary-foreground" />}
</div>
</div>
{/* Dropdown Menu - inline with content */}
<DropdownMenu>
<DropdownMenuTrigger asChild>
<button
className="flex-shrink-0 w-4 h-4 flex items-center justify-center hover:bg-muted/50 rounded transition-all duration-150 text-muted-foreground hover:text-foreground opacity-0 group-hover/row:opacity-100"
onClick={(e) => {
e.preventDefault();
e.stopPropagation();
// Ensure pointer events are enabled when dropdown opens
document.body.style.pointerEvents = 'auto';
}}
>
<MoreHorizontal className="h-4 w-4" />
<span className="sr-only">More actions</span>
</button>
</DropdownMenuTrigger>
<DropdownMenuContent
className="w-56 rounded-lg"
side={isMobile ? 'bottom' : 'right'}
align={isMobile ? 'end' : 'start'}
>
<DropdownMenuItem onClick={() => {
setSelectedItem({ threadId: thread?.threadId, projectId: thread?.projectId })
setShowShareModal(true)
}}>
<Share2 className="text-muted-foreground" />
<span>Share Chat</span>
</DropdownMenuItem>
<DropdownMenuItem asChild>
<a
href={thread.url}
target="_blank"
rel="noopener noreferrer"
>
<ArrowUpRight className="text-muted-foreground" />
<span>Open in New Tab</span>
</a>
</DropdownMenuItem>
<DropdownMenuSeparator />
<DropdownMenuItem
onClick={() =>
handleDeleteThread(
thread.threadId,
thread.projectName,
)
}
>
<Trash2 className="text-muted-foreground" />
<span>Delete</span>
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
</div>
</SidebarMenuButton>
</SidebarMenuItem>
);
})}
</>
) : (
<SidebarMenuItem>
<SidebarMenuButton className="text-sidebar-foreground/70">
<span>No tasks yet</span>
</SidebarMenuButton>
</SidebarMenuItem>
)}
</>
) : (
<SidebarMenuItem>
<SidebarMenuButton className="text-sidebar-foreground/70">
<MessagesSquare className="h-4 w-4" />
<span>No tasks yet</span>
</SidebarMenuButton>
</SidebarMenuItem>
)}
</SidebarMenu>
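Note: the per-row checkbox above calls toggleThreadSelection and reads isSelected from selectedThreads.has(thread.threadId), but the handler itself is outside this hunk. A minimal sketch of what it could look like, assuming selectedThreads is React state holding a Set of thread IDs (the actual implementation in nav-agents.tsx may differ):

const [selectedThreads, setSelectedThreads] = React.useState<Set<string>>(new Set());

const toggleThreadSelection = (threadId: string, e: React.MouseEvent) => {
  // Keep the surrounding row Link from navigating when the checkbox is clicked.
  e.preventDefault();
  e.stopPropagation();
  setSelectedThreads((prev) => {
    const next = new Set(prev);
    if (next.has(threadId)) {
      next.delete(threadId); // deselect
    } else {
      next.add(threadId); // select
    }
    return next;
  });
};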

View File

@@ -2,7 +2,7 @@
import * as React from 'react';
import Link from 'next/link';
import { Bot, Menu, Store, Shield, Key, Workflow } from 'lucide-react';
import { Bot, Menu, Store, Shield, Key, Workflow, Plus } from 'lucide-react';
import { NavAgents } from '@/components/sidebar/nav-agents';
import { NavUserWithTeams } from '@/components/sidebar/nav-user-with-teams';
@@ -132,22 +132,20 @@ export function SidebarLeft({
<SidebarContent className="[&::-webkit-scrollbar]:hidden [-ms-overflow-style:'none'] [scrollbar-width:'none']">
{!flagsLoading && (customAgentsEnabled || marketplaceEnabled) && (
<SidebarGroup>
{customAgentsEnabled && (
<Link href="/agents">
<SidebarMenuButton className={cn({
'bg-primary/10 font-medium': pathname === '/agents',
})}>
<Bot className="h-4 w-4 mr-2" />
<span className="flex items-center justify-between w-full">
Agent Playground
</span>
</SidebarMenuButton>
</Link>
)}
<Link href="/dashboard">
<SidebarMenuButton className={cn({
'bg-accent text-accent-foreground font-medium': pathname === '/dashboard',
})}>
<Plus className="h-4 w-4 mr-2" />
<span className="flex items-center justify-between w-full">
New Task
</span>
</SidebarMenuButton>
</Link>
{marketplaceEnabled && (
<Link href="/marketplace">
<SidebarMenuButton className={cn({
'bg-primary/10 font-medium': pathname === '/marketplace',
'bg-accent text-accent-foreground font-medium': pathname === '/marketplace',
})}>
<Store className="h-4 w-4 mr-2" />
<span className="flex items-center justify-between w-full">
@@ -156,10 +154,22 @@ export function SidebarLeft({
</SidebarMenuButton>
</Link>
)}
{customAgentsEnabled && (
<Link href="/agents">
<SidebarMenuButton className={cn({
'bg-accent text-accent-foreground font-medium': pathname === '/agents',
})}>
<Bot className="h-4 w-4 mr-2" />
<span className="flex items-center justify-between w-full">
Agent Playground
</span>
</SidebarMenuButton>
</Link>
)}
{customAgentsEnabled && (
<Link href="/settings/credentials">
<SidebarMenuButton className={cn({
'bg-primary/10 font-medium': pathname === '/settings/credentials',
'bg-accent text-accent-foreground font-medium': pathname === '/settings/credentials',
})}>
<Key className="h-4 w-4 mr-2" />
<span className="flex items-center justify-between w-full">
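Note: the active-state highlight changed above (from 'bg-primary/10 font-medium' to 'bg-accent text-accent-foreground font-medium') is written inline for every sidebar link. As an illustration only (not part of this commit, and the cn import path is assumed), a tiny helper could keep the classes in one place:

import { cn } from '@/lib/utils'; // assumed location of the cn() class helper

// Same conditional classes the diff applies inline to each SidebarMenuButton.
export function navItemClass(pathname: string | null, href: string): string {
  return cn({
    'bg-accent text-accent-foreground font-medium': pathname === href,
  });
}

// e.g. <SidebarMenuButton className={navItemClass(pathname, '/dashboard')}>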

View File

@@ -311,7 +311,7 @@ export const ChatInput = forwardRef<ChatInputHandles, ChatInputProps>(
</div>
</Card>
{isAgentRunning && (
{/* {isAgentRunning && (
<motion.div
initial={{ opacity: 0, y: -10 }}
animate={{ opacity: 1, y: 0 }}
@@ -322,7 +322,8 @@ export const ChatInput = forwardRef<ChatInputHandles, ChatInputProps>(
<span>{agentName ? `${agentName} is working...` : 'Suna is working...'}</span>
</div>
</motion.div>
)}
)} */}
</div>
);
},
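Note: the hunks above disable the running indicator by wrapping its JSX in a block comment. A sketch of an alternative (not what this commit does) would be to gate the block behind a flag so it can be restored without uncommenting; SHOW_AGENT_RUNNING_INDICATOR is a hypothetical name:

// Module-level flag (hypothetical, not in the commit):
const SHOW_AGENT_RUNNING_INDICATOR = false;

// ...then, inside the ChatInput render, in place of the commented-out block:
{SHOW_AGENT_RUNNING_INDICATOR && isAgentRunning && (
  <motion.div
    initial={{ opacity: 0, y: -10 }}
    animate={{ opacity: 1, y: 0 }}
  >
    <span>{agentName ? `${agentName} is working...` : 'Suna is working...'}</span>
  </motion.div>
)}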

View File

@@ -264,7 +264,7 @@ export function SiteHeader({
</TooltipContent>
</Tooltip>
<Tooltip>
{/* <Tooltip>
<TooltipTrigger asChild>
<Button
variant="ghost"
@@ -278,7 +278,7 @@ export function SiteHeader({
<TooltipContent>
<p>Toggle Computer Preview (CMD+I)</p>
</TooltipContent>
</Tooltip>
</Tooltip> */}
</TooltipProvider>
)}
</div>

View File

@@ -143,7 +143,7 @@ export const siteConfig = {
hours: '2 hours',
features: [
'$20/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'More models',
],
@@ -165,7 +165,7 @@ export const siteConfig = {
hours: '6 hours',
features: [
'$50/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'More models',
],
@@ -186,7 +186,7 @@ export const siteConfig = {
hours: '12 hours',
features: [
'$100/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'Priority support',
],
@@ -208,7 +208,7 @@ export const siteConfig = {
hours: '25 hours',
features: [
'$200/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'More models',
],
@@ -229,7 +229,7 @@ export const siteConfig = {
hours: '50 hours',
features: [
'$400/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'Access to intelligent Model (Full Suna)',
'Priority support',
@@ -253,7 +253,7 @@ export const siteConfig = {
hours: '125 hours',
features: [
'$800/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'Access to intelligent Model (Full Suna)',
'Priority support',
@@ -278,7 +278,7 @@ export const siteConfig = {
hours: '200 hours',
features: [
'$1000/month usage',
'+ $5 free included',
// '+ $5 free included',
'Private projects',
'Access to intelligent Model (Full Suna)',
'Priority support',
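Note: the same '+ $5 free included' feature line is commented out in every tier above. A sketch of one way to make that a single switch instead of repeated edits; the names here are hypothetical and not part of the commit:

const INCLUDE_FREE_CREDIT = false; // hypothetical flag

function tierFeatures(usage: string, rest: string[]): string[] {
  return [
    usage,
    ...(INCLUDE_FREE_CREDIT ? ['+ $5 free included'] : []),
    ...rest,
  ];
}

// e.g. features: tierFeatures('$50/month usage', ['Private projects', 'More models']),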