mirror of https://github.com/kortix-ai/suna.git

commit c00bc82b2b
Merge branch 'main' of https://github.com/kortix-ai/suna into fix-ui-bugs
@@ -88,9 +88,15 @@ class ContextManager:
             else:
                 return msg_content
 
-    def compress_tool_result_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000) -> List[Dict[str, Any]]:
-        """Compress the tool result messages except the most recent one."""
-        uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
+    def compress_tool_result_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000, uncompressed_total_token_count: Optional[int] = None) -> List[Dict[str, Any]]:
+        """Compress the tool result messages except the most recent one.
+
+        Compression is deterministic (simple truncation), ensuring consistent results across requests.
+        This allows prompt caching (applied later) to produce cache hits on identical compressed content.
+        """
+        if uncompressed_total_token_count is None:
+            uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
 
         max_tokens_value = max_tokens or (100 * 1000)
 
         if uncompressed_total_token_count > max_tokens_value:
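Note on the pattern above (repeated for the user and assistant variants in the next hunks): the total token count is now computed once by the caller and passed down, and each compress_* helper only recounts when no precomputed value is supplied. A minimal sketch of that "count once, pass down" idea, with hypothetical names standing in for litellm's token_counter and the real truncation logic:

from typing import Any, Dict, List, Optional

def rough_count(messages: List[Dict[str, Any]]) -> int:
    # hypothetical stand-in for litellm's token_counter
    return sum(len(str(m.get("content", ""))) // 4 for m in messages)

def compress_stage(messages: List[Dict[str, Any]],
                   max_tokens: Optional[int] = None,
                   uncompressed_total_token_count: Optional[int] = None) -> List[Dict[str, Any]]:
    if uncompressed_total_token_count is None:
        # fallback keeps the old behaviour when no precomputed total is passed
        uncompressed_total_token_count = rough_count(messages)
    max_tokens_value = max_tokens or (100 * 1000)
    if uncompressed_total_token_count > max_tokens_value:
        pass  # deterministic truncation of oversized messages would happen here
    return messages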
@@ -112,9 +118,15 @@ class ContextManager:
                     msg["content"] = self.safe_truncate(msg["content"], int(max_tokens_value * 2))
         return messages
 
-    def compress_user_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000) -> List[Dict[str, Any]]:
-        """Compress the user messages except the most recent one."""
-        uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
+    def compress_user_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000, uncompressed_total_token_count: Optional[int] = None) -> List[Dict[str, Any]]:
+        """Compress the user messages except the most recent one.
+
+        Compression is deterministic (simple truncation), ensuring consistent results across requests.
+        This allows prompt caching (applied later) to produce cache hits on identical compressed content.
+        """
+        if uncompressed_total_token_count is None:
+            uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
 
         max_tokens_value = max_tokens or (100 * 1000)
 
         if uncompressed_total_token_count > max_tokens_value:
@@ -136,9 +148,15 @@ class ContextManager:
                     msg["content"] = self.safe_truncate(msg["content"], int(max_tokens_value * 2))
         return messages
 
-    def compress_assistant_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000) -> List[Dict[str, Any]]:
-        """Compress the assistant messages except the most recent one."""
-        uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
+    def compress_assistant_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int], token_threshold: int = 1000, uncompressed_total_token_count: Optional[int] = None) -> List[Dict[str, Any]]:
+        """Compress the assistant messages except the most recent one.
+
+        Compression is deterministic (simple truncation), ensuring consistent results across requests.
+        This allows prompt caching (applied later) to produce cache hits on identical compressed content.
+        """
+        if uncompressed_total_token_count is None:
+            uncompressed_total_token_count = token_counter(model=llm_model, messages=messages)
 
         max_tokens_value = max_tokens or (100 * 1000)
 
         if uncompressed_total_token_count > max_tokens_value:
@@ -188,15 +206,10 @@ class ContextManager:
             result.append(msg)
         return result
 
-    def compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int] = 41000, token_threshold: int = 4096, max_iterations: int = 5) -> List[Dict[str, Any]]:
-        """Compress the messages.
-
-        Args:
-            messages: List of messages to compress
-            llm_model: Model name for token counting
-            max_tokens: Maximum allowed tokens
-            token_threshold: Token threshold for individual message compression (must be a power of 2)
-            max_iterations: Maximum number of compression iterations
+    def compress_messages(self, messages: List[Dict[str, Any]], llm_model: str, max_tokens: Optional[int] = 41000, token_threshold: int = 4096, max_iterations: int = 5, actual_total_tokens: Optional[int] = None, system_prompt: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
+        """Compress the messages WITHOUT applying caching during iterations.
+
+        Caching should be applied ONCE at the end by the caller, not during compression.
         """
         # Get model-specific token limits from constants
         context_window = model_manager.get_context_window(llm_model)
@@ -218,24 +231,46 @@ class ContextManager:
         result = messages
         result = self.remove_meta_messages(result)
 
-        uncompressed_total_token_count = token_counter(model=llm_model, messages=result)
+        # Calculate initial token count - just conversation + system prompt, NO caching overhead
+        print(f"actual_total_tokens: {actual_total_tokens}")
+        if actual_total_tokens is not None:
+            uncompressed_total_token_count = actual_total_tokens
+        else:
+            print("no actual_total_tokens")
+            # Count conversation + system prompt WITHOUT caching
+            if system_prompt:
+                uncompressed_total_token_count = token_counter(model=llm_model, messages=[system_prompt] + result)
+            else:
+                uncompressed_total_token_count = token_counter(model=llm_model, messages=result)
+        logger.info(f"Initial token count (no caching): {uncompressed_total_token_count}")
 
-        result = self.compress_tool_result_messages(result, llm_model, max_tokens, token_threshold)
-        result = self.compress_user_messages(result, llm_model, max_tokens, token_threshold)
-        result = self.compress_assistant_messages(result, llm_model, max_tokens, token_threshold)
+        # Apply compression
+        result = self.compress_tool_result_messages(result, llm_model, max_tokens, token_threshold, uncompressed_total_token_count)
+        result = self.compress_user_messages(result, llm_model, max_tokens, token_threshold, uncompressed_total_token_count)
+        result = self.compress_assistant_messages(result, llm_model, max_tokens, token_threshold, uncompressed_total_token_count)
 
-        compressed_token_count = token_counter(model=llm_model, messages=result)
-
-        logger.info(f"Context compression: {uncompressed_total_token_count} -> {compressed_token_count} tokens")
+        # Recalculate WITHOUT caching overhead
+        if system_prompt:
+            compressed_total = token_counter(model=llm_model, messages=[system_prompt] + result)
+        else:
+            compressed_total = token_counter(model=llm_model, messages=result)
+
+        logger.info(f"Context compression: {uncompressed_total_token_count} -> {compressed_total} token")
 
+        # Recurse if still too large
         if max_iterations <= 0:
-            logger.warning(f"compress_messages: Max iterations reached, omitting messages")
-            result = self.compress_messages_by_omitting_messages(messages, llm_model, max_tokens)
+            logger.warning(f"Max iterations reached, omitting messages")
+            result = self.compress_messages_by_omitting_messages(result, llm_model, max_tokens, system_prompt=system_prompt)
             return result
 
-        if compressed_token_count > max_tokens:
-            logger.warning(f"Further token compression is needed: {compressed_token_count} > {max_tokens}")
-            result = self.compress_messages(messages, llm_model, max_tokens, token_threshold // 2, max_iterations - 1)
+        if compressed_total > max_tokens:
+            logger.warning(f"Further compression needed: {compressed_total} > {max_tokens}")
+            # Recursive call - still NO caching
+            result = self.compress_messages(
+                result, llm_model, max_tokens,
+                token_threshold // 2, max_iterations - 1,
+                compressed_total, system_prompt,
+            )
 
         return self.middle_out_messages(result)
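The recursion at the bottom of compress_messages threads the already-computed compressed_total and the system prompt back into itself, halving token_threshold and decrementing max_iterations on each pass before falling back to message omission. A tiny standalone trace of how the per-message threshold shrinks under the defaults from the signature above (4096 and 5):

token_threshold, max_iterations = 4096, 5  # defaults from the compress_messages signature

while max_iterations > 0:
    print(f"pass with token_threshold={token_threshold}")
    token_threshold //= 2
    max_iterations -= 1
# prints 4096, 2048, 1024, 512, 256; once iterations are exhausted the
# compress_messages_by_omitting_messages fallback takes over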
@@ -245,7 +280,8 @@ class ContextManager:
         llm_model: str,
         max_tokens: Optional[int] = 41000,
         removal_batch_size: int = 10,
-        min_messages_to_keep: int = 10
+        min_messages_to_keep: int = 10,
+        system_prompt: Optional[Dict[str, Any]] = None
     ) -> List[Dict[str, Any]]:
         """Compress the messages by omitting messages from the middle.
 
@@ -263,15 +299,19 @@ class ContextManager:
         result = self.remove_meta_messages(result)
 
         # Early exit if no compression needed
-        initial_token_count = token_counter(model=llm_model, messages=result)
+        if system_prompt:
+            initial_token_count = token_counter(model=llm_model, messages=[system_prompt] + result)
+        else:
+            initial_token_count = token_counter(model=llm_model, messages=result)
 
         max_allowed_tokens = max_tokens or (100 * 1000)
 
         if initial_token_count <= max_allowed_tokens:
             return result
 
-        # Separate system message (assumed to be first) from conversation messages
-        system_message = messages[0] if messages and isinstance(messages[0], dict) and messages[0].get('role') == 'system' else None
-        conversation_messages = result[1:] if system_message else result
+        # Separate system message (assumed to be first) from conversation messages
+        system_message = system_prompt
+        conversation_messages = result
 
         safety_limit = 500
         current_token_count = initial_token_count
@@ -302,9 +342,14 @@ class ContextManager:
             messages_to_count = ([system_message] + conversation_messages) if system_message else conversation_messages
             current_token_count = token_counter(model=llm_model, messages=messages_to_count)
 
-        # Prepare final result
-        final_messages = ([system_message] + conversation_messages) if system_message else conversation_messages
-        final_token_count = token_counter(model=llm_model, messages=final_messages)
+        # Prepare final result - return only conversation messages (matches compress_messages pattern)
+        final_messages = conversation_messages
+
+        # Log with system prompt included for accurate token reporting
+        if system_message:
+            final_token_count = token_counter(model=llm_model, messages=[system_message] + final_messages)
+        else:
+            final_token_count = token_counter(model=llm_model, messages=final_messages)
 
         logger.info(f"Context compression (omit): {initial_token_count} -> {final_token_count} tokens ({len(messages)} -> {len(final_messages)} messages)")
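With this change the omit fallback returns only the trimmed conversation: the system prompt is passed in separately, included when counting tokens for the log line, but never prepended to the return value. A much-simplified, hypothetical stand-in for the middle-removal idea (the real loop with removal_batch_size and the safety limit is not shown in this hunk):

from typing import Any, Dict, List

def omit_middle(conversation: List[Dict[str, Any]], min_messages_to_keep: int = 10) -> List[Dict[str, Any]]:
    # keep the oldest and newest messages, drop the middle (illustrative only)
    if len(conversation) <= min_messages_to_keep:
        return conversation
    head = min_messages_to_keep // 2
    tail = min_messages_to_keep - head
    return conversation[:head] + conversation[-tail:]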
@@ -232,6 +232,12 @@ def apply_anthropic_caching_strategy(
     This prevents cache invalidation while optimizing for context window utilization
     and cost efficiency across different conversation patterns.
     """
+    # DEBUG: Count message roles to verify tool results are included
+    message_roles = [msg.get('role', 'unknown') for msg in conversation_messages]
+    role_counts = {}
+    for role in message_roles:
+        role_counts[role] = role_counts.get(role, 0) + 1
+    logger.debug(f"🔍 CACHING INPUT: {len(conversation_messages)} messages - Roles: {role_counts}")
     if not conversation_messages:
         conversation_messages = []
 
@@ -256,10 +262,15 @@ def apply_anthropic_caching_strategy(
 
     # Calculate mathematically optimized cache threshold
     if cache_threshold_tokens is None:
+        # Include system prompt tokens in calculation for accurate density (like compression does)
+        # Use token_counter on combined messages to match compression's calculation method
+        from litellm import token_counter
+        total_tokens = token_counter(model=model_name, messages=[working_system_prompt] + conversation_messages) if conversation_messages else 0
+
         cache_threshold_tokens = calculate_optimal_cache_threshold(
             context_window_tokens,
             len(conversation_messages),
-            get_messages_token_count(conversation_messages, model_name) if conversation_messages else 0
+            total_tokens  # Now includes system prompt for accurate density calculation
         )
 
     logger.info(f"📊 Applying single cache breakpoint strategy for {len(conversation_messages)} messages")
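The threshold input now counts the system prompt together with the conversation via litellm's token_counter, matching how the compression path counts tokens. A small, hypothetical comparison (the model id and messages are made up; only the counting pattern comes from the diff):

from litellm import token_counter  # same helper the new code imports locally

model_name = "claude-3-5-sonnet-20241022"  # hypothetical model id
system_prompt = {"role": "system", "content": "You are a helpful assistant."}
conversation = [{"role": "user", "content": "Summarize the quarterly report."}]

conversation_only = token_counter(model=model_name, messages=conversation)
with_system_prompt = token_counter(model=model_name, messages=[system_prompt] + conversation)
# with_system_prompt >= conversation_only, so the computed cache threshold now
# reflects the full prompt that is actually sent to the model
print(conversation_only, with_system_prompt)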
@@ -307,6 +318,7 @@ def apply_anthropic_caching_strategy(
     max_cacheable_tokens = int(context_window_tokens * 0.8)
 
     if total_conversation_tokens <= max_cacheable_tokens:
+        logger.debug(f"Conversation fits within cache limits - use chunked approach")
         # Conversation fits within cache limits - use chunked approach
         chunks_created = create_conversation_chunks(
             conversation_messages,
@@ -350,6 +362,7 @@ def create_conversation_chunks(
     Final messages are NEVER cached to prevent cache invalidation.
     Returns number of cache blocks created.
     """
+    logger.debug(f"Creating conversation chunks - chunk threshold: {chunk_threshold_tokens}, max blocks: {max_blocks}")
     if not messages or max_blocks <= 0:
         return 0
@@ -17,6 +17,7 @@ from langfuse.client import StatefulGenerationClient, StatefulTraceClient
 from core.services.langfuse import langfuse
 from datetime import datetime, timezone
 from core.billing.billing_integration import billing_integration
+from litellm.utils import token_counter
 
 ToolChoice = Literal["auto", "required", "none"]
@@ -305,8 +306,11 @@ class ThreadManager:
         if ENABLE_CONTEXT_MANAGER:
             logger.debug(f"Context manager enabled, compressing {len(messages)} messages")
             context_manager = ContextManager()
+
             compressed_messages = context_manager.compress_messages(
-                messages, llm_model, max_tokens=llm_max_tokens
+                messages, llm_model, max_tokens=llm_max_tokens,
+                actual_total_tokens=None,  # Will be calculated inside
+                system_prompt=system_prompt  # KEY FIX: No caching during compression
             )
             logger.debug(f"Context compression completed: {len(messages)} -> {len(compressed_messages)} messages")
             messages = compressed_messages
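Because compression is deterministic and caching is applied only once afterwards, compressing the same input twice should yield identical output; that is what allows the later cache breakpoints to hit. A hedged sanity check reusing the variables from the call above (illustrative only, not an existing test in the repo):

import copy

context_manager = ContextManager()

first = context_manager.compress_messages(
    copy.deepcopy(messages), llm_model, max_tokens=llm_max_tokens, system_prompt=system_prompt
)
second = context_manager.compress_messages(
    copy.deepcopy(messages), llm_model, max_tokens=llm_max_tokens, system_prompt=system_prompt
)
assert first == second  # identical compressed output is what lets cache hits happen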
@@ -340,6 +344,10 @@ class ThreadManager:
         except Exception as e:
             logger.warning(f"Failed to update Langfuse generation: {e}")
 
+        # Log final prepared messages token count
+        final_prepared_tokens = token_counter(model=llm_model, messages=prepared_messages)
+        logger.info(f"📤 Final prepared messages being sent to LLM: {final_prepared_tokens} tokens")
+
         # Make LLM call
         try:
             llm_response = await make_llm_api_call(
@@ -53,26 +53,52 @@ export function renderAttachments(attachments: string[], fileViewerHandler?: (fi
 // Render Markdown content while preserving XML tags that should be displayed as tool calls
 function preprocessTextOnlyTools(content: string): string {
   console.log('🔍 preprocessTextOnlyTools called with:', typeof content, content);
 
   if (!content || typeof content !== 'string') {
     console.warn('❌ preprocessTextOnlyTools: Invalid content type:', typeof content, content);
     return content || '';
   }
 
-  // Handle new function calls format for text-only tools - extract text parameter content
-  // Complete XML format
-  content = content.replace(/<function_calls>\s*<invoke name="ask">\s*<parameter name="text">([\s\S]*?)<\/parameter>[\s\S]*?<\/invoke>\s*<\/function_calls>/gi, '$1');
-  content = content.replace(/<function_calls>\s*<invoke name="complete">\s*<parameter name="text">([\s\S]*?)<\/parameter>[\s\S]*?<\/invoke>\s*<\/function_calls>/gi, '$1');
+  // For ask/complete tools, we need to preserve them if they have attachments
+  // Only strip them if they don't have attachments parameter
+
+  // Handle new function calls format - only strip if no attachments
+  content = content.replace(/<function_calls>\s*<invoke name="ask">\s*<parameter name="text">([\s\S]*?)<\/parameter>\s*<\/invoke>\s*<\/function_calls>/gi, (match) => {
+    if (match.includes('<parameter name="attachments"')) return match;
+    return match.replace(/<function_calls>\s*<invoke name="ask">\s*<parameter name="text">([\s\S]*?)<\/parameter>\s*<\/invoke>\s*<\/function_calls>/gi, '$1');
+  });
+
+  content = content.replace(/<function_calls>\s*<invoke name="complete">\s*<parameter name="text">([\s\S]*?)<\/parameter>\s*<\/invoke>\s*<\/function_calls>/gi, (match) => {
+    if (match.includes('<parameter name="attachments"')) return match;
+    return match.replace(/<function_calls>\s*<invoke name="complete">\s*<parameter name="text">([\s\S]*?)<\/parameter>\s*<\/invoke>\s*<\/function_calls>/gi, '$1');
+  });
+
   content = content.replace(/<function_calls>\s*<invoke name="present_presentation">[\s\S]*?<parameter name="text">([\s\S]*?)<\/parameter>[\s\S]*?<\/invoke>\s*<\/function_calls>/gi, '$1');
 
-  // Handle streaming/partial XML for message tools - extract text parameter content even if incomplete
-  content = content.replace(/<function_calls>\s*<invoke name="ask">\s*<parameter name="text">([\s\S]*?)$/gi, '$1');
-  content = content.replace(/<function_calls>\s*<invoke name="complete">\s*<parameter name="text">([\s\S]*?)$/gi, '$1');
+  // Handle streaming/partial XML for message tools - only strip if no attachments visible yet
+  content = content.replace(/<function_calls>\s*<invoke name="ask">\s*<parameter name="text">([\s\S]*?)$/gi, (match) => {
+    if (match.includes('<parameter name="attachments"')) return match;
+    return match.replace(/<function_calls>\s*<invoke name="ask">\s*<parameter name="text">([\s\S]*?)$/gi, '$1');
+  });
+
+  content = content.replace(/<function_calls>\s*<invoke name="complete">\s*<parameter name="text">([\s\S]*?)$/gi, (match) => {
+    if (match.includes('<parameter name="attachments"')) return match;
+    return match.replace(/<function_calls>\s*<invoke name="complete">\s*<parameter name="text">([\s\S]*?)$/gi, '$1');
+  });
+
   content = content.replace(/<function_calls>\s*<invoke name="present_presentation">[\s\S]*?<parameter name="text">([\s\S]*?)$/gi, '$1');
 
-  // Also handle old format for backward compatibility
-  content = content.replace(/<ask[^>]*>([\s\S]*?)<\/ask>/gi, '$1');
-  content = content.replace(/<complete[^>]*>([\s\S]*?)<\/complete>/gi, '$1');
+  // Also handle old format - only strip if no attachments attribute
+  content = content.replace(/<ask[^>]*>([\s\S]*?)<\/ask>/gi, (match) => {
+    if (match.match(/<ask[^>]*attachments=/i)) return match;
+    return match.replace(/<ask[^>]*>([\s\S]*?)<\/ask>/gi, '$1');
+  });
+
+  content = content.replace(/<complete[^>]*>([\s\S]*?)<\/complete>/gi, (match) => {
+    if (match.match(/<complete[^>]*attachments=/i)) return match;
+    return match.replace(/<complete[^>]*>([\s\S]*?)<\/complete>/gi, '$1');
+  });
+
   content = content.replace(/<present_presentation[^>]*>([\s\S]*?)<\/present_presentation>/gi, '$1');
   return content;
 }
@@ -88,7 +114,7 @@ export function renderMarkdownContent(
 ) {
   // Preprocess content to convert text-only tools to natural text
   content = preprocessTextOnlyTools(content);
 
   // If in debug mode, just display raw content in a pre tag
   if (debugMode) {
     return (
@@ -139,7 +165,7 @@ export function renderMarkdownContent(
         {renderAttachments(attachmentArray, fileViewerHandler, sandboxId, project)}
       </div>
     );
 
     // Also render standalone attachments outside the message
     const standaloneAttachments = renderStandaloneAttachments(attachmentArray, fileViewerHandler, sandboxId, project);
     if (standaloneAttachments) {
@@ -165,7 +191,7 @@ export function renderMarkdownContent(
         {renderAttachments(attachmentArray, fileViewerHandler, sandboxId, project)}
       </div>
     );
 
     // Also render standalone attachments outside the message
     const standaloneAttachments = renderStandaloneAttachments(attachmentArray, fileViewerHandler, sandboxId, project);
     if (standaloneAttachments) {
@@ -268,7 +294,7 @@ export function renderMarkdownContent(
         {renderAttachments(attachments, fileViewerHandler, sandboxId, project)}
       </div>
     );
 
     // Also render standalone attachments outside the message
     const standaloneAttachments = renderStandaloneAttachments(attachments, fileViewerHandler, sandboxId, project);
     if (standaloneAttachments) {
@@ -296,7 +322,7 @@ export function renderMarkdownContent(
         {renderAttachments(attachments, fileViewerHandler, sandboxId, project)}
       </div>
     );
 
     // Also render standalone attachments outside the message
     const standaloneAttachments = renderStandaloneAttachments(attachments, fileViewerHandler, sandboxId, project);
     if (standaloneAttachments) {
@@ -671,13 +697,13 @@ export const ThreadContent: React.FC<ThreadContentProps> = ({
   // Use merged groups instead of original grouped messages
   const finalGroupedMessages = mergedGroups;
 
 
   // Helper function to add streaming content to groups
   const appendStreamingContent = (content: string, isPlayback: boolean = false) => {
     const messageId = isPlayback ? 'playbackStreamingText' : 'streamingTextContent';
     const metadata = isPlayback ? 'playbackStreamingText' : 'streamingTextContent';
     const keySuffix = isPlayback ? 'playback-streaming' : 'streaming';
 
     const lastGroup = finalGroupedMessages.at(-1);
     if (!lastGroup || lastGroup.type === 'user') {
       // Create new assistant group for streaming content
@@ -770,7 +796,7 @@ export const ThreadContent: React.FC<ThreadContentProps> = ({
             <div key={group.key} className="space-y-3">
               {/* All file attachments rendered outside message bubble */}
               {renderStandaloneAttachments(attachments as string[], handleOpenFileViewer, sandboxId, project, true)}
 
               <div className="flex justify-end">
                 <div className="flex max-w-[85%] rounded-3xl rounded-br-lg bg-card border px-4 py-3 break-words overflow-hidden">
                   <div className="space-y-3 min-w-0 flex-1">
@@ -789,7 +815,7 @@ export const ThreadContent: React.FC<ThreadContentProps> = ({
             // Get agent_id from the first assistant message in this group
             const firstAssistantMsg = group.messages.find(m => m.type === 'assistant');
             const groupAgentId = firstAssistantMsg?.agent_id;
 
             return (
               <div key={group.key} ref={groupIndex === groupedMessages.length - 1 ? latestMessageRef : null}>
                 <div className="flex flex-col gap-2">
@@ -903,7 +929,7 @@ export const ThreadContent: React.FC<ThreadContentProps> = ({
 
                     // Preprocess content first to remove text-only tool tags
                     const textToRender = preprocessTextOnlyTools(streamingTextContent || '');
 
                     let detectedTag: string | null = null;
                     let tagStartIndex = -1;
                     if (textToRender) {
@@ -927,11 +953,11 @@ export const ThreadContent: React.FC<ThreadContentProps> = ({
                     }
                     const textBeforeTag = detectedTag ? textToRender.substring(0, tagStartIndex) : textToRender;
                     const showCursor =
                       (streamHookStatus ===
                         'streaming' ||
                         streamHookStatus ===
                         'connecting') &&
                       !detectedTag;
 
                     // Show minimal processing indicator when agent is active but no streaming text after preprocessing
                     if (!textToRender && (streamHookStatus === 'streaming' || streamHookStatus === 'connecting')) {
@@ -946,8 +972,8 @@ export const ThreadContent: React.FC<ThreadContentProps> = ({
 
                     return (
                       <>
                         <StreamingText
                           content={textBeforeTag}
                           className="text-sm prose prose-sm dark:prose-invert chat-markdown max-w-none [&>:first-child]:mt-0 prose-headings:mt-3 break-words overflow-wrap-anywhere"
                         />
 
@@ -972,7 +998,7 @@ export const ThreadContent: React.FC<ThreadContentProps> = ({
                   {(() => {
                     // Preprocess content first to remove text-only tool tags
                     const textToRender = preprocessTextOnlyTools(streamingText || '');
 
                     let detectedTag: string | null = null;
                     let tagStartIndex = -1;
                     if (textToRender) {
@@ -1009,8 +1035,8 @@ export const ThreadContent: React.FC<ThreadContentProps> = ({
                       </pre>
                     ) : (
                       <>
                         <StreamingText
                           content={textBeforeTag}
                           className="text-sm prose prose-sm dark:prose-invert chat-markdown max-w-none [&>:first-child]:mt-0 prose-headings:mt-3 break-words overflow-wrap-anywhere"
                         />
 