mirror of https://github.com/kortix-ai/suna.git
Merge pull request #83 => improve cache-control to handle tmp messages
improve cache-control to handle tmp messages
Commit eda0cee931
@@ -854,6 +854,7 @@ async def generate_and_update_project_name(project_id: str, prompt: str):
        logger.warning(f"Failed to get valid response from LLM for project {project_id} naming. Response: {response}")

    # Update database if name was generated
    if generated_name:
        update_result = await client.table('projects') \
            .update({"name": generated_name}) \
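The hunk above ends mid-statement at the chained update call. For orientation, here is a minimal sketch of how such a Supabase-style update is typically completed; the .eq() filter, the .execute() call, and the result check below are assumptions for illustration, not lines taken from this commit:

    # Hypothetical completion of the truncated update chain (assumed client API).
    update_result = await client.table('projects') \
        .update({"name": generated_name}) \
        .eq('project_id', project_id) \
        .execute()

    if update_result.data:
        logger.info(f"Project {project_id} renamed to '{generated_name}'")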
@@ -160,12 +160,10 @@ def prepare_params(
    # Check model name *after* potential modifications (like adding bedrock/ prefix)
    effective_model_name = params.get("model", model_name)  # Use model from params if set, else original
    if "claude" in effective_model_name.lower() or "anthropic" in effective_model_name.lower():
        logger.debug("Applying minimal Anthropic prompt caching.")
        messages = params["messages"]  # Direct reference, modification affects params

        # Ensure messages is a list
        if not isinstance(messages, list):
            logger.warning(f"Messages is not a list ({type(messages)}), skipping Anthropic cache control.")
            return params  # Return early if messages format is unexpected

        # 1. Process the first message if it's a system prompt with string content
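The cache_control markers injected here follow Anthropic's prompt-caching convention, where a text content block carries "cache_control": {"type": "ephemeral"}. A small illustrative sketch of the message shape this branch works toward; the message text is made up and the real messages come from the caller:

    # Illustrative only: target shape of a cached system prompt for Anthropic models.
    messages = [
        {
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": "You are a helpful assistant.",
                    "cache_control": {"type": "ephemeral"},
                }
            ],
        },
        {"role": "user", "content": "First question"},
    ]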
@@ -176,7 +174,6 @@ def prepare_params(
            messages[0]["content"] = [
                {"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}
            ]
            logger.debug("Applied cache_control to system message (converted from string).")
        elif isinstance(content, list):
            # If content is already a list, check if the first text block needs cache_control
            for item in content:
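When the system content is already a list of blocks, only the first text block is tagged (see the break in the next hunk). A short, self-contained example with made-up content showing that branch in isolation:

    # Illustrative input: system content already provided as a list of blocks.
    system_content = [
        {"type": "text", "text": "You are a coding assistant."},
        {"type": "text", "text": "Prefer concise answers."},
    ]

    for item in system_content:
        if isinstance(item, dict) and item.get("type") == "text":
            if "cache_control" not in item:
                item["cache_control"] = {"type": "ephemeral"}
            break  # only the first text block is marked for the system prompt

    # system_content[0] now carries cache_control; system_content[1] is untouched.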
@@ -184,36 +181,49 @@ def prepare_params(
                         if "cache_control" not in item:
                             item["cache_control"] = {"type": "ephemeral"}
                         break # Apply to the first text block only for system prompt
             else:
                 logger.warning("System message content is not a string or list, skipping cache_control.")
 
-        # 2. Find and process the last user message
+        # 2. Find and process relevant user and assistant messages
         last_user_idx = -1
-        for i in range(len(messages) - 1, -1, -1):
-            if messages[i].get("role") == "user":
-                last_user_idx = i
-                break
+        second_last_user_idx = -1
+        last_assistant_idx = -1
 
-        if last_user_idx != -1:
-            last_user_message = messages[last_user_idx]
-            content = last_user_message.get("content")
+        for i in range(len(messages) - 1, -1, -1):
+            role = messages[i].get("role")
+            if role == "user":
+                if last_user_idx == -1:
+                    last_user_idx = i
+                elif second_last_user_idx == -1:
+                    second_last_user_idx = i
+            elif role == "assistant":
+                if last_assistant_idx == -1:
+                    last_assistant_idx = i
+
+            # Stop searching if we've found all needed messages
+            if last_user_idx != -1 and second_last_user_idx != -1 and last_assistant_idx != -1:
+                break
+
+        # Helper function to apply cache control
+        def apply_cache_control(message_idx: int, message_role: str):
+            if message_idx == -1:
+                return
+
+            message = messages[message_idx]
+            content = message.get("content")
+
             if isinstance(content, str):
                 # Wrap the string content in the required list structure
-                last_user_message["content"] = [
+                message["content"] = [
                     {"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}
                 ]
-                logger.debug(f"Applied cache_control to last user message (string content, index {last_user_idx}).")
             elif isinstance(content, list):
                 # Modify text blocks within the list directly
                 for item in content:
                     if isinstance(item, dict) and item.get("type") == "text":
                         # Add cache_control if not already present
                         if "cache_control" not in item:
                             item["cache_control"] = {"type": "ephemeral"}
 
-            else:
-                logger.warning(f"Last user message (index {last_user_idx}) content is not a string or list ({type(content)}), skipping cache_control.")
+        # Apply cache control to the identified messages
+        apply_cache_control(last_user_idx, "last user")
+        apply_cache_control(second_last_user_idx, "second last user")
+        apply_cache_control(last_assistant_idx, "last assistant")
 
         # Add reasoning_effort for Anthropic models if enabled
         use_thinking = enable_thinking if enable_thinking is not None else False
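Read as a whole, this hunk replaces the old single "last user message" lookup with a reverse scan that records the last user, second-to-last user, and last assistant messages, then marks each of them through the shared apply_cache_control helper; per the commit title, this is presumably so that temporary messages appended near the end of the conversation no longer shift the cache breakpoints. A condensed, standalone sketch of that selection logic follows; mark_cacheable is a hypothetical stand-in for the inline code above, with logging and the surrounding prepare_params plumbing omitted:

    def mark_cacheable(messages: list) -> None:
        """Tag the last user, second-to-last user, and last assistant messages as cacheable."""
        last_user_idx = second_last_user_idx = last_assistant_idx = -1

        # Walk backwards once, remembering the first hit for each slot.
        for i in range(len(messages) - 1, -1, -1):
            role = messages[i].get("role")
            if role == "user":
                if last_user_idx == -1:
                    last_user_idx = i
                elif second_last_user_idx == -1:
                    second_last_user_idx = i
            elif role == "assistant":
                if last_assistant_idx == -1:
                    last_assistant_idx = i
            if -1 not in (last_user_idx, second_last_user_idx, last_assistant_idx):
                break

        for idx in (last_user_idx, second_last_user_idx, last_assistant_idx):
            if idx == -1:
                continue
            content = messages[idx].get("content")
            if isinstance(content, str):
                # Wrap plain strings in the block structure Anthropic expects.
                messages[idx]["content"] = [
                    {"type": "text", "text": content, "cache_control": {"type": "ephemeral"}}
                ]
            elif isinstance(content, list):
                for item in content:
                    if isinstance(item, dict) and item.get("type") == "text":
                        item.setdefault("cache_control", {"type": "ephemeral"})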
@@ -269,6 +279,7 @@ async def make_llm_api_call(
        LLMRetryError: If API call fails after retries
        LLMError: For other API-related errors
    """
    # debug <timestamp>.json messages
    logger.debug(f"Making LLM API call to model: {model_name} (Thinking: {enable_thinking}, Effort: {reasoning_effort})")
    params = prepare_params(
        messages=messages,
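For orientation, a hedged sketch of how a caller might invoke this function with the parameters visible in the diff; the keyword names mirror what the hunks show (model_name, enable_thinking, reasoning_effort), but the full signature and the model string below are assumptions:

    # Hypothetical call site; model string and argument values are illustrative only.
    response = await make_llm_api_call(
        messages=[{"role": "user", "content": "Summarize the project goals."}],
        model_name="anthropic/claude-3-7-sonnet-latest",
        enable_thinking=False,
        reasoning_effort="low",
    )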
@@ -286,7 +297,6 @@ async def make_llm_api_call(
        enable_thinking=enable_thinking,
        reasoning_effort=reasoning_effort
    )

    last_error = None
    for attempt in range(MAX_RETRIES):
        try:
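The tail of this hunk shows the start of the retry loop (last_error, MAX_RETRIES, the try block). A minimal, self-contained sketch of the retry pattern those lines suggest; the value of MAX_RETRIES, the backoff delay, and the broad exception handling are assumptions rather than lines from this file:

    import asyncio

    MAX_RETRIES = 3          # assumed value; the real constant is defined elsewhere in the module
    RETRY_DELAY_SECONDS = 2  # hypothetical backoff base

    async def call_with_retries(call, *args, **kwargs):
        """Retry an async LLM call a fixed number of times, re-raising the last error."""
        last_error = None
        for attempt in range(MAX_RETRIES):
            try:
                return await call(*args, **kwargs)
            except Exception as e:  # the real code presumably narrows this to API errors
                last_error = e
                await asyncio.sleep(RETRY_DELAY_SECONDS * (attempt + 1))
        raise last_error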