mirror of https://github.com/kortix-ai/suna.git
fix(thread_manager): adjust truncation logic and improve token compression handling
This commit is contained in:
parent
3507230ed0
commit
05a80e5f55
|
@ -92,10 +92,11 @@ class ThreadManager:
|
||||||
|
|
||||||
def _safe_truncate(self, msg_content: Union[str, dict], max_length: int = 100000) -> Union[str, dict]:
|
def _safe_truncate(self, msg_content: Union[str, dict], max_length: int = 100000) -> Union[str, dict]:
|
||||||
"""Truncate the message content safely by removing the middle portion."""
|
"""Truncate the message content safely by removing the middle portion."""
|
||||||
|
max_length = min(max_length, 100000)
|
||||||
if isinstance(msg_content, str):
|
if isinstance(msg_content, str):
|
||||||
if len(msg_content) > max_length:
|
if len(msg_content) > max_length:
|
||||||
# Calculate how much to keep from start and end
|
# Calculate how much to keep from start and end
|
||||||
keep_length = max_length - 100 # Reserve space for truncation message
|
keep_length = max_length - 150 # Reserve space for truncation message
|
||||||
start_length = keep_length // 2
|
start_length = keep_length // 2
|
||||||
end_length = keep_length - start_length
|
end_length = keep_length - start_length
|
||||||
|
|
||||||
|
@ -109,7 +110,7 @@ class ThreadManager:
|
||||||
json_str = json.dumps(msg_content)
|
json_str = json.dumps(msg_content)
|
||||||
if len(json_str) > max_length:
|
if len(json_str) > max_length:
|
||||||
# Calculate how much to keep from start and end
|
# Calculate how much to keep from start and end
|
||||||
keep_length = max_length - 100 # Reserve space for truncation message
|
keep_length = max_length - 150 # Reserve space for truncation message
|
||||||
start_length = keep_length // 2
|
start_length = keep_length // 2
|
||||||
end_length = keep_length - start_length
|
end_length = keep_length - start_length
|
||||||
|
|
||||||
|
@ -225,10 +226,6 @@ class ThreadManager:
|
||||||
else:
|
else:
|
||||||
max_tokens = 41 * 1000 - 10000
|
max_tokens = 41 * 1000 - 10000
|
||||||
|
|
||||||
if max_iterations <= 0:
|
|
||||||
logger.warning(f"_compress_messages: Max iterations reached, returning uncompressed messages")
|
|
||||||
return messages
|
|
||||||
|
|
||||||
result = messages
|
result = messages
|
||||||
result = self._remove_meta_messages(result)
|
result = self._remove_meta_messages(result)
|
||||||
|
|
||||||
|
@ -242,6 +239,10 @@ class ThreadManager:
|
||||||
|
|
||||||
logger.info(f"_compress_messages: {uncompressed_total_token_count} -> {compressed_token_count}") # Log the token compression for debugging later
|
logger.info(f"_compress_messages: {uncompressed_total_token_count} -> {compressed_token_count}") # Log the token compression for debugging later
|
||||||
|
|
||||||
|
if max_iterations <= 0:
|
||||||
|
logger.warning(f"_compress_messages: Max iterations reached")
|
||||||
|
return result
|
||||||
|
|
||||||
if (compressed_token_count > max_tokens):
|
if (compressed_token_count > max_tokens):
|
||||||
logger.warning(f"Further token compression is needed: {compressed_token_count} > {max_tokens}")
|
logger.warning(f"Further token compression is needed: {compressed_token_count} > {max_tokens}")
|
||||||
result = self._compress_messages(messages, llm_model, max_tokens, int(token_threshold / 2), max_iterations - 1)
|
result = self._compress_messages(messages, llm_model, max_tokens, int(token_threshold / 2), max_iterations - 1)
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue