From 1cbc2a63c947bd0aa6d05573b58258b8b184176f Mon Sep 17 00:00:00 2001 From: sharath <29162020+tnfssc@users.noreply.github.com> Date: Sat, 7 Jun 2025 10:14:54 +0000 Subject: [PATCH] refactor(llm): limit processing of user and assistant messages to a maximum of 4 --- backend/services/llm.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/backend/services/llm.py b/backend/services/llm.py index 06d92d7e..ea49b59c 100644 --- a/backend/services/llm.py +++ b/backend/services/llm.py @@ -181,7 +181,7 @@ def prepare_params( item["cache_control"] = {"type": "ephemeral"} break # Apply to the first text block only for system prompt - # 2. Find and process relevant user and assistant messages + # 2. Find and process relevant user and assistant messages (limit to 4 max) last_user_idx = -1 second_last_user_idx = -1 last_assistant_idx = -1 @@ -197,9 +197,10 @@ def prepare_params( if last_assistant_idx == -1: last_assistant_idx = i - # Stop searching if we've found all needed messages - if last_user_idx != -1 and second_last_user_idx != -1 and last_assistant_idx != -1: - break + # Stop searching if we've found all needed messages (system, last user, second last user, last assistant) + found_count = sum(idx != -1 for idx in [last_user_idx, second_last_user_idx, last_assistant_idx]) + if found_count >= 3: + break # Helper function to apply cache control def apply_cache_control(message_idx: int, message_role: str): @@ -219,7 +220,9 @@ def prepare_params( if "cache_control" not in item: item["cache_control"] = {"type": "ephemeral"} - # Apply cache control to the identified messages + # Apply cache control to the identified messages (max 4: system, last user, second last user, last assistant) + # System message is always at index 0 if present + apply_cache_control(0, "system") apply_cache_control(last_user_idx, "last user") apply_cache_control(second_last_user_idx, "second last user") apply_cache_control(last_assistant_idx, "last assistant")