diff --git a/backend/services/llm.py b/backend/services/llm.py
index 39c6c2b2..6187e247 100644
--- a/backend/services/llm.py
+++ b/backend/services/llm.py
@@ -160,10 +160,6 @@ def prepare_params(
             # "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"
             "anthropic-beta": "output-128k-2025-02-19"
         }
-        params["fallbacks"] = [{
-            "model": "openrouter/anthropic/claude-sonnet-4",
-            "messages": messages,
-        }]
         # params["mock_testing_fallback"] = True
         logger.debug("Added Claude-specific headers")
 
@@ -191,6 +187,14 @@ def prepare_params(
         params["model_id"] = "arn:aws:bedrock:us-west-2:935064898258:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0"
         logger.debug(f"Auto-set model_id for Claude 3.7 Sonnet: {params['model_id']}")
 
+    fallback_model = get_openrouter_fallback(model_name)
+    if fallback_model:
+        params["fallbacks"] = [{
+            "model": fallback_model,
+            "messages": messages,
+        }]
+        logger.debug(f"Added OpenRouter fallback for model: {model_name} to {fallback_model}")
+
     # Apply Anthropic prompt caching (minimal implementation)
     # Check model name *after* potential modifications (like adding bedrock/ prefix)
     effective_model_name = params.get("model", model_name)  # Use model from params if set, else original
@@ -320,27 +324,6 @@ async def make_llm_api_call(
             # logger.debug(f"Response: {response}")
             return response
 
-        except litellm.exceptions.InternalServerError as e:
-            # Check if it's an Anthropic overloaded error
-            if "Overloaded" in str(e) and "AnthropicException" in str(e):
-                fallback_model = get_openrouter_fallback(model_name)
-                if fallback_model and not params.get("model", "").startswith("openrouter/"):
-                    logger.warning(f"Anthropic overloaded, falling back to OpenRouter: {fallback_model}")
-                    params["model"] = fallback_model
-                    # Remove any model_id as it's specific to Bedrock
-                    params.pop("model_id", None)
-                    # Continue with next attempt using fallback model
-                    last_error = e
-                    await handle_error(e, attempt, MAX_RETRIES)
-                else:
-                    # No fallback available or already using OpenRouter
-                    last_error = e
-                    await handle_error(e, attempt, MAX_RETRIES)
-            else:
-                # Other internal server errors
-                last_error = e
-                await handle_error(e, attempt, MAX_RETRIES)
-
         except (litellm.exceptions.RateLimitError, OpenAIError, json.JSONDecodeError) as e:
             last_error = e
             await handle_error(e, attempt, MAX_RETRIES)
diff --git a/backend/utils/constants.py b/backend/utils/constants.py
index fd05e711..9d85684f 100644
--- a/backend/utils/constants.py
+++ b/backend/utils/constants.py
@@ -94,8 +94,8 @@ MODELS = {
         "tier_availability": ["paid"]
     },
 
-    "openrouter/x-ai/grok-4": {
-        "aliases": ["grok-4"],
+    "xai/grok-4": {
+        "aliases": ["grok-4", "x-ai/grok-4"],
        "pricing": {
            "input_cost_per_million_tokens": 5.00,
            "output_cost_per_million_tokens": 15.00
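
The patch relies on a get_openrouter_fallback() helper that already exists in the codebase: it was previously called only inside the removed InternalServerError handler and is now called unconditionally in prepare_params. Its body is not part of this diff, so the sketch below is a minimal, hypothetical illustration of what such a lookup might do; FALLBACK_MAP and its Claude entry are invented for the example (the xai/grok-4 entry mirrors the rename visible in constants.py):

from typing import Optional

# Hypothetical mapping from primary model names to OpenRouter fallbacks.
# The real table is not shown in this diff; entries here are illustrative.
FALLBACK_MAP = {
    "anthropic/claude-sonnet-4": "openrouter/anthropic/claude-sonnet-4",  # assumed
    "xai/grok-4": "openrouter/x-ai/grok-4",
}

def get_openrouter_fallback(model_name: str) -> Optional[str]:
    """Return an OpenRouter fallback for model_name, or None if there is none."""
    # Don't fall back from a model that is already routed through OpenRouter.
    if model_name.startswith("openrouter/"):
        return None
    return FALLBACK_MAP.get(model_name)

Design note: the net effect of the llm.py changes is to replace reactive fallback (catching "Overloaded" AnthropicException errors in make_llm_api_call and retrying on OpenRouter by hand) with declarative fallback, where prepare_params passes a fallbacks list to litellm up front and litellm handles the rerouting itself.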