"""
LLM API interface for making calls to various language models.

This module provides a unified interface for making API calls to different LLM providers
(OpenAI, Anthropic, Groq, etc.) using LiteLLM. It includes support for:
- Streaming responses
- Tool calls and function calling
- Retry logic with exponential backoff
- Model-specific configurations
- Comprehensive error handling
"""

from typing import Union, Dict, Any, Optional, AsyncGenerator
import os
import json
import logging
import asyncio
from openai import OpenAIError
import litellm

# Environment variables for API keys
API_KEYS = {
    'OPENAI': os.environ.get('OPENAI_API_KEY'),
    'ANTHROPIC': os.environ.get('ANTHROPIC_API_KEY'),
    'GROQ': os.environ.get('GROQ_API_KEY')
}

# Set environment variables for API keys
for provider, key in API_KEYS.items():
    if key:
        os.environ[f'{provider}_API_KEY'] = key

class LLMConfig:
    """Configuration class for LLM API calls."""
    
    def __init__(
        self,
        model_name: str,
        temperature: float = 0,
        max_tokens: Optional[int] = None,
        response_format: Optional[Any] = None,
        tools: Optional[list] = None,
        tool_choice: str = "auto",
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        stream: bool = False,
        top_p: Optional[float] = None
    ):
        self.model_name = model_name
        self.temperature = temperature
        self.max_tokens = max_tokens
        self.response_format = response_format
        self.tools = tools
        self.tool_choice = tool_choice
        self.api_key = api_key
        self.api_base = api_base
        self.stream = stream
        self.top_p = top_p

    def to_dict(self) -> Dict[str, Any]:
        """Convert config to dictionary for API call."""
        params = {
            "model": self.model_name,
            "messages": self.messages,
            "temperature": self.temperature,
            "response_format": self.response_format,
            "top_p": self.top_p,
            "stream": self.stream,
        }

        if self.api_key:
            params["api_key"] = self.api_key
        if self.api_base:
            params["api_base"] = self.api_base

        # Handle token limits for different models
        if 'o1' in self.model_name:
            if self.max_tokens is not None:
                params["max_completion_tokens"] = self.max_tokens
        else:
            if self.max_tokens is not None:
                params["max_tokens"] = self.max_tokens

        if self.tools:
            params["tools"] = self.tools
            params["tool_choice"] = self.tool_choice

        # Add special headers for Claude models
        if "claude" in self.model_name.lower() or "anthropic" in self.model_name.lower():
            params["extra_headers"] = {
                "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"
            }

        return params

class LLMError(Exception):
    """Base exception for LLM-related errors."""
    pass

class LLMRetryError(LLMError):
    """Exception raised when retries are exhausted."""
    pass

async def make_llm_api_call(
    messages: list,
    model_name: str,
    response_format: Optional[Any] = None,
    temperature: float = 0,
    max_tokens: Optional[int] = None,
    tools: Optional[list] = None,
    tool_choice: str = "auto",
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    stream: bool = False,
    top_p: Optional[float] = None
) -> Union[Dict[str, Any], AsyncGenerator]:
    """
    Make an API call to a language model using LiteLLM.
    
    Args:
        messages: List of message dictionaries for the conversation
        model_name: Name of the model to use (e.g., "gpt-4", "claude-3")
        response_format: Desired format for the response
        temperature: Sampling temperature (0-1)
        max_tokens: Maximum tokens in the response
        tools: List of tool definitions for function calling
        tool_choice: How to select tools ("auto" or "none")
        api_key: Override default API key
        api_base: Override default API base URL
        stream: Whether to stream the response
        top_p: Top-p sampling parameter
        
    Returns:
        Union[Dict[str, Any], AsyncGenerator]: API response or stream
        
    Raises:
        LLMRetryError: If API call fails after retries
        LLMError: For other API-related errors
    """
    config = LLMConfig(
        model_name=model_name,
        temperature=temperature,
        max_tokens=max_tokens,
        response_format=response_format,
        tools=tools,
        tool_choice=tool_choice,
        api_key=api_key,
        api_base=api_base,
        stream=stream,
        top_p=top_p
    )
    config.messages = messages

    async def attempt_api_call(max_attempts: int = 3) -> Any:
        """Attempt API call with retry logic."""
        for attempt in range(max_attempts):
            try:
                params = config.to_dict()
                logging.info(f"Sending API request: {json.dumps(params, indent=2)}")
                
                response = await litellm.acompletion(**params)
                logging.info(f"Received API response: {response}")
                return response
                
            except litellm.exceptions.RateLimitError as e:
                logging.warning(f"Rate limit exceeded. Waiting for 30 seconds before retrying...")
                await asyncio.sleep(30)
            except OpenAIError as e:
                logging.info(f"API call failed, retrying attempt {attempt + 1}. Error: {e}")
                await asyncio.sleep(5)
            except json.JSONDecodeError:
                logging.error(f"JSON decoding failed, retrying attempt {attempt + 1}")
                await asyncio.sleep(5)
            except Exception as e:
                logging.error(f"Unexpected error during API call: {e}")
                raise LLMError(f"API call failed: {str(e)}")
                
        raise LLMRetryError("Failed to make API call after multiple attempts")

    return await attempt_api_call()