mirror of https://github.com/kortix-ai/suna.git
314 lines
12 KiB
Python
314 lines
12 KiB
Python
from typing import Union, Dict, Any, Optional, List
|
|
import litellm
|
|
import os
|
|
import json
|
|
import openai
|
|
from openai import OpenAIError
|
|
import asyncio
|
|
import logging
|
|
|
|
# Provider credentials are captured once, at import time. A variable that is
# absent from the process environment leaves its constant set to None.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
AGENTOPS_API_KEY = os.getenv('AGENTOPS_API_KEY')
FIREWORKS_API_KEY = os.getenv('FIREWORKS_AI_API_KEY')
DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY')
OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY')
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')

AWS_ACCESS_KEY_ID = os.getenv('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.getenv('AWS_SECRET_ACCESS_KEY')
AWS_REGION_NAME = os.getenv('AWS_REGION_NAME')

# Write every non-empty credential back into the environment under its
# variable name (provider keys first, then the AWS settings).
# AGENTOPS_API_KEY is deliberately not part of this table: it is only read.
for _env_name, _env_value in (
    ('OPENAI_API_KEY', OPENAI_API_KEY),
    ('ANTHROPIC_API_KEY', ANTHROPIC_API_KEY),
    ('GROQ_API_KEY', GROQ_API_KEY),
    ('FIREWORKS_AI_API_KEY', FIREWORKS_API_KEY),
    ('DEEPSEEK_API_KEY', DEEPSEEK_API_KEY),
    ('OPENROUTER_API_KEY', OPENROUTER_API_KEY),
    ('GEMINI_API_KEY', GEMINI_API_KEY),
    ('AWS_ACCESS_KEY_ID', AWS_ACCESS_KEY_ID),
    ('AWS_SECRET_ACCESS_KEY', AWS_SECRET_ACCESS_KEY),
    ('AWS_REGION_NAME', AWS_REGION_NAME),
):
    if _env_value:
        os.environ[_env_name] = _env_value
# Drop the loop temporaries so the module namespace holds only the constants.
del _env_name, _env_value
|
|
|
|
def _build_completion_params(
    *,
    messages: list,
    model_name: str,
    response_format: Any,
    temperature: float,
    max_tokens: Optional[int],
    tools: Optional[list],
    tool_choice: str,
    api_key: Optional[str],
    api_base: Optional[str],
    stream: bool,
    top_p: Optional[float],
    stop: Optional[Union[str, List[str]]],
) -> Dict[str, Any]:
    """Assemble the keyword arguments passed to ``litellm.acompletion``.

    Starts from the caller-supplied options and layers on the adjustments
    selected by substrings of ``model_name`` (Claude/Anthropic, OpenRouter,
    Deepseek, Bedrock).
    """
    lowered = model_name.lower()

    params: Dict[str, Any] = {
        "model": model_name,
        "messages": messages,
        "temperature": temperature,
        "response_format": response_format,
        "top_p": top_p,
        "stream": stream,
    }

    if stop is not None:
        params["stop"] = stop
    if api_key:
        params["api_key"] = api_key
    if api_base:
        params["api_base"] = api_base

    if max_tokens is not None:
        # Models with 'o1' in their name get the limit under
        # "max_completion_tokens"; every other model gets "max_tokens".
        token_key = "max_completion_tokens" if 'o1' in model_name else "max_tokens"
        params[token_key] = max_tokens

    if tools:
        params["tools"] = tools
        params["tool_choice"] = tool_choice

    if "claude" in lowered or "anthropic" in lowered:
        params["extra_headers"] = {
            "anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"
        }

    if "openrouter" in lowered:
        # Collect both optional headers in ONE dict so that setting the app
        # name does not discard the site URL.
        openrouter_headers: Dict[str, str] = {}
        site_url = os.environ.get("OR_SITE_URL")
        app_name = os.environ.get("OR_APP_NAME")
        if site_url:
            openrouter_headers["HTTP-Referer"] = site_url
        if app_name:
            openrouter_headers["X-Title"] = app_name
        if openrouter_headers:
            params["headers"] = openrouter_headers

    if "deepseek" in lowered:
        # Deepseek requests always use these sampling settings; the
        # caller-supplied temperature is intentionally replaced.
        params["frequency_penalty"] = 0.5
        params["temperature"] = 0.7
        params["presence_penalty"] = 0.1

    if "bedrock" in lowered:
        # Use the module-level AWS_* constants captured from the environment.
        if AWS_ACCESS_KEY_ID:
            params["aws_access_key_id"] = AWS_ACCESS_KEY_ID
        if AWS_SECRET_ACCESS_KEY:
            params["aws_secret_access_key"] = AWS_SECRET_ACCESS_KEY
        if AWS_REGION_NAME:
            params["aws_region_name"] = AWS_REGION_NAME

    return params


async def make_llm_api_call(
    messages: list,
    model_name: str,
    response_format: Any = None,
    temperature: float = 0,
    max_tokens: Optional[int] = None,
    tools: Optional[list] = None,
    tool_choice: str = "auto",
    api_key: Optional[str] = None,
    api_base: Optional[str] = None,
    agentops_session: Any = None,
    stream: bool = False,
    top_p: Optional[float] = None,
    stop: Optional[Union[str, List[str]]] = None,
) -> Union[Dict[str, Any], Any]:
    """
    Make an API call to a language model using litellm.

    This function provides a unified interface for making calls to various LLM providers
    (OpenAI, Anthropic, Groq, etc.) with support for streaming, tool calls, and retry logic.

    The call is attempted up to three times. A rate-limit error on the
    Bedrock Claude 3.5 Sonnet model switches the next attempt to the direct
    Anthropic model; any other rate-limit error waits 30 seconds, and an
    ``OpenAIError`` or ``json.JSONDecodeError`` waits 5 seconds. No wait is
    performed after the final attempt.

    Args:
        messages (list): List of message dictionaries for the conversation
        model_name (str): Name of the model to use (e.g., "gpt-4", "claude-3")
        response_format (Any, optional): Desired format for the response
        temperature (float, optional): Sampling temperature. Defaults to 0
        max_tokens (int, optional): Maximum tokens in the response
        tools (list, optional): List of tool definitions for function calling
        tool_choice (str, optional): How to select tools ("auto" or "none");
            only sent when ``tools`` is non-empty
        api_key (str, optional): Override default API key
        api_base (str, optional): Override default API base URL
        agentops_session (Any, optional): Session for agentops integration;
            when given, the call goes through ``agentops_session.patch``
        stream (bool, optional): Whether to stream the response. Defaults to False
        top_p (float, optional): Top-p sampling parameter
        stop (Union[str, List[str]], optional): Up to 4 sequences where the API will stop generating tokens

    Returns:
        Union[Dict[str, Any], Any]: API response, either complete or streaming

    Raises:
        Exception: If API call fails after retries. The last underlying
            error is attached as ``__cause__``.
    """
    litellm.set_verbose = False

    max_attempts = 3
    last_error: Optional[BaseException] = None

    for attempt in range(max_attempts):
        is_final_attempt = attempt == max_attempts - 1

        # Rebuilt on every attempt because model_name may have been switched
        # by the Bedrock -> Anthropic fallback below.
        api_call_params = _build_completion_params(
            messages=messages,
            model_name=model_name,
            response_format=response_format,
            temperature=temperature,
            max_tokens=max_tokens,
            tools=tools,
            tool_choice=tool_choice,
            api_key=api_key,
            api_base=api_base,
            stream=stream,
            top_p=top_p,
            stop=stop,
        )

        try:
            # Make the API call using either agentops session or direct litellm
            if agentops_session:
                return await agentops_session.patch(litellm.acompletion)(**api_call_params)
            return await litellm.acompletion(**api_call_params)
        except litellm.exceptions.RateLimitError as e:
            # Must be handled before OpenAIError so the rate-limit specific
            # path (fallback / longer wait) takes precedence.
            last_error = e
            if "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0" in model_name:
                logging.info("Rate limit hit with Bedrock Claude, falling back to direct Anthropic API...")
                model_name = "anthropic/claude-3-5-sonnet-latest"
                continue
            if not is_final_attempt:
                logging.warning("Rate limit exceeded. Waiting for 30 seconds before retrying...")
                await asyncio.sleep(30)
        except OpenAIError as e:
            last_error = e
            logging.info(f"API call failed, retrying attempt {attempt + 1}. Error: {e}")
            if not is_final_attempt:
                await asyncio.sleep(5)
        except json.JSONDecodeError as e:
            last_error = e
            logging.error(f"JSON decoding failed, retrying attempt {attempt + 1}")
            if not is_final_attempt:
                await asyncio.sleep(5)

    raise Exception("Failed to make API call after multiple attempts.") from last_error
|
|
|
|
if __name__ == "__main__":
    import asyncio

    async def _print_streamed(chunks):
        """
        Print streamed completion chunks as they arrive.

        Text is accumulated and written out whenever a chunk ends in
        whitespace; any remainder is written once the stream is exhausted,
        followed by the completion banner.

        Args:
            chunks: Async iterable of chunks, each either a dict with a
                'choices' key or an object exposing ``choices[0].delta.content``
        """
        pending = ""
        async for piece in chunks:
            if isinstance(piece, dict) and 'choices' in piece:
                text = piece['choices'][0]['delta'].get('content', '')
            else:
                text = piece.choices[0].delta.content

            if not text:
                continue

            pending += text
            if text[-1].isspace():
                print(pending, end='', flush=True)
                pending = ""

        if pending:
            print(pending, flush=True)
        print("\n✨ Stream completed.\n")

    async def test_llm_api_call(stream=True):
        """
        Manually exercise make_llm_api_call against "gpt-4o".

        Args:
            stream (bool): Whether to test streaming mode
        """
        conversation = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Complex essay on economics"}
        ]

        result = await make_llm_api_call(conversation, "gpt-4o", stream=stream)

        if stream:
            print("\n🤖 Streaming response:\n")
            await _print_streamed(result)
            return

        print("\n🤖 Response:\n")
        if isinstance(result, dict) and 'choices' in result:
            print(result['choices'][0]['message']['content'])
        else:
            print(result.choices[0].message.content)
        print()

    # asyncio.run(test_llm_api_call())

    async def test_bedrock():
        """
        Manually exercise a streaming call to the Bedrock Claude model.
        """
        conversation = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Hello from Bedrock!"}
        ]

        result = await make_llm_api_call(
            conversation,
            "bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0",
            stream=True,
        )

        print("\n🤖 Streaming response from Bedrock:\n")
        await _print_streamed(result)

    # Add test_bedrock to the test runs
    # asyncio.run(test_bedrock())
|