suna/backend/mcp_service/api.py

"""
MCP (Model Context Protocol) API module

This module handles MCP server discovery and configuration management.

Architecture:
1. Registry API (https://registry.smithery.ai) - For discovering MCP servers and getting metadata
2. Server API (https://server.smithery.ai) - For actually connecting to and using MCP servers

The flow:
1. Browse available MCP servers from the registry (this module)
2. Configure MCP servers with credentials and save to agent's configured_mcps
3. When agent runs, it connects to MCP servers using:
   https://server.smithery.ai/{qualifiedName}/mcp?config={base64_encoded_config}&api_key={smithery_api_key}
"""

from fastapi import APIRouter, HTTPException, Depends, Query
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, validator, HttpUrl
import httpx
import os
from urllib.parse import quote
from utils.logger import logger
from utils.auth_utils import get_current_user_id_from_jwt
from collections import OrderedDict

# Router on which every MCP endpoint in this module is registered.
router = APIRouter()

# Smithery API configuration
# Registry host: queried by the endpoints in this module for listings and metadata.
SMITHERY_API_BASE_URL = "https://registry.smithery.ai"
# Server host: per the module docstring, the host agents connect to at run time.
SMITHERY_SERVER_BASE_URL = "https://server.smithery.ai"
# Read once at import time; None when the environment variable is unset, in
# which case requests are sent without an Authorization header.
SMITHERY_API_KEY = os.getenv("SMITHERY_API_KEY")


class MCPServer(BaseModel):
    """Represents an MCP server from Smithery.

    Item type of ``MCPServerListResponse.servers``. The first six fields are
    required; the remaining ones default to ``None``.
    """
    qualifiedName: str  # unique identifier, e.g. "exa" or "@smithery-ai/github"
    displayName: str
    description: str
    createdAt: str  # kept as a plain string; no date parsing is done here
    useCount: int  # declared as int; an earlier revision of this model used str
    homepage: str
    # These fields are only available in the detail endpoint
    iconUrl: Optional[str] = None
    isDeployed: Optional[bool] = None
    connections: Optional[List[Dict[str, Any]]] = None
    tools: Optional[List[Dict[str, Any]]] = None
    security: Optional[Dict[str, Any]] = None

class MCPServerListResponse(BaseModel):
    """Response model for MCP server list.

    ``list_mcp_servers`` builds this directly from the registry's JSON payload.
    """
    servers: List[MCPServer]
    # Integer-valued paging metadata. The key names are not fixed by this model
    # (presumably currentPage/pageSize/totalPages/totalCount - TODO confirm
    # against the registry response).
    pagination: Dict[str, int]

class MCPServerDetailResponse(BaseModel):
    """Response model for detailed MCP server information.

    ``get_mcp_server_details`` builds this directly from the registry's JSON
    payload for a single server. Only ``qualifiedName``, ``displayName`` and
    ``connections`` are required; validation fails if any of them is missing.
    """
    qualifiedName: str
    displayName: str
    iconUrl: Optional[str] = None
    deploymentUrl: Optional[str] = None
    # Required here, unlike the optional field of the same name on MCPServer.
    connections: List[Dict[str, Any]]
    security: Optional[Dict[str, Any]] = None
    tools: Optional[List[Dict[str, Any]]] = None

class PopularServersV2Response(BaseModel):
    """Response model for v2 popular servers with categorization.

    Returned by ``get_popular_mcp_servers_v2`` both on success and on failure;
    failures are signalled through ``success`` rather than an HTTP error.
    """
    success: bool  # False when the registry call failed or raised
    servers: List[Dict[str, Any]]  # server entries as received from the registry
    categorized: Dict[str, List[Dict[str, Any]]]  # category name -> server summaries
    total: int
    categoryCount: int  # number of keys in ``categorized``
    # The v2 endpoint fills this with the keys currentPage, pageSize,
    # totalPages and totalCount.
    pagination: Dict[str, int]

class CustomMCPConnectionRequest(BaseModel):
    """Payload for connecting to a custom MCP server hosted on Smithery."""
    url: str
    config: Optional[Dict[str, Any]] = {}

    @validator('url')
    def validate_smithery_url(cls, v):
        """Accept only URLs that live under the Smithery server host.

        Returns the URL unchanged when it is acceptable and raises
        ``ValueError`` otherwise.
        """
        if v.startswith('https://server.smithery.ai/'):
            return v
        raise ValueError('URL must be a Smithery server URL starting with https://server.smithery.ai/')

class CustomMCPConnectionResponse(BaseModel):
    """Response model for custom MCP connection.

    All fields are required.
    """
    success: bool
    qualifiedName: str
    displayName: str
    # typing.List (not builtin list[...]) to match every other model in this
    # module and to stay importable on interpreters older than Python 3.9.
    tools: List[Dict[str, Any]]
    config: Dict[str, Any]
    url: str
    message: str

@router.get("/mcp/servers", response_model=MCPServerListResponse)
async def list_mcp_servers(
    q: Optional[str] = Query(None, description="Search query for semantic search"),
    page: int = Query(1, ge=1, description="Page number"),
    pageSize: int = Query(20, ge=1, le=100, description="Items per page"),
    user_id: str = Depends(get_current_user_id_from_jwt)
):
    """
    List available MCP servers from Smithery.

    Query parameters:
    - q: Search query (semantic search)
    - page: Page number (default: 1)
    - pageSize: Number of items per page (default: 20, max: 100)

    Example queries:
    - "machine learning" - semantic search
    - "owner:smithery-ai" - filter by owner
    - "repo:fetch" - filter by repository
    - "is:deployed" - only deployed servers
    - "is:verified" - only verified servers

    Returns:
    - MCPServerListResponse built from the registry's JSON payload.

    Raises:
    - HTTPException carrying the registry's status code when the registry
      answers with an error status (including 401).
    - HTTPException 500 for any other failure (network error, invalid JSON,
      payload that does not validate against the response model).
    """
    logger.info(f"Fetching MCP servers from Smithery for user {user_id} with query: {q}")

    try:
        async with httpx.AsyncClient() as client:
            headers = {
                "Accept": "application/json",
                "User-Agent": "Suna-MCP-Integration/1.0"
            }

            # Add API key if available
            if SMITHERY_API_KEY:
                headers["Authorization"] = f"Bearer {SMITHERY_API_KEY}"
                logger.debug("Using Smithery API key for authentication")
            else:
                logger.warning("No Smithery API key found in environment variables")

            params = {
                "page": page,
                "pageSize": pageSize
            }

            # Only forward the search term when the caller supplied one.
            if q:
                params["q"] = q

            response = await client.get(
                f"{SMITHERY_API_BASE_URL}/servers",
                headers=headers,
                params=params,
                timeout=30.0
            )

            if response.status_code == 401:
                # Diagnostic only: there is no unauthenticated retry, so
                # raise_for_status() below still surfaces the 401 to the caller.
                logger.warning("Smithery API authentication failed. API key may be required.")

            response.raise_for_status()
            data = response.json()

            logger.info(f"Successfully fetched {len(data.get('servers', []))} MCP servers")
            return MCPServerListResponse(**data)

    except httpx.HTTPStatusError as e:
        logger.error(f"HTTP error fetching MCP servers: {e.response.status_code} - {e.response.text}")
        # Chain the original error so the upstream failure stays in tracebacks.
        raise HTTPException(
            status_code=e.response.status_code,
            detail=f"Failed to fetch MCP servers from Smithery: {e.response.text}"
        ) from e
    except Exception as e:
        logger.error(f"Error fetching MCP servers: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch MCP servers: {str(e)}"
        ) from e

@router.get("/mcp/servers/{qualified_name:path}", response_model=MCPServerDetailResponse)
async def get_mcp_server_details(
    qualified_name: str,
    user_id: str = Depends(get_current_user_id_from_jwt)
):
    """
    Get detailed information about a specific MCP server.

    Parameters:
    - qualified_name: The unique identifier for the server (e.g., "exa", "@smithery-ai/github")

    Returns:
    - MCPServerDetailResponse built from the registry's JSON payload.

    Raises:
    - HTTPException 404 when the registry does not know the server.
    - HTTPException carrying the registry's status code for any other error
      status returned by the registry.
    - HTTPException 500 for any other failure (network error, invalid JSON,
      payload that does not validate against the response model).
    """
    logger.info(f"Fetching details for MCP server: {qualified_name} for user {user_id}")

    try:
        async with httpx.AsyncClient() as client:
            headers = {
                "Accept": "application/json",
                "User-Agent": "Suna-MCP-Integration/1.0"
            }

            # Add API key if available
            if SMITHERY_API_KEY:
                headers["Authorization"] = f"Bearer {SMITHERY_API_KEY}"

            # Always percent-encode the name as a single path segment. quote()
            # leaves letters, digits and "_.-~" untouched, so simple names such
            # as "exa" are unchanged, while "@", "/", "?", "#", "%" and spaces
            # can no longer alter the path or query of the registry request.
            encoded_name = quote(qualified_name, safe='')

            url = f"{SMITHERY_API_BASE_URL}/servers/{encoded_name}"
            logger.debug(f"Requesting MCP server details from: {url}")

            response = await client.get(
                url,  # Use registry API for metadata
                headers=headers,
                timeout=30.0
            )

            logger.debug(f"Response status: {response.status_code}")

            response.raise_for_status()
            data = response.json()

            logger.info(f"Successfully fetched details for MCP server: {qualified_name}")
            logger.debug(f"Response data keys: {list(data.keys()) if isinstance(data, dict) else 'not a dict'}")

            return MCPServerDetailResponse(**data)

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            logger.error(f"Server not found. Response: {e.response.text}")
            raise HTTPException(status_code=404, detail=f"MCP server '{qualified_name}' not found") from e

        logger.error(f"HTTP error fetching MCP server details: {e.response.status_code} - {e.response.text}")
        # Chain the original error so the upstream failure stays in tracebacks.
        raise HTTPException(
            status_code=e.response.status_code,
            detail=f"Failed to fetch MCP server details: {e.response.text}"
        ) from e
    except Exception as e:
        logger.error(f"Error fetching MCP server details: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch MCP server details: {str(e)}"
        ) from e

@router.get("/mcp/popular-servers")
async def get_popular_mcp_servers(
    user_id: str = Depends(get_current_user_id_from_jwt)
):
    """
    Return a fixed, hand-picked catalogue of recommended MCP servers.

    No registry call is made; the entries are hard-coded here. The response
    is ``{"servers": [...]}`` where every entry carries the keys
    qualifiedName, displayName, description, icon and category.
    """
    # Column order of the rows below; it is also the key order of each entry.
    columns = ("qualifiedName", "displayName", "description", "icon", "category")

    # One row per curated server, based on actual Smithery data.
    curated_rows = (
        (
            "@wonderwhy-er/desktop-commander",
            "Desktop Commander",
            "Execute terminal commands and manage files with diff editing capabilities. Coding, shell and terminal, task automation",
            "💻",
            "development",
        ),
        (
            "@smithery-ai/server-sequential-thinking",
            "Sequential Thinking",
            "Dynamic and reflective problem-solving through a structured thinking process",
            "🧠",
            "ai",
        ),
        (
            "@microsoft/playwright-mcp",
            "Playwright Automation",
            "Automate web interactions, navigate, extract data, and perform actions on web pages",
            "🎭",
            "automation",
        ),
        (
            "exa",
            "Exa Search",
            "Fast, intelligent web search and crawling. Combines embeddings and traditional search",
            "🔍",
            "search",
        ),
        (
            "@smithery-ai/github",
            "GitHub",
            "Access the GitHub API, enabling file operations, repository management, and search",
            "🐙",
            "development",
        ),
        (
            "@nickclyde/duckduckgo-mcp-server",
            "DuckDuckGo Search",
            "Enable web search capabilities through DuckDuckGo. Fetch and parse webpage content",
            "🦆",
            "search",
        ),
    )

    catalogue = [dict(zip(columns, row)) for row in curated_rows]
    return {"servers": catalogue}

# Keyword -> category table used to bucket registry servers. Keywords are
# tested as substrings in insertion order and the first hit wins, so a keyword
# that contains another one must be listed before it.
_CATEGORY_KEYWORDS: Dict[str, str] = {
    # AI & Search
    "exa": "AI & Search",
    "perplexity": "AI & Search",
    "openai": "AI & Search",
    "anthropic": "AI & Search",
    "duckduckgo": "AI & Search",
    "brave": "AI & Search",
    # "google-drive" has to be tested before "google"; otherwise the shorter
    # keyword always matches first and Drive servers land in "AI & Search".
    "google-drive": "File Storage",
    "google": "AI & Search",
    "search": "AI & Search",

    # Development & Version Control
    "github": "Development & Version Control",
    "gitlab": "Development & Version Control",
    "bitbucket": "Development & Version Control",
    "git": "Development & Version Control",

    # Communication & Collaboration
    "slack": "Communication & Collaboration",
    "discord": "Communication & Collaboration",
    "teams": "Communication & Collaboration",
    "zoom": "Communication & Collaboration",
    "telegram": "Communication & Collaboration",

    # Project Management
    "linear": "Project Management",
    "jira": "Project Management",
    "asana": "Project Management",
    "notion": "Project Management",
    "trello": "Project Management",
    "monday": "Project Management",
    "clickup": "Project Management",

    # Data & Analytics
    "postgres": "Data & Analytics",
    "mysql": "Data & Analytics",
    "mongodb": "Data & Analytics",
    "bigquery": "Data & Analytics",
    "snowflake": "Data & Analytics",
    "sqlite": "Data & Analytics",
    "redis": "Data & Analytics",
    "database": "Data & Analytics",

    # Cloud & Infrastructure
    "aws": "Cloud & Infrastructure",
    "gcp": "Cloud & Infrastructure",
    "azure": "Cloud & Infrastructure",
    "vercel": "Cloud & Infrastructure",
    "netlify": "Cloud & Infrastructure",
    "cloudflare": "Cloud & Infrastructure",
    "docker": "Cloud & Infrastructure",

    # File Storage ("google-drive" is listed next to "google" above)
    "gdrive": "File Storage",
    "dropbox": "File Storage",
    "box": "File Storage",
    "onedrive": "File Storage",
    "s3": "File Storage",
    "drive": "File Storage",

    # Customer Support
    "zendesk": "Customer Support",
    "intercom": "Customer Support",
    "freshdesk": "Customer Support",
    "helpscout": "Customer Support",

    # Marketing & Sales
    "hubspot": "Marketing & Sales",
    "salesforce": "Marketing & Sales",
    "mailchimp": "Marketing & Sales",
    "sendgrid": "Marketing & Sales",

    # Finance
    "stripe": "Finance",
    "quickbooks": "Finance",
    "xero": "Finance",
    "plaid": "Finance",

    # Automation & Productivity
    "playwright": "Automation & Productivity",
    "puppeteer": "Automation & Productivity",
    "selenium": "Automation & Productivity",
    "desktop-commander": "Automation & Productivity",
    "sequential-thinking": "Automation & Productivity",
    "automation": "Automation & Productivity",

    # Utilities
    "filesystem": "Utilities",
    "memory": "Utilities",
    "fetch": "Utilities",
    "time": "Utilities",
    "weather": "Utilities",
    "currency": "Utilities",
    "file": "Utilities",
}

# Display order of the categories in the response; anything not listed here is
# appended afterwards in alphabetical order.
_CATEGORY_PRIORITY = (
    "AI & Search",
    "Development & Version Control",
    "Automation & Productivity",
    "Communication & Collaboration",
    "Project Management",
    "Data & Analytics",
    "Cloud & Infrastructure",
    "File Storage",
    "Marketing & Sales",
    "Customer Support",
    "Finance",
    "Utilities",
    "Other",
)


def _categorize_server(qualified_name: str, description: str) -> str:
    """Return the category for a server, or "Other" when no keyword matches."""
    # The qualified name is the more reliable signal, so every keyword is
    # tried against it before the description is consulted at all.
    for haystack in (qualified_name.lower(), description.lower()):
        for keyword, category in _CATEGORY_KEYWORDS.items():
            if keyword in haystack:
                return category
    return "Other"


def _coerce_use_count(raw: Any) -> int:
    """Return ``raw`` as an int, falling back to 0 for null or non-numeric values."""
    try:
        return int(raw or 0)
    except (TypeError, ValueError, OverflowError):
        return 0


def _summarize_server(server: Dict[str, Any]) -> Dict[str, Any]:
    """Build the trimmed entry stored under ``categorized`` for one server."""
    # ``or`` rather than a .get() default: the registry may send explicit
    # nulls, and one such entry must not fail the whole response.
    return {
        "name": str(server.get("displayName") or server.get("name") or "Unknown"),
        "qualifiedName": str(server.get("qualifiedName") or ""),
        "description": str(server.get("description") or ""),
        "iconUrl": server.get("iconUrl"),
        "homepage": server.get("homepage"),
        "useCount": _coerce_use_count(server.get("useCount")),
        "createdAt": server.get("createdAt"),
        "isDeployed": server.get("isDeployed", False)
    }


def _group_and_sort_servers(servers: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """Group servers by category and order both categories and their members."""
    grouped: Dict[str, List[Dict[str, Any]]] = {}
    for server in servers:
        entry = _summarize_server(server)
        category = _categorize_server(entry["qualifiedName"], entry["description"])
        grouped.setdefault(category, []).append(entry)

    category_order = [cat for cat in _CATEGORY_PRIORITY if cat in grouped]
    category_order += sorted(cat for cat in grouped if cat not in _CATEGORY_PRIORITY)

    # Within a category: most used first, ties broken by case-insensitive name.
    return {
        cat: sorted(grouped[cat], key=lambda item: (-item["useCount"], item["name"].lower()))
        for cat in category_order
    }


def _empty_popular_servers_response(page: int, page_size: int) -> "PopularServersV2Response":
    """Build the ``success=False`` payload returned when the registry is unusable."""
    return PopularServersV2Response(
        success=False,
        servers=[],
        categorized={},
        total=0,
        categoryCount=0,
        pagination={"currentPage": page, "pageSize": page_size, "totalPages": 0, "totalCount": 0}
    )


@router.get("/mcp/popular-servers/v2", response_model=PopularServersV2Response)
async def get_popular_mcp_servers_v2(
    page: int = Query(1, ge=1, description="Page number"),
    pageSize: int = Query(100, ge=1, le=200, description="Items per page (max 200)"),
    user_id: str = Depends(get_current_user_id_from_jwt)
):
    """
    Get a comprehensive categorized list of popular MCP servers from Smithery Registry.

    Returns servers grouped by category with proper metadata and usage statistics.
    This endpoint fetches real data from the Smithery registry API and categorizes it.

    Query parameters:
    - page: Page number (default: 1)
    - pageSize: Number of items per page (default: 100, max: 200)

    Returns:
    - PopularServersV2Response. This endpoint never raises for registry
      problems: a non-200 registry answer or any exception yields a payload
      with success=False and empty collections instead.
    """
    logger.info(f"Fetching v2 popular MCP servers for user {user_id}")

    try:
        async with httpx.AsyncClient() as client:
            headers = {
                "Accept": "application/json",
                "User-Agent": "Suna-MCP-Integration/1.0"
            }

            # Add API key if available
            if SMITHERY_API_KEY:
                headers["Authorization"] = f"Bearer {SMITHERY_API_KEY}"
                logger.debug("Using Smithery API key for authentication")
            else:
                logger.warning("No Smithery API key found in environment variables")

            response = await client.get(
                f"{SMITHERY_API_BASE_URL}/servers",
                headers=headers,
                params={"page": page, "pageSize": pageSize},
                timeout=30.0
            )

            if response.status_code != 200:
                logger.error(f"Failed to fetch MCP servers: {response.status_code} - {response.text}")
                return _empty_popular_servers_response(page, pageSize)

            data = response.json()
            # ``or`` guards against explicit nulls in the registry payload.
            servers = data.get("servers") or []
            pagination_data = data.get("pagination") or {}

            categorized = _group_and_sort_servers(servers)

            logger.info(f"Successfully categorized {len(servers)} servers into {len(categorized)} categories")

            return PopularServersV2Response(
                success=True,
                servers=servers,
                categorized=categorized,
                total=pagination_data.get("totalCount", len(servers)),
                categoryCount=len(categorized),
                pagination={
                    "currentPage": pagination_data.get("currentPage", page),
                    "pageSize": pagination_data.get("pageSize", pageSize),
                    "totalPages": pagination_data.get("totalPages", 1),
                    "totalCount": pagination_data.get("totalCount", len(servers))
                }
            )

    except Exception as e:
        # Deliberate best-effort: report the failure in the payload rather
        # than as an HTTP error.
        logger.error(f"Error fetching v2 popular MCP servers: {str(e)}")
        return _empty_popular_servers_response(page, pageSize)