mirror of https://github.com/kortix-ai/suna.git
fix: normalize path
This commit is contained in:
parent
49e0f2ea8c
commit
6caa0e3adf
|
@ -1,4 +1,5 @@
|
||||||
import os
|
import os
|
||||||
|
import urllib.parse
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from fastapi import FastAPI, UploadFile, File, HTTPException, APIRouter, Form, Depends, Request
|
from fastapi import FastAPI, UploadFile, File, HTTPException, APIRouter, Form, Depends, Request
|
||||||
|
@ -30,6 +31,41 @@ class FileInfo(BaseModel):
|
||||||
mod_time: str
|
mod_time: str
|
||||||
permissions: Optional[str] = None
|
permissions: Optional[str] = None
|
||||||
|
|
||||||
|
def normalize_path(path: str) -> str:
    r"""
    Decode a request path into the literal characters it denotes.

    Two layers of escaping are undone, in this order:

    1. Percent-encoding (``%C3%A4`` -> ``ä``) via ``urllib.parse.unquote``.
    2. Literal backslash-u escapes (the six characters ``\u0308``) that
       arrive as text instead of as the character itself.

    Two consecutive escapes that form a UTF-16 surrogate pair
    (``\ud83d\ude00``) are merged into the single code point they encode.
    An unpaired surrogate escape is left in the path as text, because a lone
    surrogate is not valid Unicode text and cannot be encoded as UTF-8.

    Args:
        path: The file path, potentially containing URL-encoded characters
            and/or literal ``\uXXXX`` escape sequences.

    Returns:
        The decoded path. If decoding fails for any reason, the error is
        logged and the original ``path`` is returned unchanged.
    """
    import re

    # A single alternation, so that a high+low surrogate pair is consumed as
    # one unit before the generic single-escape branch can split it in two.
    escape_pattern = re.compile(
        r'\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})'
        r'|\\u([0-9a-fA-F]{4})'
    )

    def replace_unicode(match):
        high, low, single = match.groups()
        if high is not None:
            # Standard UTF-16 surrogate pair -> supplementary code point.
            return chr(
                0x10000
                + ((int(high, 16) - 0xD800) << 10)
                + (int(low, 16) - 0xDC00)
            )
        code_point = int(single, 16)
        if 0xD800 <= code_point <= 0xDFFF:
            # Unpaired surrogate: keep the escape text rather than emit a
            # character that cannot be UTF-8 encoded.
            return match.group(0)
        return chr(code_point)

    try:
        # NOTE(review): if the web framework has already percent-decoded this
        # value, decoding again alters names that contain a literal '%XX'
        # sequence -- confirm against the callers before relying on this.
        decoded_path = urllib.parse.unquote(path)
        decoded_path = escape_pattern.sub(replace_unicode, decoded_path)
    except Exception as e:
        # Best effort by design: a path we cannot decode is passed through
        # untouched so the caller can still try it verbatim.
        logger.error(f"Error normalizing path '{path}': {str(e)}")
        return path  # Return original path if decoding fails

    logger.debug(f"Normalized path from '{path}' to '{decoded_path}'")
    return decoded_path
|
||||||
|
|
||||||
async def verify_sandbox_access(client, sandbox_id: str, user_id: Optional[str] = None):
|
async def verify_sandbox_access(client, sandbox_id: str, user_id: Optional[str] = None):
|
||||||
"""
|
"""
|
||||||
Verify that a user has access to a specific sandbox based on account membership.
|
Verify that a user has access to a specific sandbox based on account membership.
|
||||||
|
@ -114,6 +150,9 @@ async def create_file(
|
||||||
user_id: Optional[str] = Depends(get_optional_user_id)
|
user_id: Optional[str] = Depends(get_optional_user_id)
|
||||||
):
|
):
|
||||||
"""Create a file in the sandbox using direct file upload"""
|
"""Create a file in the sandbox using direct file upload"""
|
||||||
|
# Normalize the path to handle UTF-8 encoding correctly
|
||||||
|
path = normalize_path(path)
|
||||||
|
|
||||||
logger.info(f"Received file upload request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
|
logger.info(f"Received file upload request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
|
||||||
client = await db.client
|
client = await db.client
|
||||||
|
|
||||||
|
@ -144,6 +183,9 @@ async def list_files(
|
||||||
user_id: Optional[str] = Depends(get_optional_user_id)
|
user_id: Optional[str] = Depends(get_optional_user_id)
|
||||||
):
|
):
|
||||||
"""List files and directories at the specified path"""
|
"""List files and directories at the specified path"""
|
||||||
|
# Normalize the path to handle UTF-8 encoding correctly
|
||||||
|
path = normalize_path(path)
|
||||||
|
|
||||||
logger.info(f"Received list files request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
|
logger.info(f"Received list files request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
|
||||||
client = await db.client
|
client = await db.client
|
||||||
|
|
||||||
|
@ -186,7 +228,14 @@ async def read_file(
|
||||||
user_id: Optional[str] = Depends(get_optional_user_id)
|
user_id: Optional[str] = Depends(get_optional_user_id)
|
||||||
):
|
):
|
||||||
"""Read a file from the sandbox"""
|
"""Read a file from the sandbox"""
|
||||||
|
# Normalize the path to handle UTF-8 encoding correctly
|
||||||
|
original_path = path
|
||||||
|
path = normalize_path(path)
|
||||||
|
|
||||||
logger.info(f"Received file read request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
|
logger.info(f"Received file read request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
|
||||||
|
if original_path != path:
|
||||||
|
logger.info(f"Normalized path from '{original_path}' to '{path}'")
|
||||||
|
|
||||||
client = await db.client
|
client = await db.client
|
||||||
|
|
||||||
# Verify the user has access to this sandbox
|
# Verify the user has access to this sandbox
|
||||||
|
@ -196,17 +245,59 @@ async def read_file(
|
||||||
# Get sandbox using the safer method
|
# Get sandbox using the safer method
|
||||||
sandbox = await get_sandbox_by_id_safely(client, sandbox_id)
|
sandbox = await get_sandbox_by_id_safely(client, sandbox_id)
|
||||||
|
|
||||||
|
# Verify the file exists first
|
||||||
|
try:
|
||||||
|
filename = os.path.basename(path)
|
||||||
|
parent_dir = os.path.dirname(path)
|
||||||
|
|
||||||
|
# List files in the parent directory to check if the file exists
|
||||||
|
files_in_dir = sandbox.fs.list_files(parent_dir)
|
||||||
|
|
||||||
|
# Look for the target file with exact name match
|
||||||
|
file_exists = any(file.name == filename for file in files_in_dir)
|
||||||
|
|
||||||
|
if not file_exists:
|
||||||
|
logger.warning(f"File not found: {path} in sandbox {sandbox_id}")
|
||||||
|
|
||||||
|
# Try to find similar files to help diagnose
|
||||||
|
close_matches = [file.name for file in files_in_dir if filename.lower() in file.name.lower()]
|
||||||
|
error_detail = f"File '{filename}' not found in directory '{parent_dir}'"
|
||||||
|
|
||||||
|
if close_matches:
|
||||||
|
error_detail += f". Similar files in the directory: {', '.join(close_matches)}"
|
||||||
|
|
||||||
|
raise HTTPException(status_code=404, detail=error_detail)
|
||||||
|
except Exception as list_err:
|
||||||
|
# If we can't list files, continue with the download attempt
|
||||||
|
logger.warning(f"Error checking if file exists: {str(list_err)}")
|
||||||
|
|
||||||
# Read file
|
# Read file
|
||||||
content = sandbox.fs.download_file(path)
|
try:
|
||||||
|
content = sandbox.fs.download_file(path)
|
||||||
|
except Exception as download_err:
|
||||||
|
logger.error(f"Error downloading file {path} from sandbox {sandbox_id}: {str(download_err)}")
|
||||||
|
raise HTTPException(
|
||||||
|
status_code=404,
|
||||||
|
detail=f"Failed to download file: {str(download_err)}"
|
||||||
|
)
|
||||||
|
|
||||||
# Return a Response object with the content directly
|
# Return a Response object with the content directly
|
||||||
filename = os.path.basename(path)
|
filename = os.path.basename(path)
|
||||||
logger.info(f"Successfully read file {filename} from sandbox {sandbox_id}")
|
logger.info(f"Successfully read file {filename} from sandbox {sandbox_id}")
|
||||||
|
|
||||||
|
# Ensure proper encoding by explicitly using UTF-8 for the filename in Content-Disposition header
|
||||||
|
# This applies RFC 5987 encoding for the filename to support non-ASCII characters
|
||||||
|
encoded_filename = filename.encode('utf-8').decode('latin-1')
|
||||||
|
content_disposition = f"attachment; filename*=UTF-8''{encoded_filename}"
|
||||||
|
|
||||||
return Response(
|
return Response(
|
||||||
content=content,
|
content=content,
|
||||||
media_type="application/octet-stream",
|
media_type="application/octet-stream",
|
||||||
headers={"Content-Disposition": f"attachment; filename={filename}"}
|
headers={"Content-Disposition": content_disposition}
|
||||||
)
|
)
|
||||||
|
except HTTPException:
|
||||||
|
# Re-raise HTTP exceptions without wrapping
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error reading file in sandbox {sandbox_id}: {str(e)}")
|
logger.error(f"Error reading file in sandbox {sandbox_id}: {str(e)}")
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
|
@ -124,7 +124,19 @@ function getFileUrl(sandboxId: string | undefined, path: string): string {
|
||||||
path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`;
|
path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle any potential Unicode escape sequences
|
||||||
|
try {
|
||||||
|
// Replace escaped Unicode sequences with actual characters
|
||||||
|
path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
|
||||||
|
return String.fromCharCode(parseInt(hexCode, 16));
|
||||||
|
});
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Error processing Unicode escapes in path:', e);
|
||||||
|
}
|
||||||
|
|
||||||
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
|
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
|
||||||
|
|
||||||
|
// Properly encode the path parameter for UTF-8 support
|
||||||
url.searchParams.append('path', path);
|
url.searchParams.append('path', path);
|
||||||
|
|
||||||
return url.toString();
|
return url.toString();
|
||||||
|
|
|
@ -24,6 +24,15 @@ function normalizePath(path: string): string {
|
||||||
path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`;
|
path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handle Unicode escape sequences like \u0308
|
||||||
|
try {
|
||||||
|
path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
|
||||||
|
return String.fromCharCode(parseInt(hexCode, 16));
|
||||||
|
});
|
||||||
|
} catch (e) {
|
||||||
|
console.error('Error processing Unicode escapes in path:', e);
|
||||||
|
}
|
||||||
|
|
||||||
return path;
|
return path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -111,6 +120,8 @@ export function useCachedFile<T = string>(
|
||||||
const normalizedPath = normalizePath(filePath || '');
|
const normalizedPath = normalizePath(filePath || '');
|
||||||
|
|
||||||
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
|
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
|
||||||
|
|
||||||
|
// Properly encode the path parameter for UTF-8 support
|
||||||
url.searchParams.append('path', normalizedPath);
|
url.searchParams.append('path', normalizedPath);
|
||||||
|
|
||||||
// Fetch with authentication
|
// Fetch with authentication
|
||||||
|
@ -377,6 +388,11 @@ export const FileCache = {
|
||||||
console.log(`[FILE CACHE] Preloading ${filePaths.length} files for sandbox ${sandboxId}`);
|
console.log(`[FILE CACHE] Preloading ${filePaths.length} files for sandbox ${sandboxId}`);
|
||||||
|
|
||||||
return Promise.all(filePaths.map(async (path) => {
|
return Promise.all(filePaths.map(async (path) => {
|
||||||
|
// Handle Unicode escape sequences in paths
|
||||||
|
path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
|
||||||
|
return String.fromCharCode(parseInt(hexCode, 16));
|
||||||
|
});
|
||||||
|
|
||||||
const normalizedPath = normalizePath(path);
|
const normalizedPath = normalizePath(path);
|
||||||
const key = getCacheKey(sandboxId, path);
|
const key = getCacheKey(sandboxId, path);
|
||||||
|
|
||||||
|
@ -389,6 +405,8 @@ export const FileCache = {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
|
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
|
||||||
|
|
||||||
|
// Properly encode the path parameter for UTF-8 support
|
||||||
url.searchParams.append('path', normalizedPath);
|
url.searchParams.append('path', normalizedPath);
|
||||||
|
|
||||||
const response = await fetch(url.toString(), {
|
const response = await fetch(url.toString(), {
|
||||||
|
@ -491,6 +509,11 @@ export async function getCachedFile(
|
||||||
? 'blob'
|
? 'blob'
|
||||||
: (options.contentType || 'text');
|
: (options.contentType || 'text');
|
||||||
|
|
||||||
|
// First ensure the file path has any Unicode escape sequences properly handled
|
||||||
|
filePath = filePath.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
|
||||||
|
return String.fromCharCode(parseInt(hexCode, 16));
|
||||||
|
});
|
||||||
|
|
||||||
const key = getCacheKey(sandboxId, filePath);
|
const key = getCacheKey(sandboxId, filePath);
|
||||||
const startTime = performance.now();
|
const startTime = performance.now();
|
||||||
|
|
||||||
|
@ -535,7 +558,10 @@ export async function getCachedFile(
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
|
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
|
||||||
url.searchParams.append('path', normalizePath(filePath));
|
const normalizedPath = normalizePath(filePath);
|
||||||
|
|
||||||
|
// Properly encode the path parameter for UTF-8 characters
|
||||||
|
url.searchParams.append('path', normalizedPath);
|
||||||
|
|
||||||
const response = await fetch(url.toString(), {
|
const response = await fetch(url.toString(), {
|
||||||
headers: {
|
headers: {
|
||||||
|
|
|
@ -1135,6 +1135,19 @@ export const createSandboxFileJson = async (
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Helper function to normalize file paths with Unicode characters
|
||||||
|
/**
 * Expand literal `\uXXXX` escape sequences found in a file path into the
 * UTF-16 code units they denote.
 *
 * Only escapes with exactly four hexadecimal digits are recognised; any
 * other text is passed through unchanged.
 *
 * @param path - File path that may contain textual `\uXXXX` escapes.
 * @returns The path with every recognised escape replaced by its character.
 *          If the replacement throws, the error is logged and the original
 *          `path` is returned as-is.
 */
function normalizePathWithUnicode(path: string): string {
  const escapedCodeUnit = /\\u([0-9a-fA-F]{4})/g;

  try {
    return path.replace(escapedCodeUnit, (_match, hexDigits) =>
      String.fromCharCode(parseInt(hexDigits, 16)),
    );
  } catch (e) {
    console.error('Error processing Unicode escapes in path:', e);
    return path;
  }
}
|
||||||
|
|
||||||
export const listSandboxFiles = async (
|
export const listSandboxFiles = async (
|
||||||
sandboxId: string,
|
sandboxId: string,
|
||||||
path: string,
|
path: string,
|
||||||
|
@ -1146,7 +1159,12 @@ export const listSandboxFiles = async (
|
||||||
} = await supabase.auth.getSession();
|
} = await supabase.auth.getSession();
|
||||||
|
|
||||||
const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files`);
|
const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files`);
|
||||||
url.searchParams.append('path', path);
|
|
||||||
|
// Normalize the path to handle Unicode escape sequences
|
||||||
|
const normalizedPath = normalizePathWithUnicode(path);
|
||||||
|
|
||||||
|
// Properly encode the path parameter for UTF-8 support
|
||||||
|
url.searchParams.append('path', normalizedPath);
|
||||||
|
|
||||||
const headers: Record<string, string> = {};
|
const headers: Record<string, string> = {};
|
||||||
if (session?.access_token) {
|
if (session?.access_token) {
|
||||||
|
@ -1189,7 +1207,12 @@ export const getSandboxFileContent = async (
|
||||||
} = await supabase.auth.getSession();
|
} = await supabase.auth.getSession();
|
||||||
|
|
||||||
const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files/content`);
|
const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files/content`);
|
||||||
url.searchParams.append('path', path);
|
|
||||||
|
// Normalize the path to handle Unicode escape sequences
|
||||||
|
const normalizedPath = normalizePathWithUnicode(path);
|
||||||
|
|
||||||
|
// Properly encode the path parameter for UTF-8 support
|
||||||
|
url.searchParams.append('path', normalizedPath);
|
||||||
|
|
||||||
const headers: Record<string, string> = {};
|
const headers: Record<string, string> = {};
|
||||||
if (session?.access_token) {
|
if (session?.access_token) {
|
||||||
|
|
Loading…
Reference in New Issue