mirror of https://github.com/kortix-ai/suna.git
Merge pull request #292 from kubet/feat/attachment-rework
fix: normalize path
commit e58447c4bf
@@ -1,4 +1,5 @@
 import os
+import urllib.parse
 from typing import Optional
 
 from fastapi import FastAPI, UploadFile, File, HTTPException, APIRouter, Form, Depends, Request
@@ -30,6 +31,41 @@ class FileInfo(BaseModel):
     mod_time: str
     permissions: Optional[str] = None
 
+def normalize_path(path: str) -> str:
+    """
+    Normalize a path to ensure proper UTF-8 encoding and handling.
+
+    Args:
+        path: The file path, potentially containing URL-encoded characters
+
+    Returns:
+        Normalized path with proper UTF-8 encoding
+    """
+    try:
+        # First, ensure the path is properly URL-decoded
+        decoded_path = urllib.parse.unquote(path)
+
+        # Handle Unicode escape sequences like \u0308
+        try:
+            # Replace Python-style Unicode escapes (\u0308) with actual characters
+            # This handles cases where the Unicode escape sequence is part of the URL
+            import re
+            unicode_pattern = re.compile(r'\\u([0-9a-fA-F]{4})')
+
+            def replace_unicode(match):
+                hex_val = match.group(1)
+                return chr(int(hex_val, 16))
+
+            decoded_path = unicode_pattern.sub(replace_unicode, decoded_path)
+        except Exception as unicode_err:
+            logger.warning(f"Error processing Unicode escapes in path '{path}': {str(unicode_err)}")
+
+        logger.debug(f"Normalized path from '{path}' to '{decoded_path}'")
+        return decoded_path
+    except Exception as e:
+        logger.error(f"Error normalizing path '{path}': {str(e)}")
+        return path  # Return original path if decoding fails
+
 async def verify_sandbox_access(client, sandbox_id: str, user_id: Optional[str] = None):
     """
     Verify that a user has access to a specific sandbox based on account membership.
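For reference, a minimal sketch of what the new normalize_path helper does, replicating its two steps (URL-decoding, then expanding literal \uXXXX escapes) in a self-contained snippet; the file name used here is hypothetical:

import re
import urllib.parse

def demo_normalize(path: str) -> str:
    # Step 1: URL-decode, as normalize_path does with urllib.parse.unquote.
    decoded = urllib.parse.unquote(path)
    # Step 2: expand literal \uXXXX escape sequences, as its regex pass does.
    return re.sub(r'\\u([0-9a-fA-F]{4})',
                  lambda m: chr(int(m.group(1), 16)), decoded)

# %CC%88 is the percent-encoded UTF-8 form of U+0308 (combining diaeresis).
print(demo_normalize("/workspace/U%CC%88nderstanding.txt"))   # /workspace/Ünderstanding.txt
print(demo_normalize(r"/workspace/U\u0308nderstanding.txt"))  # same result, via the escape pass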
@@ -114,6 +150,9 @@ async def create_file(
     user_id: Optional[str] = Depends(get_optional_user_id)
 ):
     """Create a file in the sandbox using direct file upload"""
+    # Normalize the path to handle UTF-8 encoding correctly
+    path = normalize_path(path)
+
     logger.info(f"Received file upload request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
     client = await db.client
 
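One caveat worth noting about applying normalize_path unconditionally: urllib.parse.unquote always decodes valid percent escapes, so a file whose real name contains a literal "%XX" sequence would be looked up under a different name. A small illustration (file names hypothetical):

import urllib.parse

# A literal "%25" in a stored file name decodes to "%", so the lookup would
# target a different name than the one on disk.
print(urllib.parse.unquote("/workspace/sale_50%25off.txt"))  # /workspace/sale_50%off.txt

# Invalid escapes are left untouched, so most plain names pass through safely.
print(urllib.parse.unquote("/workspace/sale_50%off.txt"))    # /workspace/sale_50%off.txt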
@@ -144,6 +183,9 @@ async def list_files(
     user_id: Optional[str] = Depends(get_optional_user_id)
 ):
     """List files and directories at the specified path"""
+    # Normalize the path to handle UTF-8 encoding correctly
+    path = normalize_path(path)
+
     logger.info(f"Received list files request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
     client = await db.client
 
@@ -186,7 +228,14 @@ async def read_file(
     user_id: Optional[str] = Depends(get_optional_user_id)
 ):
     """Read a file from the sandbox"""
+    # Normalize the path to handle UTF-8 encoding correctly
+    original_path = path
+    path = normalize_path(path)
+
     logger.info(f"Received file read request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
+    if original_path != path:
+        logger.info(f"Normalized path from '{original_path}' to '{path}'")
+
     client = await db.client
 
     # Verify the user has access to this sandbox
@@ -196,17 +245,59 @@ async def read_file(
         # Get sandbox using the safer method
         sandbox = await get_sandbox_by_id_safely(client, sandbox_id)
 
+        # Verify the file exists first
+        try:
+            filename = os.path.basename(path)
+            parent_dir = os.path.dirname(path)
+
+            # List files in the parent directory to check if the file exists
+            files_in_dir = sandbox.fs.list_files(parent_dir)
+
+            # Look for the target file with exact name match
+            file_exists = any(file.name == filename for file in files_in_dir)
+
+            if not file_exists:
+                logger.warning(f"File not found: {path} in sandbox {sandbox_id}")
+
+                # Try to find similar files to help diagnose
+                close_matches = [file.name for file in files_in_dir if filename.lower() in file.name.lower()]
+                error_detail = f"File '{filename}' not found in directory '{parent_dir}'"
+
+                if close_matches:
+                    error_detail += f". Similar files in the directory: {', '.join(close_matches)}"
+
+                raise HTTPException(status_code=404, detail=error_detail)
+        except Exception as list_err:
+            # If we can't list files, continue with the download attempt
+            logger.warning(f"Error checking if file exists: {str(list_err)}")
+
         # Read file
-        content = sandbox.fs.download_file(path)
+        try:
+            content = sandbox.fs.download_file(path)
+        except Exception as download_err:
+            logger.error(f"Error downloading file {path} from sandbox {sandbox_id}: {str(download_err)}")
+            raise HTTPException(
+                status_code=404,
+                detail=f"Failed to download file: {str(download_err)}"
+            )
 
         # Return a Response object with the content directly
         filename = os.path.basename(path)
         logger.info(f"Successfully read file {filename} from sandbox {sandbox_id}")
 
+        # Ensure proper encoding by explicitly using UTF-8 for the filename in Content-Disposition header
+        # This applies RFC 5987 encoding for the filename to support non-ASCII characters
+        encoded_filename = filename.encode('utf-8').decode('latin-1')
+        content_disposition = f"attachment; filename*=UTF-8''{encoded_filename}"
+
         return Response(
             content=content,
             media_type="application/octet-stream",
-            headers={"Content-Disposition": f"attachment; filename={filename}"}
+            headers={"Content-Disposition": content_disposition}
         )
     except HTTPException:
         # Re-raise HTTP exceptions without wrapping
         raise
     except Exception as e:
         logger.error(f"Error reading file in sandbox {sandbox_id}: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
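A note on the Content-Disposition change above: the encode('utf-8').decode('latin-1') step smuggles the filename's UTF-8 bytes through the latin-1-only HTTP header layer, whereas a strict RFC 5987 value would percent-encode them instead. A minimal sketch contrasting the two forms (the filename is hypothetical):

import urllib.parse

filename = "Ünderstanding.txt"  # hypothetical non-ASCII filename

# The commit's approach: reinterpret the UTF-8 bytes as latin-1 so the header
# layer passes them through verbatim (the raw bytes land in the header).
smuggled = filename.encode('utf-8').decode('latin-1')
header_raw = f"attachment; filename*=UTF-8''{smuggled}"

# Strict RFC 5987 percent-encodes the UTF-8 bytes instead.
header_5987 = f"attachment; filename*=UTF-8''{urllib.parse.quote(filename)}"

print(header_5987)  # attachment; filename*=UTF-8''%C3%9Cnderstanding.txt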
@@ -9,9 +9,9 @@ import { FileCache } from '@/hooks/use-cached-file';
 import { useAuth } from '@/components/AuthProvider';
 import { Project } from '@/lib/api';
 import {
-  extractPrimaryParam,
-  getToolIcon,
-  safeJsonParse,
+  extractPrimaryParam,
+  getToolIcon,
+  safeJsonParse,
 } from '@/components/thread/utils';
 
 // Define the set of tags whose raw XML should be hidden during streaming
@@ -124,7 +124,19 @@ function getFileUrl(sandboxId: string | undefined, path: string): string {
     path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`;
   }
 
+  // Handle any potential Unicode escape sequences
+  try {
+    // Replace escaped Unicode sequences with actual characters
+    path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
+      return String.fromCharCode(parseInt(hexCode, 16));
+    });
+  } catch (e) {
+    console.error('Error processing Unicode escapes in path:', e);
+  }
+
   const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
+
+  // Properly encode the path parameter for UTF-8 support
   url.searchParams.append('path', path);
 
   return url.toString();
@@ -24,6 +24,15 @@ function normalizePath(path: string): string {
     path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`;
   }
 
+  // Handle Unicode escape sequences like \u0308
+  try {
+    path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
+      return String.fromCharCode(parseInt(hexCode, 16));
+    });
+  } catch (e) {
+    console.error('Error processing Unicode escapes in path:', e);
+  }
+
   return path;
 }
 
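Round-trip check: on the client, URL.searchParams.append percent-encodes the normalized path, and on the server normalize_path reverses that with urllib.parse.unquote. A sketch of that round trip, approximating the client-side encoding with urllib.parse.quote for illustration:

import urllib.parse

path = "/workspace/Ünderstanding.txt"  # hypothetical path with a non-ASCII name

encoded = urllib.parse.quote(path)       # roughly what searchParams.append sends
decoded = urllib.parse.unquote(encoded)  # the first step of normalize_path

assert decoded == path  # lossless for UTF-8 paths
print(encoded)  # /workspace/%C3%9Cnderstanding.txt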
@@ -111,6 +120,8 @@ export function useCachedFile<T = string>(
   const normalizedPath = normalizePath(filePath || '');
 
   const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
+
+  // Properly encode the path parameter for UTF-8 support
   url.searchParams.append('path', normalizedPath);
 
   // Fetch with authentication
@@ -377,6 +388,11 @@ export const FileCache = {
     console.log(`[FILE CACHE] Preloading ${filePaths.length} files for sandbox ${sandboxId}`);
 
     return Promise.all(filePaths.map(async (path) => {
+      // Handle Unicode escape sequences in paths
+      path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
+        return String.fromCharCode(parseInt(hexCode, 16));
+      });
+
       const normalizedPath = normalizePath(path);
       const key = getCacheKey(sandboxId, path);
 
@@ -389,6 +405,8 @@ export const FileCache = {
 
       try {
         const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
+
+        // Properly encode the path parameter for UTF-8 support
         url.searchParams.append('path', normalizedPath);
 
         const response = await fetch(url.toString(), {
@@ -491,6 +509,11 @@ export async function getCachedFile(
     ? 'blob'
     : (options.contentType || 'text');
 
+  // First ensure the file path has any Unicode escape sequences properly handled
+  filePath = filePath.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
+    return String.fromCharCode(parseInt(hexCode, 16));
+  });
+
   const key = getCacheKey(sandboxId, filePath);
   const startTime = performance.now();
 
@@ -535,7 +558,10 @@ export async function getCachedFile(
 
   try {
     const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
-    url.searchParams.append('path', normalizePath(filePath));
+    const normalizedPath = normalizePath(filePath);
+
+    // Properly encode the path parameter for UTF-8 characters
+    url.searchParams.append('path', normalizedPath);
 
     const response = await fetch(url.toString(), {
       headers: {
@@ -1135,6 +1135,19 @@ export const createSandboxFileJson = async (
   }
 };
 
+// Helper function to normalize file paths with Unicode characters
+function normalizePathWithUnicode(path: string): string {
+  try {
+    // Replace escaped Unicode sequences with actual characters
+    return path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
+      return String.fromCharCode(parseInt(hexCode, 16));
+    });
+  } catch (e) {
+    console.error('Error processing Unicode escapes in path:', e);
+    return path;
+  }
+}
+
 export const listSandboxFiles = async (
   sandboxId: string,
   path: string,
@@ -1146,7 +1159,12 @@ export const listSandboxFiles = async (
   } = await supabase.auth.getSession();
 
   const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files`);
-  url.searchParams.append('path', path);
+
+  // Normalize the path to handle Unicode escape sequences
+  const normalizedPath = normalizePathWithUnicode(path);
+
+  // Properly encode the path parameter for UTF-8 support
+  url.searchParams.append('path', normalizedPath);
 
   const headers: Record<string, string> = {};
   if (session?.access_token) {
@@ -1189,7 +1207,12 @@ export const getSandboxFileContent = async (
   } = await supabase.auth.getSession();
 
   const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files/content`);
-  url.searchParams.append('path', path);
+
+  // Normalize the path to handle Unicode escape sequences
+  const normalizedPath = normalizePathWithUnicode(path);
+
+  // Properly encode the path parameter for UTF-8 support
+  url.searchParams.append('path', normalizedPath);
 
   const headers: Record<string, string> = {};
   if (session?.access_token) {