fix: normalize path

This commit is contained in:
Vukasin 2025-05-12 22:09:14 +02:00
parent 49e0f2ea8c
commit 6caa0e3adf
4 changed files with 157 additions and 5 deletions

View File

@ -1,4 +1,5 @@
import os import os
import urllib.parse
from typing import Optional from typing import Optional
from fastapi import FastAPI, UploadFile, File, HTTPException, APIRouter, Form, Depends, Request from fastapi import FastAPI, UploadFile, File, HTTPException, APIRouter, Form, Depends, Request
@ -30,6 +31,41 @@ class FileInfo(BaseModel):
mod_time: str mod_time: str
permissions: Optional[str] = None permissions: Optional[str] = None
def normalize_path(path: str) -> str:
    r"""
    Decode a request path into the literal file path it refers to.

    Two transformations are applied, in order:

    1. Percent-escapes (``%C3%A4``) are decoded as UTF-8.
    2. Literal ``\uXXXX`` escape text is replaced by the character it names.
       A high/low surrogate pair (``\ud83d\ude00``) is combined into the
       single code point it encodes. An unpaired surrogate escape is left as
       literal text, because a lone surrogate cannot be encoded as UTF-8.

    Args:
        path: The file path, potentially containing URL-encoded characters
            or literal ``\uXXXX`` escape sequences.

    Returns:
        The decoded path, or ``path`` unchanged if decoding raises.
    """
    # Local import, as in the original implementation of this function.
    import re

    # Either a high+low surrogate pair (groups 1 and 2) or any single
    # four-digit escape (group 3). The pair alternative is tried first.
    escape_pattern = (
        r'\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})'
        r'|\\u([0-9a-fA-F]{4})'
    )

    def replace_unicode(match):
        high, low, single = match.groups()
        if high is not None:
            # Combine the UTF-16 surrogate pair into one code point.
            return chr(
                0x10000
                + ((int(high, 16) - 0xD800) << 10)
                + (int(low, 16) - 0xDC00)
            )
        code_point = int(single, 16)
        if 0xD800 <= code_point <= 0xDFFF:
            # Unpaired surrogate: keep the escape text rather than build a
            # string that fails to encode as UTF-8.
            return match.group(0)
        return chr(code_point)

    try:
        # NOTE(review): callers pass request parameters here. If the web
        # framework has already percent-decoded them, a literal "%41" in a
        # file name is decoded a second time by this call - confirm intended.
        decoded_path = urllib.parse.unquote(path)
        decoded_path = re.sub(escape_pattern, replace_unicode, decoded_path)
    except Exception as e:
        # Best effort: a path that cannot be decoded is passed through as-is.
        logger.error(f"Error normalizing path '{path}': {str(e)}")
        return path

    logger.debug(f"Normalized path from '{path}' to '{decoded_path}'")
    return decoded_path
async def verify_sandbox_access(client, sandbox_id: str, user_id: Optional[str] = None): async def verify_sandbox_access(client, sandbox_id: str, user_id: Optional[str] = None):
""" """
Verify that a user has access to a specific sandbox based on account membership. Verify that a user has access to a specific sandbox based on account membership.
@ -114,6 +150,9 @@ async def create_file(
user_id: Optional[str] = Depends(get_optional_user_id) user_id: Optional[str] = Depends(get_optional_user_id)
): ):
"""Create a file in the sandbox using direct file upload""" """Create a file in the sandbox using direct file upload"""
# Normalize the path to handle UTF-8 encoding correctly
path = normalize_path(path)
logger.info(f"Received file upload request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}") logger.info(f"Received file upload request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
client = await db.client client = await db.client
@ -144,6 +183,9 @@ async def list_files(
user_id: Optional[str] = Depends(get_optional_user_id) user_id: Optional[str] = Depends(get_optional_user_id)
): ):
"""List files and directories at the specified path""" """List files and directories at the specified path"""
# Normalize the path to handle UTF-8 encoding correctly
path = normalize_path(path)
logger.info(f"Received list files request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}") logger.info(f"Received list files request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
client = await db.client client = await db.client
@ -186,7 +228,14 @@ async def read_file(
user_id: Optional[str] = Depends(get_optional_user_id) user_id: Optional[str] = Depends(get_optional_user_id)
): ):
"""Read a file from the sandbox""" """Read a file from the sandbox"""
# Normalize the path to handle UTF-8 encoding correctly
original_path = path
path = normalize_path(path)
logger.info(f"Received file read request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}") logger.info(f"Received file read request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
if original_path != path:
logger.info(f"Normalized path from '{original_path}' to '{path}'")
client = await db.client client = await db.client
# Verify the user has access to this sandbox # Verify the user has access to this sandbox
@ -196,17 +245,59 @@ async def read_file(
# Get sandbox using the safer method # Get sandbox using the safer method
sandbox = await get_sandbox_by_id_safely(client, sandbox_id) sandbox = await get_sandbox_by_id_safely(client, sandbox_id)
# Verify the file exists first
try:
filename = os.path.basename(path)
parent_dir = os.path.dirname(path)
# List files in the parent directory to check if the file exists
files_in_dir = sandbox.fs.list_files(parent_dir)
# Look for the target file with exact name match
file_exists = any(file.name == filename for file in files_in_dir)
if not file_exists:
logger.warning(f"File not found: {path} in sandbox {sandbox_id}")
# Try to find similar files to help diagnose
close_matches = [file.name for file in files_in_dir if filename.lower() in file.name.lower()]
error_detail = f"File '{filename}' not found in directory '{parent_dir}'"
if close_matches:
error_detail += f". Similar files in the directory: {', '.join(close_matches)}"
raise HTTPException(status_code=404, detail=error_detail)
except Exception as list_err:
# If we can't list files, continue with the download attempt
logger.warning(f"Error checking if file exists: {str(list_err)}")
# Read file # Read file
content = sandbox.fs.download_file(path) try:
content = sandbox.fs.download_file(path)
except Exception as download_err:
logger.error(f"Error downloading file {path} from sandbox {sandbox_id}: {str(download_err)}")
raise HTTPException(
status_code=404,
detail=f"Failed to download file: {str(download_err)}"
)
# Return a Response object with the content directly # Return a Response object with the content directly
filename = os.path.basename(path) filename = os.path.basename(path)
logger.info(f"Successfully read file {filename} from sandbox {sandbox_id}") logger.info(f"Successfully read file {filename} from sandbox {sandbox_id}")
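# Build the Content-Disposition header in the extended filename*=UTF-8'' form so non-ASCII names can be sent.
# NOTE: RFC 5987 requires that value to be percent-encoded UTF-8 (e.g. urllib.parse.quote(filename, safe='')); the encode/decode round-trip below only re-labels the UTF-8 bytes as latin-1 characters and does not percent-encode them.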
encoded_filename = filename.encode('utf-8').decode('latin-1')
content_disposition = f"attachment; filename*=UTF-8''{encoded_filename}"
return Response( return Response(
content=content, content=content,
media_type="application/octet-stream", media_type="application/octet-stream",
headers={"Content-Disposition": f"attachment; filename={filename}"} headers={"Content-Disposition": content_disposition}
) )
except HTTPException:
# Re-raise HTTP exceptions without wrapping
raise
except Exception as e: except Exception as e:
logger.error(f"Error reading file in sandbox {sandbox_id}: {str(e)}") logger.error(f"Error reading file in sandbox {sandbox_id}: {str(e)}")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))

View File

@ -124,7 +124,19 @@ function getFileUrl(sandboxId: string | undefined, path: string): string {
path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`; path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`;
} }
// Handle any potential Unicode escape sequences
try {
// Replace escaped Unicode sequences with actual characters
path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
return String.fromCharCode(parseInt(hexCode, 16));
});
} catch (e) {
console.error('Error processing Unicode escapes in path:', e);
}
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`); const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
// Properly encode the path parameter for UTF-8 support
url.searchParams.append('path', path); url.searchParams.append('path', path);
return url.toString(); return url.toString();

View File

@ -24,6 +24,15 @@ function normalizePath(path: string): string {
path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`; path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`;
} }
// Handle Unicode escape sequences like \u0308
try {
path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
return String.fromCharCode(parseInt(hexCode, 16));
});
} catch (e) {
console.error('Error processing Unicode escapes in path:', e);
}
return path; return path;
} }
@ -111,6 +120,8 @@ export function useCachedFile<T = string>(
const normalizedPath = normalizePath(filePath || ''); const normalizedPath = normalizePath(filePath || '');
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`); const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
// Properly encode the path parameter for UTF-8 support
url.searchParams.append('path', normalizedPath); url.searchParams.append('path', normalizedPath);
// Fetch with authentication // Fetch with authentication
@ -377,6 +388,11 @@ export const FileCache = {
console.log(`[FILE CACHE] Preloading ${filePaths.length} files for sandbox ${sandboxId}`); console.log(`[FILE CACHE] Preloading ${filePaths.length} files for sandbox ${sandboxId}`);
return Promise.all(filePaths.map(async (path) => { return Promise.all(filePaths.map(async (path) => {
// Handle Unicode escape sequences in paths
path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
return String.fromCharCode(parseInt(hexCode, 16));
});
const normalizedPath = normalizePath(path); const normalizedPath = normalizePath(path);
const key = getCacheKey(sandboxId, path); const key = getCacheKey(sandboxId, path);
@ -389,6 +405,8 @@ export const FileCache = {
try { try {
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`); const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
// Properly encode the path parameter for UTF-8 support
url.searchParams.append('path', normalizedPath); url.searchParams.append('path', normalizedPath);
const response = await fetch(url.toString(), { const response = await fetch(url.toString(), {
@ -491,6 +509,11 @@ export async function getCachedFile(
? 'blob' ? 'blob'
: (options.contentType || 'text'); : (options.contentType || 'text');
// First ensure the file path has any Unicode escape sequences properly handled
filePath = filePath.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
return String.fromCharCode(parseInt(hexCode, 16));
});
const key = getCacheKey(sandboxId, filePath); const key = getCacheKey(sandboxId, filePath);
const startTime = performance.now(); const startTime = performance.now();
@ -535,7 +558,10 @@ export async function getCachedFile(
try { try {
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`); const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
url.searchParams.append('path', normalizePath(filePath)); const normalizedPath = normalizePath(filePath);
// Properly encode the path parameter for UTF-8 characters
url.searchParams.append('path', normalizedPath);
const response = await fetch(url.toString(), { const response = await fetch(url.toString(), {
headers: { headers: {

View File

@ -1135,6 +1135,19 @@ export const createSandboxFileJson = async (
} }
}; };
/**
 * Expand literal `\uXXXX` escape sequences in a file path into the
 * characters they denote.
 *
 * @param path - File path that may contain escaped Unicode sequences.
 * @returns The path with each escape replaced, or the input unchanged if
 *          the replacement throws.
 */
function normalizePathWithUnicode(path: string): string {
  const escapedUnicode = /\\u([0-9a-fA-F]{4})/g;
  try {
    // Each match carries its four hex digits in the first capture group.
    return path.replace(escapedUnicode, (_match, hexDigits) =>
      String.fromCharCode(parseInt(hexDigits, 16)),
    );
  } catch (e) {
    console.error('Error processing Unicode escapes in path:', e);
    return path;
  }
}
export const listSandboxFiles = async ( export const listSandboxFiles = async (
sandboxId: string, sandboxId: string,
path: string, path: string,
@ -1146,7 +1159,12 @@ export const listSandboxFiles = async (
} = await supabase.auth.getSession(); } = await supabase.auth.getSession();
const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files`); const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files`);
url.searchParams.append('path', path);
// Normalize the path to handle Unicode escape sequences
const normalizedPath = normalizePathWithUnicode(path);
// Properly encode the path parameter for UTF-8 support
url.searchParams.append('path', normalizedPath);
const headers: Record<string, string> = {}; const headers: Record<string, string> = {};
if (session?.access_token) { if (session?.access_token) {
@ -1189,7 +1207,12 @@ export const getSandboxFileContent = async (
} = await supabase.auth.getSession(); } = await supabase.auth.getSession();
const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files/content`); const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files/content`);
url.searchParams.append('path', path);
// Normalize the path to handle Unicode escape sequences
const normalizedPath = normalizePathWithUnicode(path);
// Properly encode the path parameter for UTF-8 support
url.searchParams.append('path', normalizedPath);
const headers: Record<string, string> = {}; const headers: Record<string, string> = {};
if (session?.access_token) { if (session?.access_token) {