Merge pull request #292 from kubet/feat/attachment-rework

fix: normalize path
This commit is contained in:
Marko Kraemer 2025-05-13 20:37:07 +02:00 committed by GitHub
commit e58447c4bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 160 additions and 8 deletions

View File

@ -1,4 +1,5 @@
import os
import urllib.parse
from typing import Optional
from fastapi import FastAPI, UploadFile, File, HTTPException, APIRouter, Form, Depends, Request
@ -30,6 +31,41 @@ class FileInfo(BaseModel):
mod_time: str
permissions: Optional[str] = None
def normalize_path(path: str) -> str:
    """
    Normalize a path to ensure proper UTF-8 encoding and handling.

    The path is URL-decoded, and any literal ``\\uXXXX`` escape sequences that
    remain in the text are replaced with the characters they denote. Two
    adjacent escapes that form a UTF-16 surrogate pair (a high surrogate
    followed by a low surrogate) are combined into the single code point they
    encode, so the result can always be encoded as UTF-8. An escaped surrogate
    that is not part of a valid pair is converted on its own.

    Args:
        path: The file path, potentially containing URL-encoded characters

    Returns:
        Normalized path with proper UTF-8 encoding, or the original path
        unchanged if normalization fails
    """
    import re

    try:
        # First, ensure the path is properly URL-decoded
        decoded_path = urllib.parse.unquote(path)

        # Handle Unicode escape sequences like \u0308
        try:
            # The first alternative matches an escaped surrogate pair and must
            # be tried before the generic single-escape alternative, otherwise
            # each half would be turned into a lone surrogate code point.
            unicode_pattern = re.compile(
                r'\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})'
                r'|\\u([0-9a-fA-F]{4})'
            )

            def replace_unicode(match):
                high_hex, low_hex, single_hex = match.groups()
                if single_hex is not None:
                    return chr(int(single_hex, 16))
                # Standard UTF-16 surrogate pair decoding
                high = int(high_hex, 16) - 0xD800
                low = int(low_hex, 16) - 0xDC00
                return chr(0x10000 + (high << 10) + low)

            decoded_path = unicode_pattern.sub(replace_unicode, decoded_path)
        except Exception as unicode_err:
            # Best effort: keep the URL-decoded path if escape handling fails
            logger.warning(f"Error processing Unicode escapes in path '{path}': {str(unicode_err)}")

        logger.debug(f"Normalized path from '{path}' to '{decoded_path}'")
        return decoded_path
    except Exception as e:
        logger.error(f"Error normalizing path '{path}': {str(e)}")
        return path  # Return original path if decoding fails
async def verify_sandbox_access(client, sandbox_id: str, user_id: Optional[str] = None):
"""
Verify that a user has access to a specific sandbox based on account membership.
@ -114,6 +150,9 @@ async def create_file(
user_id: Optional[str] = Depends(get_optional_user_id)
):
"""Create a file in the sandbox using direct file upload"""
# Normalize the path to handle UTF-8 encoding correctly
path = normalize_path(path)
logger.info(f"Received file upload request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
client = await db.client
@ -144,6 +183,9 @@ async def list_files(
user_id: Optional[str] = Depends(get_optional_user_id)
):
"""List files and directories at the specified path"""
# Normalize the path to handle UTF-8 encoding correctly
path = normalize_path(path)
logger.info(f"Received list files request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
client = await db.client
@ -186,7 +228,14 @@ async def read_file(
user_id: Optional[str] = Depends(get_optional_user_id)
):
"""Read a file from the sandbox"""
# Normalize the path to handle UTF-8 encoding correctly
original_path = path
path = normalize_path(path)
logger.info(f"Received file read request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}")
if original_path != path:
logger.info(f"Normalized path from '{original_path}' to '{path}'")
client = await db.client
# Verify the user has access to this sandbox
@ -196,17 +245,59 @@ async def read_file(
# Get sandbox using the safer method
sandbox = await get_sandbox_by_id_safely(client, sandbox_id)
# Verify the file exists first
try:
filename = os.path.basename(path)
parent_dir = os.path.dirname(path)
# List files in the parent directory to check if the file exists
files_in_dir = sandbox.fs.list_files(parent_dir)
# Look for the target file with exact name match
file_exists = any(file.name == filename for file in files_in_dir)
if not file_exists:
logger.warning(f"File not found: {path} in sandbox {sandbox_id}")
# Try to find similar files to help diagnose
close_matches = [file.name for file in files_in_dir if filename.lower() in file.name.lower()]
error_detail = f"File '{filename}' not found in directory '{parent_dir}'"
if close_matches:
error_detail += f". Similar files in the directory: {', '.join(close_matches)}"
raise HTTPException(status_code=404, detail=error_detail)
except Exception as list_err:
# If we can't list files, continue with the download attempt
logger.warning(f"Error checking if file exists: {str(list_err)}")
# Read file
content = sandbox.fs.download_file(path)
try:
content = sandbox.fs.download_file(path)
except Exception as download_err:
logger.error(f"Error downloading file {path} from sandbox {sandbox_id}: {str(download_err)}")
raise HTTPException(
status_code=404,
detail=f"Failed to download file: {str(download_err)}"
)
# Return a Response object with the content directly
filename = os.path.basename(path)
logger.info(f"Successfully read file {filename} from sandbox {sandbox_id}")
# Ensure proper encoding by explicitly using UTF-8 for the filename in Content-Disposition header
# This applies RFC 5987 encoding for the filename to support non-ASCII characters
encoded_filename = filename.encode('utf-8').decode('latin-1')
content_disposition = f"attachment; filename*=UTF-8''{encoded_filename}"
return Response(
content=content,
media_type="application/octet-stream",
headers={"Content-Disposition": f"attachment; filename={filename}"}
headers={"Content-Disposition": content_disposition}
)
except HTTPException:
# Re-raise HTTP exceptions without wrapping
raise
except Exception as e:
logger.error(f"Error reading file in sandbox {sandbox_id}: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))

View File

@ -9,9 +9,9 @@ import { FileCache } from '@/hooks/use-cached-file';
import { useAuth } from '@/components/AuthProvider';
import { Project } from '@/lib/api';
import {
extractPrimaryParam,
getToolIcon,
safeJsonParse,
extractPrimaryParam,
getToolIcon,
safeJsonParse,
} from '@/components/thread/utils';
// Define the set of tags whose raw XML should be hidden during streaming

View File

@ -124,7 +124,19 @@ function getFileUrl(sandboxId: string | undefined, path: string): string {
path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`;
}
// Handle any potential Unicode escape sequences
try {
// Replace escaped Unicode sequences with actual characters
path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
return String.fromCharCode(parseInt(hexCode, 16));
});
} catch (e) {
console.error('Error processing Unicode escapes in path:', e);
}
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
// Properly encode the path parameter for UTF-8 support
url.searchParams.append('path', path);
return url.toString();

View File

@ -24,6 +24,15 @@ function normalizePath(path: string): string {
path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`;
}
// Handle Unicode escape sequences like \u0308
try {
path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
return String.fromCharCode(parseInt(hexCode, 16));
});
} catch (e) {
console.error('Error processing Unicode escapes in path:', e);
}
return path;
}
@ -111,6 +120,8 @@ export function useCachedFile<T = string>(
const normalizedPath = normalizePath(filePath || '');
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
// Properly encode the path parameter for UTF-8 support
url.searchParams.append('path', normalizedPath);
// Fetch with authentication
@ -377,6 +388,11 @@ export const FileCache = {
console.log(`[FILE CACHE] Preloading ${filePaths.length} files for sandbox ${sandboxId}`);
return Promise.all(filePaths.map(async (path) => {
// Handle Unicode escape sequences in paths
path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
return String.fromCharCode(parseInt(hexCode, 16));
});
const normalizedPath = normalizePath(path);
const key = getCacheKey(sandboxId, path);
@ -389,6 +405,8 @@ export const FileCache = {
try {
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
// Properly encode the path parameter for UTF-8 support
url.searchParams.append('path', normalizedPath);
const response = await fetch(url.toString(), {
@ -491,6 +509,11 @@ export async function getCachedFile(
? 'blob'
: (options.contentType || 'text');
// First ensure the file path has any Unicode escape sequences properly handled
filePath = filePath.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => {
return String.fromCharCode(parseInt(hexCode, 16));
});
const key = getCacheKey(sandboxId, filePath);
const startTime = performance.now();
@ -535,7 +558,10 @@ export async function getCachedFile(
try {
const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`);
url.searchParams.append('path', normalizePath(filePath));
const normalizedPath = normalizePath(filePath);
// Properly encode the path parameter for UTF-8 characters
url.searchParams.append('path', normalizedPath);
const response = await fetch(url.toString(), {
headers: {

View File

@ -1135,6 +1135,19 @@ export const createSandboxFileJson = async (
}
};
/**
 * Expands literal `\uXXXX` escape sequences found in a file path into the
 * UTF-16 code units they denote.
 *
 * @param path - File path that may contain backslash-u escape sequences.
 * @returns The path with every four-hex-digit escape replaced; the input is
 *   returned unchanged if the replacement throws.
 */
function normalizePathWithUnicode(path: string): string {
  const escapedCodeUnit = /\\u([0-9a-fA-F]{4})/g;
  try {
    const expanded = path.replace(escapedCodeUnit, (_match, hexDigits: string) =>
      String.fromCharCode(parseInt(hexDigits, 16)),
    );
    return expanded;
  } catch (err) {
    console.error('Error processing Unicode escapes in path:', err);
    return path;
  }
}
export const listSandboxFiles = async (
sandboxId: string,
path: string,
@ -1146,7 +1159,12 @@ export const listSandboxFiles = async (
} = await supabase.auth.getSession();
const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files`);
url.searchParams.append('path', path);
// Normalize the path to handle Unicode escape sequences
const normalizedPath = normalizePathWithUnicode(path);
// Properly encode the path parameter for UTF-8 support
url.searchParams.append('path', normalizedPath);
const headers: Record<string, string> = {};
if (session?.access_token) {
@ -1189,7 +1207,12 @@ export const getSandboxFileContent = async (
} = await supabase.auth.getSession();
const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files/content`);
url.searchParams.append('path', path);
// Normalize the path to handle Unicode escape sequences
const normalizedPath = normalizePathWithUnicode(path);
// Properly encode the path parameter for UTF-8 support
url.searchParams.append('path', normalizedPath);
const headers: Record<string, string> = {};
if (session?.access_token) {