From 6caa0e3adfa6028bde943a717de92a35aa11692d Mon Sep 17 00:00:00 2001 From: Vukasin Date: Mon, 12 May 2025 22:09:14 +0200 Subject: [PATCH 1/2] fix: normalize path --- backend/sandbox/api.py | 95 ++++++++++++++++++- .../src/components/thread/file-attachment.tsx | 12 +++ frontend/src/hooks/use-cached-file.ts | 28 +++++- frontend/src/lib/api.ts | 27 +++++- 4 files changed, 157 insertions(+), 5 deletions(-) diff --git a/backend/sandbox/api.py b/backend/sandbox/api.py index 068c3824..4dafe5ff 100644 --- a/backend/sandbox/api.py +++ b/backend/sandbox/api.py @@ -1,4 +1,5 @@ import os +import urllib.parse from typing import Optional from fastapi import FastAPI, UploadFile, File, HTTPException, APIRouter, Form, Depends, Request @@ -30,6 +31,41 @@ class FileInfo(BaseModel): mod_time: str permissions: Optional[str] = None +def normalize_path(path: str) -> str: + """ + Normalize a path to ensure proper UTF-8 encoding and handling. + + Args: + path: The file path, potentially containing URL-encoded characters + + Returns: + Normalized path with proper UTF-8 encoding + """ + try: + # First, ensure the path is properly URL-decoded + decoded_path = urllib.parse.unquote(path) + + # Handle Unicode escape sequences like \u0308 + try: + # Replace Python-style Unicode escapes (\u0308) with actual characters + # The UTF-16 round trip below joins escaped surrogate pairs (\ud83d\ude00) into one character + import re + unicode_pattern = re.compile(r'\\u([0-9a-fA-F]{4})') + + def replace_unicode(match): + hex_val = match.group(1) + return chr(int(hex_val, 16)) + + decoded_path = unicode_pattern.sub(replace_unicode, decoded_path).encode('utf-16', 'surrogatepass').decode('utf-16') + except Exception as unicode_err: + logger.warning(f"Error processing Unicode escapes in path '{path}': {str(unicode_err)}") + + logger.debug(f"Normalized path from '{path}' to '{decoded_path}'") + return decoded_path + except Exception as e: + logger.error(f"Error normalizing path '{path}': {str(e)}") + return path # Return original path if decoding fails + async def 
verify_sandbox_access(client, sandbox_id: str, user_id: Optional[str] = None): """ Verify that a user has access to a specific sandbox based on account membership. @@ -114,6 +150,9 @@ async def create_file( user_id: Optional[str] = Depends(get_optional_user_id) ): """Create a file in the sandbox using direct file upload""" + # Normalize the path to handle UTF-8 encoding correctly + path = normalize_path(path) + logger.info(f"Received file upload request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}") client = await db.client @@ -144,6 +183,9 @@ async def list_files( user_id: Optional[str] = Depends(get_optional_user_id) ): """List files and directories at the specified path""" + # Normalize the path to handle UTF-8 encoding correctly + path = normalize_path(path) + logger.info(f"Received list files request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}") client = await db.client @@ -186,7 +228,14 @@ async def read_file( user_id: Optional[str] = Depends(get_optional_user_id) ): """Read a file from the sandbox""" + # Normalize the path to handle UTF-8 encoding correctly + original_path = path + path = normalize_path(path) + logger.info(f"Received file read request for sandbox {sandbox_id}, path: {path}, user_id: {user_id}") + if original_path != path: + logger.info(f"Normalized path from '{original_path}' to '{path}'") + client = await db.client # Verify the user has access to this sandbox @@ -196,17 +245,59 @@ async def read_file( # Get sandbox using the safer method sandbox = await get_sandbox_by_id_safely(client, sandbox_id) + # Verify the file exists first + try: + filename = os.path.basename(path) + parent_dir = os.path.dirname(path) + + # List files in the parent directory to check if the file exists + files_in_dir = sandbox.fs.list_files(parent_dir) + + # Look for the target file with exact name match + file_exists = any(file.name == filename for file in files_in_dir) + + if not file_exists: + logger.warning(f"File not found: {path} in 
sandbox {sandbox_id}") + + # Try to find similar files to help diagnose + close_matches = [file.name for file in files_in_dir if filename.lower() in file.name.lower()] + error_detail = f"File '{filename}' not found in directory '{parent_dir}'" + + if close_matches: + error_detail += f". Similar files in the directory: {', '.join(close_matches)}" + + raise HTTPException(status_code=404, detail=error_detail) + except HTTPException: raise # Propagate the 404 raised above instead of swallowing it below + except Exception as list_err: # If we can't list files, continue with the download attempt + logger.warning(f"Error checking if file exists: {str(list_err)}") + # Read file - content = sandbox.fs.download_file(path) + try: + content = sandbox.fs.download_file(path) + except Exception as download_err: + logger.error(f"Error downloading file {path} from sandbox {sandbox_id}: {str(download_err)}") + raise HTTPException( + status_code=404, + detail=f"Failed to download file: {str(download_err)}" + ) # Return a Response object with the content directly filename = os.path.basename(path) logger.info(f"Successfully read file {filename} from sandbox {sandbox_id}") + + # Ensure proper encoding by explicitly using UTF-8 for the filename in Content-Disposition header + # This applies RFC 5987 encoding (percent-encoded UTF-8) for the filename to support non-ASCII characters + encoded_filename = urllib.parse.quote(filename, safe='') + content_disposition = f"attachment; filename*=UTF-8''{encoded_filename}" + return Response( content=content, media_type="application/octet-stream", - headers={"Content-Disposition": f"attachment; filename={filename}"} + headers={"Content-Disposition": content_disposition} ) + except HTTPException: + # Re-raise HTTP exceptions without wrapping + raise except Exception as e: logger.error(f"Error reading file in sandbox {sandbox_id}: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) diff --git a/frontend/src/components/thread/file-attachment.tsx b/frontend/src/components/thread/file-attachment.tsx index 41d0da1d..7f399f51 100644 --- 
a/frontend/src/components/thread/file-attachment.tsx +++ b/frontend/src/components/thread/file-attachment.tsx @@ -124,7 +124,19 @@ function getFileUrl(sandboxId: string | undefined, path: string): string { path = `/workspace/${path.startsWith('/') ? path.substring(1) : path}`; } + // Handle any potential Unicode escape sequences + try { + // Replace escaped Unicode sequences with actual characters + path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => { + return String.fromCharCode(parseInt(hexCode, 16)); + }); + } catch (e) { + console.error('Error processing Unicode escapes in path:', e); + } + const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`); + + // Properly encode the path parameter for UTF-8 support url.searchParams.append('path', path); return url.toString(); diff --git a/frontend/src/hooks/use-cached-file.ts b/frontend/src/hooks/use-cached-file.ts index 70de5fd1..a5cb571c 100644 --- a/frontend/src/hooks/use-cached-file.ts +++ b/frontend/src/hooks/use-cached-file.ts @@ -24,6 +24,15 @@ function normalizePath(path: string): string { path = `/workspace/${path.startsWith('/') ? 
path.substring(1) : path}`; } + // Handle Unicode escape sequences like \u0308 + try { + path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => { + return String.fromCharCode(parseInt(hexCode, 16)); + }); + } catch (e) { + console.error('Error processing Unicode escapes in path:', e); + } + return path; } @@ -111,6 +120,8 @@ export function useCachedFile( const normalizedPath = normalizePath(filePath || ''); const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`); + + // Properly encode the path parameter for UTF-8 support url.searchParams.append('path', normalizedPath); // Fetch with authentication @@ -377,6 +388,11 @@ export const FileCache = { console.log(`[FILE CACHE] Preloading ${filePaths.length} files for sandbox ${sandboxId}`); return Promise.all(filePaths.map(async (path) => { + // Handle Unicode escape sequences in paths + path = path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => { + return String.fromCharCode(parseInt(hexCode, 16)); + }); + const normalizedPath = normalizePath(path); const key = getCacheKey(sandboxId, path); @@ -389,6 +405,8 @@ export const FileCache = { try { const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`); + + // Properly encode the path parameter for UTF-8 support url.searchParams.append('path', normalizedPath); const response = await fetch(url.toString(), { @@ -491,6 +509,11 @@ export async function getCachedFile( ? 
'blob' : (options.contentType || 'text'); + // First ensure the file path has any Unicode escape sequences properly handled + filePath = filePath.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => { + return String.fromCharCode(parseInt(hexCode, 16)); + }); + const key = getCacheKey(sandboxId, filePath); const startTime = performance.now(); @@ -535,7 +558,10 @@ export async function getCachedFile( try { const url = new URL(`${process.env.NEXT_PUBLIC_BACKEND_URL}/sandboxes/${sandboxId}/files/content`); - url.searchParams.append('path', normalizePath(filePath)); + const normalizedPath = normalizePath(filePath); + + // Properly encode the path parameter for UTF-8 characters + url.searchParams.append('path', normalizedPath); const response = await fetch(url.toString(), { headers: { diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index d52e0b5c..137aa1a3 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -1135,6 +1135,19 @@ export const createSandboxFileJson = async ( } }; +// Helper function to normalize file paths with Unicode characters +function normalizePathWithUnicode(path: string): string { + try { + // Replace escaped Unicode sequences with actual characters + return path.replace(/\\u([0-9a-fA-F]{4})/g, (_, hexCode) => { + return String.fromCharCode(parseInt(hexCode, 16)); + }); + } catch (e) { + console.error('Error processing Unicode escapes in path:', e); + return path; + } +} + export const listSandboxFiles = async ( sandboxId: string, path: string, @@ -1146,7 +1159,12 @@ export const listSandboxFiles = async ( } = await supabase.auth.getSession(); const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files`); - url.searchParams.append('path', path); + + // Normalize the path to handle Unicode escape sequences + const normalizedPath = normalizePathWithUnicode(path); + + // Properly encode the path parameter for UTF-8 support + url.searchParams.append('path', normalizedPath); const headers: Record = {}; if 
(session?.access_token) { @@ -1189,7 +1207,12 @@ export const getSandboxFileContent = async ( } = await supabase.auth.getSession(); const url = new URL(`${API_URL}/sandboxes/${sandboxId}/files/content`); - url.searchParams.append('path', path); + + // Normalize the path to handle Unicode escape sequences + const normalizedPath = normalizePathWithUnicode(path); + + // Properly encode the path parameter for UTF-8 support + url.searchParams.append('path', normalizedPath); const headers: Record = {}; if (session?.access_token) { From 69cd04e3b8ae6fccb078aeb5d1867de484d61d77 Mon Sep 17 00:00:00 2001 From: Vukasin Date: Tue, 13 May 2025 18:14:45 +0200 Subject: [PATCH 2/2] fix: removed leftover import --- frontend/src/components/thread/content/ThreadContent.tsx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/frontend/src/components/thread/content/ThreadContent.tsx b/frontend/src/components/thread/content/ThreadContent.tsx index 166ab524..fb2dd698 100644 --- a/frontend/src/components/thread/content/ThreadContent.tsx +++ b/frontend/src/components/thread/content/ThreadContent.tsx @@ -4,15 +4,14 @@ import { ArrowDown, CircleDashed } from 'lucide-react'; import { Button } from '@/components/ui/button'; import { Markdown } from '@/components/ui/markdown'; import { UnifiedMessage, ParsedContent, ParsedMetadata } from '@/components/thread/types'; -import { safeJsonParse } from '@/components/thread/utils'; import { FileAttachmentGrid } from '@/components/thread/file-attachment'; import { FileCache } from '@/hooks/use-cached-file'; import { useAuth } from '@/components/AuthProvider'; import { Project } from '@/lib/api'; import { - extractPrimaryParam, - getToolIcon, - safeJsonParse, + extractPrimaryParam, + getToolIcon, + safeJsonParse, } from '@/components/thread/utils'; // Define the set of tags whose raw XML should be hidden during streaming