fix: filetype and visuals

This commit is contained in:
Vukasin 2025-09-18 22:36:28 +02:00
parent 44caf0460b
commit e462a7ae00
4 changed files with 121 additions and 24 deletions

View File

@ -44,6 +44,20 @@ class FileProcessor:
return f"{name}{ext}"
def _is_likely_text_file(self, file_content: bytes) -> bool:
    """Check if file content is likely text-based.

    Only the first 1 KB of ``file_content`` is inspected. The sample counts
    as text when chardet reports a confidence above 0.7 for its detected
    encoding, the sample decodes with that encoding, and more than 80% of
    the decoded characters are printable or whitespace.

    Args:
        file_content: Raw bytes of the uploaded file.

    Returns:
        True if the sample looks like text, False otherwise (including for
        empty input, low detection confidence, or undecodable bytes).
    """
    sample = file_content[:1024]  # Check first 1KB
    if not sample:
        # Nothing to inspect; also avoids dividing by zero below.
        return False

    detected = chardet.detect(sample)
    # chardet may report None for confidence/encoding when it cannot decide.
    if (detected.get('confidence') or 0) <= 0.7:
        return False
    encoding = detected.get('encoding') or 'utf-8'

    try:
        decoded = sample.decode(encoding)
    except UnicodeDecodeError as exc:
        # The 1 KB cut may land in the middle of a multi-byte character.
        # Retry without the incomplete tail only when the sample really was
        # truncated and the failure is within the last few bytes.
        was_truncated = len(file_content) > len(sample)
        near_end = exc.start >= len(sample) - 4
        if not (was_truncated and near_end and exc.start > 0):
            return False
        try:
            decoded = sample[:exc.start].decode(encoding)
        except UnicodeDecodeError:
            return False
    except LookupError:
        # chardet named an encoding this Python build has no codec for.
        return False

    if not decoded:
        # e.g. a sample consisting only of a byte-order mark.
        return False

    # Check if most characters are printable
    printable_count = sum(1 for ch in decoded if ch.isprintable() or ch.isspace())
    return printable_count / len(decoded) > 0.8
async def process_file(
self,
account_id: str,
@ -57,7 +71,16 @@ class FileProcessor:
raise ValueError(f"File too large: {len(file_content)} bytes")
file_extension = Path(filename).suffix.lower()
if file_extension not in self.SUPPORTED_EXTENSIONS:
# Check if it's text-based first
is_text_based = (
mime_type.startswith('text/') or
mime_type in ['application/json', 'application/xml', 'text/xml'] or
self._is_likely_text_file(file_content)
)
# If not text-based, check allowed extensions
if not is_text_based and file_extension not in self.SUPPORTED_EXTENSIONS:
raise ValueError(f"Unsupported file type: {file_extension}")
# Generate unique entry ID
@ -77,7 +100,8 @@ class FileProcessor:
# Extract content for summary
content = self._extract_content(file_content, filename, mime_type)
if not content:
raise ValueError("No extractable content found")
# If no content could be extracted, create a basic file info summary
content = f"File: {filename} ({len(file_content)} bytes, {mime_type})"
# Generate LLM summary
summary = await self._generate_summary(content, filename)
@ -256,7 +280,11 @@ Keep it under 200 words and make it actionable for context injection."""
file_extension = Path(filename).suffix.lower()
try:
if file_extension == '.txt' or mime_type.startswith('text/'):
# Handle text-based files (including JSON, XML, CSV, etc.)
if (file_extension in ['.txt', '.json', '.xml', '.csv', '.yml', '.yaml', '.md', '.log', '.ini', '.cfg', '.conf']
or mime_type.startswith('text/')
or mime_type in ['application/json', 'application/xml', 'text/xml']):
detected = chardet.detect(file_content)
encoding = detected.get('encoding', 'utf-8')
try:
@ -272,8 +300,21 @@ Keep it under 200 words and make it actionable for context injection."""
doc = docx.Document(io.BytesIO(file_content))
return '\n'.join(paragraph.text for paragraph in doc.paragraphs)
return ""
# For any other file type, try to decode as text (fallback)
else:
try:
detected = chardet.detect(file_content)
encoding = detected.get('encoding', 'utf-8')
content = file_content.decode(encoding)
# Only return if it seems to be mostly text content
if len([c for c in content[:1000] if c.isprintable() or c.isspace()]) > 800:
return content
except:
pass
# If we can't extract text content, return a placeholder
return f"[Binary file: {filename}] - Content cannot be extracted as text, but file is stored and available for download."
except Exception as e:
logger.error(f"Error extracting content from {filename}: {str(e)}")
return ""
return f"[Error extracting content from {filename}] - File is stored but content extraction failed: {str(e)}"

View File

@ -12,7 +12,7 @@ class SandboxKbTool(SandboxToolsBase):
# Constructor: forwards project_id and thread_manager to the base class, then
# records the kb release version and the download URL derived from it.
def __init__(self, project_id: str, thread_manager: ThreadManager):
super().__init__(project_id, thread_manager)
# NOTE(review): the two kb_version assignments below look like the removed and
# added lines of this diff hunk ("0.1.0" -> "0.1.1"); read top to bottom, the
# later value is the one that remains set. Confirm against the real file.
self.kb_version = "0.1.0"
self.kb_version = "0.1.1"
# The release download URL embeds kb_version as the "v<version>" tag segment.
self.kb_download_url = f"https://github.com/kortix-ai/kb-fusion/releases/download/v{self.kb_version}/kb"
async def _execute_kb_command(self, command: str) -> dict:
@ -640,6 +640,11 @@ Agent ID: {agent_id}
mime_type=mime_type
)
# Check if processing was successful
if not result.get('success', False):
error_msg = result.get('error', 'Unknown processing error')
return self.fail_response(f"Failed to process file: {error_msg}")
response_data = {
"message": f"Successfully uploaded '{final_filename}' to folder '{folder_name}'",
"entry_id": result['entry_id'],

View File

@ -48,7 +48,7 @@ import {
DropdownMenuItem,
DropdownMenuTrigger,
} from '@/components/ui/dropdown-menu';
import { SharedTreeItem } from '@/components/knowledge-base/shared-kb-tree';
import { SharedTreeItem, FileDragOverlay } from '@/components/knowledge-base/shared-kb-tree';
// Get backend URL from environment variables
const API_URL = process.env.NEXT_PUBLIC_BACKEND_URL || '';
@ -830,9 +830,11 @@ export function KnowledgeBasePage() {
const fileInfo = getFileTypeInfo(file.filename);
return (
<div key={file.entry_id} className="group cursor-pointer">
<div className="bg-muted/20 rounded-xl p-4 mb-2 transition-colors group-hover:bg-muted/30">
<div className={`w-12 h-14 mx-auto rounded-lg border-2 flex items-center justify-center ${fileInfo.colorClass}`}>
<span className="text-xs font-bold">{fileInfo.extension}</span>
<div className="bg-muted/70 rounded-xl h-22 mb-2 transition-colors flex items-center justify-center">
<div className='relative flex flex-col items-center justify-center'>
<FileIcon className="h-12 w-12 opacity-55" />
<span className="text-[10px] font-bold absolute top-7 left-4">{fileInfo.extension.slice(0, 4)}</span>
</div>
</div>
<div className="text-center">
@ -904,16 +906,36 @@ export function KnowledgeBasePage() {
</SortableContext>
<DragOverlay>
{activeId ? (
<div className="bg-background border rounded-lg p-3">
<div className="flex items-center gap-2">
<FolderIcon className="h-4 w-4 text-blue-500" />
<span className="font-medium text-sm">
{treeData.find(item => item.id === activeId)?.name}
</span>
</div>
</div>
) : null}
{activeId ? (() => {
// Depth-first lookup of the dragged item (matched by activeId) in the tree data.
// An item is checked before its children; returns null when nothing matches.
const findActiveItem = (items: any[]): any => {
  for (const node of items) {
    if (node.id === activeId) {
      return node;
    }
    // Only descend when the node actually carries children.
    const nestedMatch = node.children ? findActiveItem(node.children) : null;
    if (nestedMatch) {
      return nestedMatch;
    }
  }
  return null;
};
const activeItem = findActiveItem(treeData);
if (activeItem?.type === 'file') {
return <FileDragOverlay item={activeItem} />;
} else {
return (
<div className="bg-background border rounded-lg p-3">
<div className="flex items-center gap-2">
<FolderIcon className="h-4 w-4 text-blue-500" />
<span className="font-medium text-sm">
{activeItem?.name}
</span>
</div>
</div>
);
}
})() : null}
</DragOverlay>
</DndContext>
</div>

View File

@ -26,6 +26,7 @@ import {
} from '@dnd-kit/sortable';
import {
useDroppable,
DragOverlay,
} from '@dnd-kit/core';
import { CSS } from '@dnd-kit/utilities';
@ -190,9 +191,9 @@ export function SharedTreeItem({
};
return (
<div ref={combinedRef} style={style} className="select-none">
<div ref={combinedRef} style={style} className="select-none my-1">
{item.type === 'folder' ? (
<div className="mb-1">
<div>
{/* Folder Row - Using div instead of button to avoid nesting */}
<div
className={`flex items-center w-full text-sm h-8 px-3 py-5 rounded-md hover:bg-accent hover:text-accent-foreground cursor-pointer ${(isOver && enableDnd) || isDragOverNative
@ -328,7 +329,7 @@ export function SharedTreeItem({
{/* Files (when expanded) */}
{item.expanded && item.children && (
<div className="space-y-0">
<div className="gap-1 flex flex-col">
{item.children.map((file) => (
<SharedTreeItem
key={file.id}
@ -360,7 +361,7 @@ export function SharedTreeItem({
/* File Row - Using div instead of button to avoid nesting */
<div
ref={setNodeRef}
className={`group flex items-center w-full text-sm h-8 px-3 py-5 rounded-md hover:bg-accent hover:text-accent-foreground mb-1 ${isDragging ? 'opacity-50' : ''
className={`group flex items-center w-full text-sm h-8 px-3 py-5 rounded-md hover:bg-accent hover:text-accent-foreground ${isDragging ? 'opacity-50' : ''
}`}
style={{
paddingLeft: `${level * 16 + 20}px`,
@ -433,4 +434,32 @@ export function SharedTreeItem({
)}
</div>
);
}
// Custom drag overlay component that matches the file row styling.
// Shows the dragged item's name and its size (read from item.data?.file_size,
// falling back to 0 when absent) in a human-readable form.
export function FileDragOverlay({ item }: { item: TreeItem }) {
  // Format a byte count as "<value> <unit>" using 1024-based units.
  // Non-finite, zero, and negative inputs render as '0 B'. The unit index is
  // clamped so sub-byte values stay in 'B' and values of 1 TiB or more are
  // expressed in 'GB' instead of indexing past the end of the unit list.
  const formatFileSize = (bytes: number) => {
    if (!Number.isFinite(bytes) || bytes <= 0) return '0 B';
    const k = 1024;
    const sizes = ['B', 'KB', 'MB', 'GB'];
    const rawIndex = Math.floor(Math.log(bytes) / Math.log(k));
    const i = Math.min(Math.max(rawIndex, 0), sizes.length - 1);
    return parseFloat((bytes / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
  };

  return (
    <div className="flex items-center w-full text-sm h-8 px-3 py-5 rounded-md bg-accent text-accent-foreground border shadow-lg">
      {/* File Icon */}
      <div className="w-8 h-8 mr-3 bg-background border border-border rounded-md flex items-center justify-center shrink-0">
        <FileIcon className="h-4 w-4 text-foreground/60" />
      </div>
      {/* File Details */}
      <div className="flex-1 text-left min-w-0">
        <div className="font-medium truncate">{item.name}</div>
        <div className="text-xs text-muted-foreground">
          {formatFileSize(item.data?.file_size || 0)}
        </div>
      </div>
    </div>
  );
}