From c2f0e58fc13ff8849fbdd462b93cee27250a2dd5 Mon Sep 17 00:00:00 2001 From: sharath <29162020+tnfssc@users.noreply.github.com> Date: Mon, 2 Jun 2025 19:23:49 +0000 Subject: [PATCH 1/2] fix(file-process): improve llm instructions to read full file when the file is small --- backend/agent/prompt.py | 15 ++++++++++++++- backend/agentpress/response_processor.py | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/backend/agent/prompt.py b/backend/agent/prompt.py index 762a9d11..cbf8d0b5 100644 --- a/backend/agent/prompt.py +++ b/backend/agent/prompt.py @@ -232,8 +232,18 @@ You have the ability to execute operations using both Python and CLI tools: 4. xls2csv: Convert Excel to CSV ### 4.1.2 TEXT & DATA PROCESSING -- Text Processing: +IMPORTANT: Use the `cat` command to view contents of small files (less than 100 kb) whenever possible. Only use other commands and processing when absolutely necessary. +- Distinguish between small and large text files + 1. ls -lh: Get file size + - Use `ls -lh ` to get file size +- Small text files (less than 100 kb) + 1. cat: View contents of small files + - Use `cat ` to view contents of small files +- Large text files processing (more than 100 kb): + Don't use `cat` to view contents of large files. + Use the following commands instead. You may also use Python once you determine how to process the file. 1. grep: Pattern matching + - Use -n to get line numbers - Use -i for case-insensitive - Use -r for recursive search - Use -A, -B, -C for context @@ -243,6 +253,7 @@ You have the ability to execute operations using both Python and CLI tools: 3. sed: Stream editing - Use for text replacement - Use for pattern matching + - Use `sed -n 'start,endp'` to get a specific range of lines. You may extract upto 1000 lines at a time. - File Analysis: 1. file: Determine file type 2. 
wc: Count words/lines @@ -380,6 +391,8 @@ You have the ability to execute operations using both Python and CLI tools: - Lengthy documentation or guides - Detailed content across multiple sources 3. Never use scrape-webpage when: + - You can get the same information from a data provider + - You can download the file and use it directly (e.g., a CSV, JSON, TXT, or PDF file) - Web-search already answers the query - Only basic facts or information are needed - Only a high-level overview is needed diff --git a/backend/agentpress/response_processor.py b/backend/agentpress/response_processor.py index 8b7af0d7..2213fd54 100644 --- a/backend/agentpress/response_processor.py +++ b/backend/agentpress/response_processor.py @@ -1619,7 +1619,7 @@ class ResponseProcessor: # return summary summary_output = result.output if hasattr(result, 'output') else str(result) - success_status = structured_result["tool_execution"]["result"]["success"] + # success_status = structured_result["tool_execution"]["result"]["success"] # Create a more comprehensive summary for the LLM if xml_tag_name: From 9547774dfc6152d7158bc53d20f8ae5fc3c283ac Mon Sep 17 00:00:00 2001 From: sharath <29162020+tnfssc@users.noreply.github.com> Date: Sun, 8 Jun 2025 22:41:42 +0000 Subject: [PATCH 2/2] refactor(prompt): update guidelines for processing small and large text files --- backend/agent/prompt.py | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/backend/agent/prompt.py b/backend/agent/prompt.py index 28234cdd..9bd0dc22 100644 --- a/backend/agent/prompt.py +++ b/backend/agent/prompt.py @@ -232,33 +232,21 @@ You have the ability to execute operations using both Python and CLI tools: 4. xls2csv: Convert Excel to CSV ### 4.1.2 TEXT & DATA PROCESSING -IMPORTANT: Use the `cat` command to view contents of small files (less than 100 kb) whenever possible. Only use other commands and processing when absolutely necessary. 
-- Distinguish between small and large text files +IMPORTANT: Use the `cat` command to view contents of small files (100 kb or less). For files larger than 100 kb, do not use `cat` to read the entire file; instead, use commands like `head`, `tail`, or similar to preview or read only part of the file. Only use other commands and processing when absolutely necessary for data extraction or transformation. +- Distinguish between small and large text files: 1. ls -lh: Get file size - Use `ls -lh ` to get file size -- Small text files (less than 100 kb) +- Small text files (100 kb or less): 1. cat: View contents of small files - - Use `cat ` to view contents of small files -- Large text files processing (more than 100 kb): - Don't use `cat` to view contents of large files. - Use the following commands instead. You may also use Python once you determine how to process the file. - 1. grep: Pattern matching - - Use -n to get line numbers - - Use -i for case-insensitive - - Use -r for recursive search - - Use -A, -B, -C for context - 2. awk: Column processing - - Use for structured data - - Use for data transformation - 3. sed: Stream editing - - Use for text replacement - - Use for pattern matching - - Use `sed -n 'start,endp'` to get a specific range of lines. You may extract upto 1000 lines at a time. + - Use `cat ` to view the entire file +- Large text files (over 100 kb): + 1. head/tail: View file parts + - Use `head ` or `tail ` to preview content + 2. less: View large files interactively + 3. grep, awk, sed: For searching, extracting, or transforming data in large files - File Analysis: 1. file: Determine file type 2. wc: Count words/lines - 3. head/tail: View file parts - 4. less: View large files - Data Processing: 1. jq: JSON processing - Use for JSON extraction @@ -279,7 +267,7 @@ IMPORTANT: Use the `cat` command to view contents of small files (less than 100 - Use -l to list matching files - Use -n to show line numbers - Use -A, -B, -C for context lines - 2. 
head/tail: View file beginnings/endings + 2. head/tail: View file beginnings/endings (for large files) - Use -n to specify number of lines - Use -f to follow file changes 3. awk: Pattern scanning and processing @@ -300,7 +288,7 @@ IMPORTANT: Use the `cat` command to view contents of small files (less than 100 5. Use extended regex (-E) for complex patterns - Data Processing Workflow: 1. Use grep to locate relevant files - 2. Use head/tail to preview content + 2. Use cat for small files (100 kb or less) or head/tail for large files (over 100 kb) to preview content 3. Use awk for data extraction 4. Use wc to verify results 5. Chain commands with pipes for efficiency