using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ExcelHelper.Utils;
public class Excel2Prompt
{
/// <summary>
/// Converts Excel row data into a plain-text prompt that describes every column to an AI model.
/// </summary>
/// <param name="excelData">
/// Sheet rows. Each row is cast to <see cref="IDictionary{TKey, TValue}"/> (string keys, object values)
/// and looked up by column name.
/// </param>
/// <param name="columns">Column names, in sheet order, beginning at the column of <paramref name="startCell"/>.</param>
/// <param name="startCell">Cell reference of the first data column, e.g. <c>A1</c>; only its column letters are used.</param>
/// <returns>
/// A heading line followed by one entry per column (name, Excel column letter, type label and up to
/// three sample values). Entries are separated by a blank line.
/// </returns>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public static string ConverterToPrompt(IEnumerable<dynamic> excelData, IEnumerable<string> columns, string startCell)
{
    if (excelData is null)
    {
        throw new ArgumentNullException(nameof(excelData));
    }

    if (columns is null)
    {
        throw new ArgumentNullException(nameof(columns));
    }

    if (startCell is null)
    {
        throw new ArgumentNullException(nameof(startCell));
    }

    /*
    Example entry (the type label is whatever GetColumnDataType returns):
    - Column '系统单号' (Excel Column A) - Type: <type> - Sample values: ['103526033584143963', '103526050570323185', '103525904231101189']
    */
    var prompts = new List<string> { "This Excel file contains the following columns:" };

    // Materialize once: the rows are scanned again for every column.
    List<dynamic> dataList = excelData.ToList();

    // Zero-based index of the start column; it is incremented before each use so the
    // first entry in `columns` gets the start cell's own column letter.
    int columnIndex = GetColumnIndexFromCell(startCell);

    foreach (string column in columns)
    {
        columnIndex++;
        string columnLetter = GetExcelColumnName(columnIndex);

        // Collect this column's non-null cells; a row without the key is treated like a null cell.
        var nonNullData = new List<object>();
        foreach (var row in dataList)
        {
            var rowDict = (IDictionary<string, object>)row;
            if (rowDict.TryGetValue(column, out var cellValue) && cellValue is not null)
            {
                nonNullData.Add(cellValue);
            }
        }

        // Up to three samples, with line feeds flattened so each sample stays on one line.
        string[] sampleValues = nonNullData
            .Take(3)
            .Select(value => (value.ToString() ?? string.Empty).Replace("\n", " "))
            .ToArray();

        // Type label derived from the non-null values by GetColumnDataType.
        string dataType = GetColumnDataType(nonNullData);

        string columnLabel = column.Replace("\n", " ");
        string joinedSamples = string.Join("', '", sampleValues);
        string prompt = $"- Column '{columnLabel}' (Excel Column {columnLetter})" +
                        $" - Type: {dataType}" +
                        $" - Sample values: ['{joinedSamples}']";
        prompts.Add(prompt);
    }

    return string.Join(Environment.NewLine + Environment.NewLine, prompts);
}
/// <summary>
/// Reads the column letters of a cell reference such as <c>A1</c> and converts them to a
/// zero-based column index.
/// </summary>
/// <param name="cell">
/// Cell reference. ASCII letters are read case-insensitively; every other character
/// (digits, <c>$</c>, non-ASCII letters) is ignored.
/// </param>
/// <returns>
/// Zero-based index of the column (A is 0, Z is 25, AA is 26), or -1 when the reference
/// contains no ASCII letters.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="cell"/> is <c>null</c>.</exception>
private static int GetColumnIndexFromCell(string cell)
{
    if (cell is null)
    {
        throw new ArgumentNullException(nameof(cell));
    }

    // Accumulate the letters left to right as a bijective base-26 number (A = 1 ... Z = 26).
    int columnNumber = 0;
    foreach (char c in cell)
    {
        int letterValue;
        if (c >= 'A' && c <= 'Z')
        {
            letterValue = c - 'A' + 1;
        }
        else if (c >= 'a' && c <= 'z')
        {
            // Cell references are case-insensitive: "a1" names the same column as "A1".
            letterValue = c - 'a' + 1;
        }
        else
        {
            continue;
        }

        columnNumber = columnNumber * 26 + letterValue;
    }

    // Return one less than the 1-based column number: ConverterToPrompt increments the
    // index before using it, so the first column it emits is the start cell's own column.
    return columnNumber - 1;
}
/// <summary>
/// Converts a 1-based column number into Excel column letters (1 is "A", 26 is "Z", 27 is "AA").
/// </summary>
/// <param name="columnNumber">1-based column number.</param>
/// <returns>The column letters, or an empty string when <paramref name="columnNumber"/> is zero or negative.</returns>
private static string GetExcelColumnName(int columnNumber)
{
    var letters = new StringBuilder();

    // Peel off the least significant letter each round and prepend it. Subtracting 1 first
    // shifts the 1..26 letter range onto 0..25, because this numbering has no zero digit.
    for (int remaining = columnNumber; remaining > 0; remaining = (remaining - 1) / 26)
    {
        int letterOffset = (remaining - 1) % 26;
        letters.Insert(0, (char)('A' + letterOffset));
    }

    return letters.ToString();
}
private static string GetColumnDataType(List