ExcelHelper/Utils/Excel2Prompt.cs

153 lines
4.8 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ExcelHelper.Utils;
/// <summary>
/// Turns tabular Excel data into a plain-text column description that can be
/// embedded in an AI prompt.
/// </summary>
public class Excel2Prompt
{
    /// <summary>
    /// Converts Excel rows into an AI-readable prompt describing every column:
    /// its header, its Excel column letter, an inferred data type and up to
    /// three sample values.
    /// </summary>
    /// <param name="excelData">
    /// The data rows. Each row is cast to <c>IDictionary&lt;string, object&gt;</c>
    /// keyed by column header; a missing key or a <c>null</c> value counts as
    /// an empty cell.
    /// </param>
    /// <param name="columns">The column headers, in sheet order.</param>
    /// <param name="startCell">
    /// The cell reference of the first column (for example <c>A1</c>). Only the
    /// ASCII letters are used; case is ignored. A reference without letters is
    /// treated as column A.
    /// </param>
    /// <returns>
    /// A header line followed by one line per column, separated by blank lines.
    /// </returns>
    /// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
    public static string ConverterToPrompt(IEnumerable<dynamic> excelData, IEnumerable<string> columns, string startCell)
    {
        if (excelData == null)
        {
            throw new ArgumentNullException(nameof(excelData));
        }

        if (columns == null)
        {
            throw new ArgumentNullException(nameof(columns));
        }

        if (startCell == null)
        {
            throw new ArgumentNullException(nameof(startCell));
        }

        /*
        Example output (entries are separated by a blank line):

        This Excel file contains the following columns:

        - Column 'OrderNo' (Excel Column A) - Type: int64 - Sample values: ['103526033584143963', '103526050570323185', '103525904231101189']

        - Column 'Status' (Excel Column B) - Type: string - Sample values: ['Pending', 'Pending', 'Pending']
        */
        var prompts = new List<string>
        {
            "This Excel file contains the following columns:",
        };

        // Materialize once: the rows are scanned again for every column.
        var dataList = excelData.ToList();

        // Zero-based index of the start column; it is incremented before each
        // use, so the first described column is the start column itself.
        int columnIndex = GetColumnIndexFromCell(startCell);

        foreach (var column in columns)
        {
            columnIndex++;
            string columnLetter = GetExcelColumnName(columnIndex);

            // Collect the non-empty cells of this column across all rows.
            var nonNullData = dataList
                .Select(row =>
                {
                    var rowDict = (IDictionary<string, object>)row;
                    return rowDict.TryGetValue(column, out var cell) ? cell : null;
                })
                .Where(cell => cell != null)
                .ToList();

            // Line breaks are flattened so every column stays on a single prompt line.
            var sampleValues = nonNullData
                .Take(3)
                .Select(cell => (cell.ToString() ?? string.Empty).Replace("\n", " "))
                .ToArray();

            // Infer the data type from the textual form of the values.
            string dataType = GetColumnDataType(nonNullData);

            string header = column.Replace("\n", " ");
            string samples = string.Join("', '", sampleValues);

            prompts.Add(
                $"- Column '{header}' (Excel Column {columnLetter})" +
                $" - Type: {dataType}" +
                $" - Sample values: ['{samples}']");
        }

        return string.Join(Environment.NewLine + Environment.NewLine, prompts);
    }

    /// <summary>
    /// Extracts the column letters from a cell reference and converts them to a
    /// zero-based column index ("A1" -> 0, "B7" -> 1, "AA3" -> 26).
    /// </summary>
    /// <param name="cell">The cell reference, for example <c>C5</c>.</param>
    /// <returns>
    /// The zero-based column index, or 0 when the reference contains no ASCII letters.
    /// </returns>
    private static int GetColumnIndexFromCell(string cell)
    {
        int columnNumber = 0;

        foreach (char raw in cell)
        {
            char letter = raw;

            // Fold lowercase ASCII to uppercase so "b1" and "B1" are equivalent.
            if (letter >= 'a' && letter <= 'z')
            {
                letter = (char)(letter - 'a' + 'A');
            }

            // Skip digits, '$' and any non-ASCII character.
            if (letter < 'A' || letter > 'Z')
            {
                continue;
            }

            // Base-26 accumulation where 'A' is 1 and 'Z' is 26.
            columnNumber = (columnNumber * 26) + (letter - 'A' + 1);
        }

        // The caller increments before use, so return the index just before the
        // start column. Clamp so a reference without letters maps to column A.
        return Math.Max(columnNumber - 1, 0);
    }

    /// <summary>
    /// Converts a one-based column number to its Excel column letters
    /// (1 -> "A", 26 -> "Z", 27 -> "AA").
    /// </summary>
    /// <param name="columnNumber">The one-based column number.</param>
    /// <returns>The column letters, or an empty string for values below 1.</returns>
    private static string GetExcelColumnName(int columnNumber)
    {
        string columnName = string.Empty;
        int remaining = columnNumber;

        while (remaining > 0)
        {
            // Column letters have no zero digit, hence the shift by one.
            int offset = (remaining - 1) % 26;
            columnName = (char)('A' + offset) + columnName;
            remaining = (remaining - 1) / 26;
        }

        return columnName;
    }

    /// <summary>
    /// Infers a type name for a column by checking whether the textual form of
    /// every value parses as a given type. Candidates are tried from narrowest
    /// to widest: int, int64, double, datetime, bool, then string.
    /// </summary>
    /// <param name="data">The non-null values of the column.</param>
    /// <returns>
    /// One of "int", "int64", "double", "datetime", "bool" or "string".
    /// An empty column is reported as "string".
    /// </returns>
    private static string GetColumnDataType(List<object> data)
    {
        // With no values every "all values parse" check is vacuously true,
        // which would misreport an empty column as "int".
        if (data.Count == 0)
        {
            return "string";
        }

        var texts = data.Select(item => item.ToString()).ToList();

        if (texts.All(text => int.TryParse(text, out _)))
        {
            return "int";
        }

        if (texts.All(text => long.TryParse(text, out _)))
        {
            return "int64";
        }

        if (texts.All(text => double.TryParse(text, out _)))
        {
            return "double";
        }

        if (texts.All(text => DateTime.TryParse(text, out _)))
        {
            return "datetime";
        }

        if (texts.All(text => bool.TryParse(text, out _)))
        {
            return "bool";
        }

        return "string";
    }
}