ExcelHelper/Utils/Excel2Prompt.cs

118 lines
3.7 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace ExcelHelper.Utils;
public class Excel2Prompt
{
    /// <summary>Maximum number of sample values listed for each column.</summary>
    private const int SampleSize = 3;

    /// <summary>
    /// Converts Excel row data into a plain-text, per-column description suitable
    /// for embedding in an AI prompt.
    /// </summary>
    /// <remarks>
    /// The result starts with a header line, followed by one entry per column;
    /// entries are separated by a blank line (two <see cref="Environment.NewLine"/>).
    /// Each entry has this shape:
    /// <code>
    /// - Column 'Status'
    ///  - Type: string
    ///  - Sample values: ['Pending', 'Shipped', 'Pending']
    /// </code>
    /// A column without any non-null value is reported as type <c>string</c> with
    /// <c>Sample values: []</c>. Unique-value and missing-value counts are currently
    /// not part of the output.
    /// </remarks>
    /// <param name="excelData">
    /// The rows. Every non-null row must be convertible to
    /// <see cref="IDictionary{TKey,TValue}"/> of string/object keyed by column name;
    /// null rows are treated as having no value for any column.
    /// </param>
    /// <param name="columns">The column names to describe, in output order.</param>
    /// <returns>The prompt text describing every requested column.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="excelData"/> or <paramref name="columns"/> is null.
    /// </exception>
    public static string ConverterToPrompt(IEnumerable<dynamic> excelData, IEnumerable<string> columns)
    {
        if (excelData is null)
        {
            throw new ArgumentNullException(nameof(excelData));
        }

        if (columns is null)
        {
            throw new ArgumentNullException(nameof(columns));
        }

        var prompts = new List<string> { "This Excel file contains the following columns:" };

        // Materialize once: the rows are scanned again for every column.
        var dataList = excelData.ToList();

        foreach (var column in columns)
        {
            var nonNullData = dataList
                .Select(row =>
                {
                    var rowDict = (IDictionary<string, object>)row;

                    // Single lookup; a null row or a missing key both count as "no value".
                    return rowDict is not null && rowDict.TryGetValue(column, out var cell) ? cell : null;
                })
                .Where(value => value is not null)
                .ToList();

            var sampleValues = nonNullData
                .Take(SampleSize)
                .Select(value => Flatten(value.ToString()))
                .ToArray();

            // An empty column must not be rendered as [''], which would read as one empty sample.
            var samplesText = sampleValues.Length == 0
                ? "[]"
                : "['" + string.Join("', '", sampleValues) + "']";

            // Infer the column type from the textual form of its values.
            var dataType = GetColumnDataType(nonNullData);
            var columnName = Flatten(column);

            prompts.Add(
                $"- Column '{columnName}'\n" +
                $" - Type: {dataType}\n" +
                $" - Sample values: {samplesText}");
        }

        return string.Join(Environment.NewLine + Environment.NewLine, prompts);
    }

    /// <summary>
    /// Replaces line breaks (<c>\r\n</c>, <c>\r</c>, <c>\n</c>) with a single space so
    /// that a value cannot break the line-oriented prompt layout.
    /// </summary>
    /// <param name="text">The text to flatten; null is treated as empty.</param>
    /// <returns>The text on a single line.</returns>
    private static string Flatten(string text) =>
        (text ?? string.Empty).Replace("\r\n", " ").Replace('\r', ' ').Replace('\n', ' ');

    /// <summary>
    /// Infers a type name for a column from the string form of its values.
    /// </summary>
    /// <remarks>
    /// A type is chosen only when every value parses as that type. Candidates are
    /// checked from most to least specific: <c>int</c>, <c>int64</c>, <c>double</c>,
    /// <c>datetime</c>, <c>bool</c>, falling back to <c>string</c>. Parsing uses the
    /// culture-sensitive <c>TryParse(string, out T)</c> overloads, i.e. the current culture.
    /// </remarks>
    /// <param name="data">The non-null values of one column.</param>
    /// <returns>
    /// One of <c>int</c>, <c>int64</c>, <c>double</c>, <c>datetime</c>, <c>bool</c> or
    /// <c>string</c>; <c>string</c> when <paramref name="data"/> is null or empty.
    /// </returns>
    private static string GetColumnDataType(List<object> data)
    {
        // Without any value nothing can be inferred; do not let the "all values parse"
        // rule be vacuously true and report "int".
        if (data is null || data.Count == 0)
        {
            return "string";
        }

        bool isInt = true;
        bool isLong = true;
        bool isDouble = true;
        bool isDateTime = true;
        bool isBool = true;

        foreach (var item in data)
        {
            var str = item?.ToString();

            // A candidate that has already failed is not parsed again.
            isInt = isInt && int.TryParse(str, out _);
            isLong = isLong && long.TryParse(str, out _);
            isDouble = isDouble && double.TryParse(str, out _);
            isDateTime = isDateTime && DateTime.TryParse(str, out _);
            isBool = isBool && bool.TryParse(str, out _);

            if (!(isInt || isLong || isDouble || isDateTime || isBool))
            {
                break;
            }
        }

        if (isInt)
        {
            return "int";
        }

        if (isLong)
        {
            return "int64";
        }

        if (isDouble)
        {
            return "double";
        }

        if (isDateTime)
        {
            return "datetime";
        }

        return isBool ? "bool" : "string";
    }
}