2025-02-13 16:49:24 +08:00
|
|
|
|
using System;
|
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
|
using System.Linq;
|
|
|
|
|
|
using System.Text;
|
|
|
|
|
|
using System.Threading.Tasks;
|
|
|
|
|
|
|
|
|
|
|
|
namespace ExcelHelper.Utils;
|
|
|
|
|
|
public class Excel2Prompt
{
    /// <summary>
    /// Converts Excel row data into a plain-text prompt that an AI model can read.
    /// </summary>
    /// <remarks>
    /// The result starts with a header line, followed by one line per requested column,
    /// separated by blank lines. Example of a column line:
    /// <code>
    /// - Column 'Status' (Excel Column C) - Type: string - Sample values: ['Pending', 'Shipped', 'Pending']
    /// </code>
    /// The samples are the first three non-null values of the column (line feeds replaced by spaces).
    /// A column without any non-null value is reported as type <c>string</c> with samples <c>[]</c>.
    /// </remarks>
    /// <param name="excelData">
    /// The data rows. Every row must be convertible to <see cref="IDictionary{TKey, TValue}"/>
    /// of string to object, keyed by column name.
    /// </param>
    /// <param name="columns">The column names to describe, in worksheet order.</param>
    /// <param name="startCell">
    /// The cell where the data starts, e.g. <c>A</c> or <c>B2</c>. Only its ASCII letters are used
    /// (case-insensitive); when it is null or contains no letters, column A is assumed.
    /// </param>
    /// <returns>The prompt text describing every requested column.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="excelData"/> or <paramref name="columns"/> is null.
    /// </exception>
    /// <exception cref="InvalidCastException">A row is not a string-keyed dictionary.</exception>
    public static string ConverterToPrompt(IEnumerable<dynamic> excelData, IEnumerable<string> columns, string startCell)
    {
        if (excelData == null)
        {
            throw new ArgumentNullException(nameof(excelData));
        }

        if (columns == null)
        {
            throw new ArgumentNullException(nameof(columns));
        }

        // Cast every row once up front instead of once per cell for every column.
        var rows = excelData.Select(row => (IDictionary<string, object>)row).ToList();

        var prompts = new List<string> { "This Excel file contains the following columns:" };

        // Zero-based index of the column just before the start column; it is incremented
        // before each use, so the first described column is the start column itself.
        int columnIndex = GetColumnIndexFromCell(startCell);

        foreach (var column in columns)
        {
            columnIndex++;
            string columnLetter = GetExcelColumnName(columnIndex);

            // Collect the values that are actually present; a missing key counts like a null cell.
            var nonNullData = new List<object>(rows.Count);
            foreach (var row in rows)
            {
                if (row.TryGetValue(column, out var cellValue) && cellValue != null)
                {
                    nonNullData.Add(cellValue);
                }
            }

            var sampleValues = nonNullData
                .Take(3)
                .Select(sample => sample.ToString().Replace("\n", " "))
                .ToArray();

            // Render an empty sample list as [] rather than [''], which would look like
            // a single empty-string sample.
            string sampleText = sampleValues.Length == 0
                ? "[]"
                : "['" + string.Join("', '", sampleValues) + "']";

            // Infer the column type from the textual form of its values.
            string dataType = GetColumnDataType(nonNullData);

            string columnTitle = column.Replace("\n", " ");
            prompts.Add($"- Column '{columnTitle}' (Excel Column {columnLetter})" +
                        $" - Type: {dataType}" +
                        $" - Sample values: {sampleText}");
        }

        return string.Join(Environment.NewLine + Environment.NewLine, prompts);
    }

    /// <summary>
    /// Extracts the column letters from a cell reference and returns the one-based column
    /// number minus one (A gives 0, B gives 1, AA gives 26).
    /// </summary>
    /// <param name="cell">A cell reference such as <c>A</c>, <c>b2</c> or <c>$C$3</c>.</param>
    /// <returns>
    /// The zero-based column index; 0 (column A) when <paramref name="cell"/> is null
    /// or contains no ASCII letter.
    /// </returns>
    private static int GetColumnIndexFromCell(string cell)
    {
        int columnNumber = 0;

        if (cell != null)
        {
            foreach (char raw in cell)
            {
                char letter = raw;

                // Fold ASCII lowercase to uppercase so "b2" is treated like "B2".
                if (letter >= 'a' && letter <= 'z')
                {
                    letter = (char)(letter - 'a' + 'A');
                }

                // Only A-Z are valid column letters; digits, '$' and other characters are skipped.
                if (letter >= 'A' && letter <= 'Z')
                {
                    columnNumber = (columnNumber * 26) + (letter - 'A' + 1);
                }
            }
        }

        // Without any column letter fall back to column A.
        if (columnNumber == 0)
        {
            columnNumber = 1;
        }

        // The caller increments before use, so return the index just before the start column.
        return columnNumber - 1;
    }

    /// <summary>
    /// Converts a one-based column number into Excel column letters (1 gives A, 26 gives Z, 27 gives AA).
    /// </summary>
    /// <param name="columnNumber">The one-based column number.</param>
    /// <returns>The column letters, or an empty string when the number is not positive.</returns>
    private static string GetExcelColumnName(int columnNumber)
    {
        int dividend = columnNumber;
        string columnName = string.Empty;

        while (dividend > 0)
        {
            // Column letters form a bijective base-26 number, hence the shift by one.
            int modulo = (dividend - 1) % 26;
            columnName = (char)('A' + modulo) + columnName;
            dividend = (dividend - modulo) / 26;
        }

        return columnName;
    }

    /// <summary>
    /// Infers a type name for a column by trying to parse the text of every value.
    /// </summary>
    /// <param name="data">The non-null values of the column.</param>
    /// <returns>
    /// The first of <c>int</c>, <c>int64</c>, <c>double</c>, <c>datetime</c>, <c>bool</c> that every
    /// value parses as; otherwise <c>string</c>. An empty column is reported as <c>string</c>.
    /// </returns>
    private static string GetColumnDataType(List<object> data)
    {
        // With no values there is nothing to infer from; do not claim a numeric type.
        if (data == null || data.Count == 0)
        {
            return "string";
        }

        bool isInt = true;
        bool isLong = true;
        bool isDouble = true;
        bool isDateTime = true;
        bool isBool = true;

        foreach (var item in data)
        {
            string text = item.ToString();

            // A candidate stays alive only while every value seen so far parses as that type.
            isInt = isInt && int.TryParse(text, out _);
            isLong = isLong && long.TryParse(text, out _);
            isDouble = isDouble && double.TryParse(text, out _);
            isDateTime = isDateTime && DateTime.TryParse(text, out _);
            isBool = isBool && bool.TryParse(text, out _);

            // Every typed candidate is ruled out, so the remaining values cannot change the result.
            if (!(isInt || isLong || isDouble || isDateTime || isBool))
            {
                return "string";
            }
        }

        if (isInt)
        {
            return "int";
        }

        if (isLong)
        {
            return "int64";
        }

        if (isDouble)
        {
            return "double";
        }

        if (isDateTime)
        {
            return "datetime";
        }

        return isBool ? "bool" : "string";
    }
}
|