using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace ExcelHelper.Utils;

/// <summary>
/// Builds a plain-text, AI-readable description ("prompt") of tabular Excel data.
/// </summary>
public class Excel2Prompt
{
    /// <summary>
    /// Converts Excel rows into an AI-readable prompt that lists each column together with
    /// its Excel column letter, an inferred data type and up to three sample values.
    /// </summary>
    /// <param name="excelData">
    /// The data rows. Every row is cast to <see cref="IDictionary{TKey, TValue}"/> of
    /// <c>string</c> to <c>object</c>, keyed by column name; a row of any other type causes
    /// an <see cref="InvalidCastException"/>.
    /// </param>
    /// <param name="columns">Column names, in the order they appear in the sheet.</param>
    /// <param name="startCell">
    /// Cell reference of the top-left data cell, e.g. <c>"A1"</c> or <c>"b2"</c>. Only its
    /// column letters are used. A null, empty or letter-less reference is treated as column A.
    /// </param>
    /// <returns>
    /// A header line followed by one line per column, separated by blank lines.
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="excelData"/> or <paramref name="columns"/> is null.
    /// </exception>
    public static string ConverterToPrompt(IEnumerable<dynamic> excelData, IEnumerable<string> columns, string startCell)
    {
        /* Example of the generated text (entries are separated by a blank line):

           This Excel file contains the following columns:

           - Column 'OrderNo' (Excel Column A) - Type: int64 - Sample values: ['103526033584143963', '103526050570323185', '103525904231101189']

           - Column 'Status' (Excel Column C) - Type: string - Sample values: ['Pending', 'Pending', 'Pending']
        */
        if (excelData == null)
        {
            throw new ArgumentNullException(nameof(excelData));
        }

        if (columns == null)
        {
            throw new ArgumentNullException(nameof(columns));
        }

        var prompts = new List<string> { "This Excel file contains the following columns:" };

        // Materialize once: the rows are scanned again for every column.
        var rows = new List<object>(excelData);

        // Zero-based index of the start column; it is incremented before each use so the
        // first entry of `columns` is labelled with the start column itself.
        int columnIndex = GetColumnIndexFromCell(startCell);

        foreach (string column in columns)
        {
            columnIndex++;
            string columnLetter = GetExcelColumnName(columnIndex);

            // Collect the non-null values of this column; rows lacking the key count as missing.
            var nonNullData = new List<object>();
            foreach (object row in rows)
            {
                var rowDict = (IDictionary<string, object>)row;
                if (rowDict.TryGetValue(column, out object value) && value != null)
                {
                    nonNullData.Add(value);
                }
            }

            // Line breaks inside names and values are flattened to keep one line per column.
            string[] sampleValues = nonNullData
                .Take(3)
                .Select(value => value.ToString().Replace("\n", " "))
                .ToArray();

            string dataType = GetColumnDataType(nonNullData);
            string columnLabel = column.Replace("\n", " ");
            string samples = string.Join("', '", sampleValues);

            prompts.Add(
                $"- Column '{columnLabel}' (Excel Column {columnLetter})" +
                $" - Type: {dataType}" +
                $" - Sample values: ['{samples}']");
        }

        return string.Join(Environment.NewLine + Environment.NewLine, prompts);
    }

    /// <summary>
    /// Extracts the column letters of a cell reference and returns the zero-based index of
    /// that column ("A1" gives 0, "B7" gives 1, "AA3" gives 26).
    /// </summary>
    /// <param name="cell">Cell reference; letters are matched case-insensitively.</param>
    /// <returns>
    /// The zero-based column index, or 0 (column A) when the reference is null, empty or
    /// contains no ASCII letters.
    /// </returns>
    private static int GetColumnIndexFromCell(string cell)
    {
        if (string.IsNullOrEmpty(cell))
        {
            return 0;
        }

        // Bijective base-26 accumulation over the ASCII letters only; digits, '$' markers
        // and any non-ASCII characters are skipped.
        int columnNumber = 0;
        foreach (char raw in cell)
        {
            char letter = raw;
            if (letter >= 'a' && letter <= 'z')
            {
                letter = (char)(letter - 'a' + 'A');
            }

            if (letter < 'A' || letter > 'Z')
            {
                continue;
            }

            columnNumber = (columnNumber * 26) + (letter - 'A' + 1);
        }

        // The caller increments before use, so return one less than the 1-based number.
        return columnNumber == 0 ? 0 : columnNumber - 1;
    }

    /// <summary>
    /// Converts a 1-based column number into Excel column letters
    /// (1 gives "A", 26 gives "Z", 27 gives "AA").
    /// </summary>
    /// <param name="columnNumber">1-based column number.</param>
    /// <returns>The column letters, or an empty string when the number is not positive.</returns>
    private static string GetExcelColumnName(int columnNumber)
    {
        int remaining = columnNumber;
        string columnName = string.Empty;

        while (remaining > 0)
        {
            // Excel letters have no zero digit, hence the shift by one before the modulo.
            int offset = (remaining - 1) % 26;
            columnName = (char)('A' + offset) + columnName;
            remaining = (remaining - offset) / 26;
        }

        return columnName;
    }

    /// <summary>
    /// Infers a type name for a column from the text form of its non-null values.
    /// </summary>
    /// <param name="data">The non-null values of one column.</param>
    /// <returns>
    /// "int", "int64", "double", "datetime" or "bool" when every value parses as that type
    /// (checked in that order of precedence); otherwise "string". An empty column yields
    /// "string" because nothing can be inferred from it.
    /// </returns>
    private static string GetColumnDataType(List<object> data)
    {
        if (data.Count == 0)
        {
            return "string";
        }

        bool isInt = true;
        bool isLong = true;
        bool isDouble = true;
        bool isDateTime = true;
        bool isBool = true;

        foreach (object item in data)
        {
            string text = item.ToString();

            // A candidate type survives only while every value seen so far parses as it.
            isInt = isInt && int.TryParse(text, out _);
            isLong = isLong && long.TryParse(text, out _);
            isDouble = isDouble && double.TryParse(text, out _);
            isDateTime = isDateTime && DateTime.TryParse(text, out _);
            isBool = isBool && bool.TryParse(text, out _);

            if (!(isInt || isLong || isDouble || isDateTime || isBool))
            {
                break; // Every candidate is ruled out; the remaining values cannot change that.
            }
        }

        if (isInt)
        {
            return "int";
        }

        if (isLong)
        {
            return "int64";
        }

        if (isDouble)
        {
            return "double";
        }

        if (isDateTime)
        {
            return "datetime";
        }

        return isBool ? "bool" : "string";
    }
}