From 4d1ccb16aa3179c180117b1a25cdd634109a1fd6 Mon Sep 17 00:00:00 2001
From: Huoji's <1296564236@qq.com>
Date: Sun, 9 Mar 2025 03:29:14 +0800
Subject: [PATCH] Implement Rich Header parsing for PE file analysis

- Added ParseRichHeader method to extract Rich header information from PE files
- Defined RichEntry and RichHeaderInfo structures to store Rich header details
- Implemented decoding of Rich header entries with checksum XOR technique
- Updated ml.h and ml.cpp to support Rich header parsing
- Improved error handling and logging in ProcessDirectory method
- Translated some log messages to English for consistency
---
 ai_anti_malware/ml.cpp | 110 +++++++++++++++++++++++++++++++++++++++--
 ai_anti_malware/ml.h   |  18 ++++++-
 2 files changed, 122 insertions(+), 6 deletions(-)

diff --git a/ai_anti_malware/ml.cpp b/ai_anti_malware/ml.cpp
index 861af55..16c8320 100644
--- a/ai_anti_malware/ml.cpp
+++ b/ai_anti_malware/ml.cpp
@@ -179,6 +179,106 @@ MachineLearning::~MachineLearning() {
     // 析构函数,清理资源(如有必要)
 }
 
+// Decodes the Rich header that Microsoft toolchains embed between the DOS
+// header/stub and the PE header.
+//
+// On-disk layout (every DWORD in front of "Rich" is XORed with the key):
+//   "DanS" ^ key | 3 padding DWORDs (0 ^ key) | N x {comp.id ^ key,
+//   count ^ key} | "Rich" (plain) | key
+//
+// peBuffer   - start of the raw PE image; must begin with a DOS header.
+// richInfo   - receives the XOR key and the decoded entries in on-disk
+//              order; previous content is discarded.
+// bufferSize - number of readable bytes behind peBuffer; 0 means unknown,
+//              in which case the scan is bounded by e_lfanew alone.
+// Returns true when a complete DanS..Rich block was found and decoded.
+bool MachineLearning::ParseRichHeader(const uint8_t* peBuffer,
+                                      RichHeaderInfo& richInfo,
+                                      size_t bufferSize) {
+    constexpr uint32_t kDanSTag = 0x536E6144;  // "DanS"
+    constexpr uint32_t kRichTag = 0x68636952;  // "Rich"
+    constexpr size_t kPaddingDwords = 3;       // zero DWORDs after "DanS"
+
+    richInfo.checksum = 0;
+    richInfo.entries.clear();
+
+    if (!peBuffer ||
+        (bufferSize != 0 && bufferSize < sizeof(IMAGE_DOS_HEADER))) {
+        return false;
+    }
+
+    const IMAGE_DOS_HEADER* dosHeader =
+        reinterpret_cast<const IMAGE_DOS_HEADER*>(peBuffer);
+    if (dosHeader->e_magic != IMAGE_DOS_SIGNATURE) {
+        return false;
+    }
+
+    // e_lfanew is a signed value read from the file: reject offsets that
+    // leave no room behind the DOS header instead of letting the unsigned
+    // subtraction below wrap around.
+    if (dosHeader->e_lfanew <= static_cast<LONG>(sizeof(IMAGE_DOS_HEADER))) {
+        return false;
+    }
+    size_t scanEnd = static_cast<size_t>(dosHeader->e_lfanew);
+    if (bufferSize != 0 && scanEnd > bufferSize) {
+        scanEnd = bufferSize;  // never scan past the caller's buffer
+    }
+
+    // Scan DWORD-wise between the DOS header and the PE header.
+    const uint32_t* scanPtr = reinterpret_cast<const uint32_t*>(
+        peBuffer + sizeof(IMAGE_DOS_HEADER));
+    const size_t maxItems =
+        (scanEnd - sizeof(IMAGE_DOS_HEADER)) / sizeof(uint32_t);
+
+    // Smallest possible block: "DanS" + padding + "Rich" + key.
+    if (maxItems < kPaddingDwords + 3) {
+        return false;
+    }
+
+    // "Rich" is stored in plain text and is directly followed by the key, so
+    // it has to be located before anything can be decoded.
+    size_t richIndex = kPaddingDwords + 1;
+    while (richIndex + 1 < maxItems && scanPtr[richIndex] != kRichTag) {
+        richIndex++;
+    }
+    if (richIndex + 1 >= maxItems) {
+        return false;  // no "Rich" tag with a key behind it
+    }
+    const uint32_t checksum = scanPtr[richIndex + 1];
+
+    // "DanS" marks the start of the block and is XORed with the key.
+    size_t dansIndex = 0;
+    while (dansIndex < richIndex &&
+           (scanPtr[dansIndex] ^ checksum) != kDanSTag) {
+        dansIndex++;
+    }
+    if (dansIndex == richIndex) {
+        return false;  // no matching "DanS" start marker
+    }
+
+    // Entries follow "DanS" and its padding, two DWORDs per entry.
+    const size_t firstEntry = dansIndex + 1 + kPaddingDwords;
+    if (firstEntry > richIndex) {
+        return false;  // "Rich" sits inside the padding: malformed
+    }
+    const size_t entryCount = (richIndex - firstEntry) / 2;
+
+    richInfo.checksum = checksum;
+    richInfo.entries.reserve(entryCount);
+    for (size_t i = 0; i < entryCount; i++) {
+        const uint32_t compId = scanPtr[firstEntry + 2 * i] ^ checksum;
+        const uint32_t count = scanPtr[firstEntry + 2 * i + 1] ^ checksum;
+
+        RichEntry entry;
+        entry.productId = static_cast<uint16_t>(compId >> 16);
+        entry.buildId = static_cast<uint16_t>(compId & 0xFFFF);
+        entry.useCount = count;
+        richInfo.entries.push_back(entry);
+    }
+
+    return true;
+}
+
 std::vector MachineLearning::ExtractFeatures(const uint8_t* buffer,
                                              size_t bufferSize) {
     // 使用libpeconv解析PE文件
@@ -672,7 +772,7 @@ bool MachineLearning::ProcessDirectory(const std::string& directoryPath,
             // 处理文件
             std::vector fileBuffer = ReadFileToBuffer(currentPath);
             if (fileBuffer.empty()) {
-                std::cerr << "跳过文件: " << currentPath << " (读取失败)"
+                std::cerr << "skip file: " << currentPath << " (read failed)"
                           << std::endl;
                 failedCount++;
                 continue;
@@ -682,8 +782,8 @@ bool MachineLearning::ProcessDirectory(const std::string& directoryPath,
             std::vector features =
                 ExtractFeatures(fileBuffer.data(), fileBuffer.size());
             if (features.empty()) {
-                std::cerr << "跳过文件: " << currentPath << " (特征提取失败)"
-                          << std::endl;
+                std::cerr << "skip file: " << currentPath
+                          << " (can't get feature)" << std::endl;
                 failedCount++;
                 continue;
             }
@@ -697,8 +797,8 @@ bool MachineLearning::ProcessDirectory(const std::string& directoryPath,
 
             processedCount++;
             if (processedCount % 100 == 0) {
-                std::cout << "已处理 " << processedCount << " 个文件..."
-                          << std::endl;
+                std::cout << "already processed " << processedCount
+                          << " files..." << std::endl;
             }
         }
     } while (FindNextFileA(hFind, &findData));
diff --git a/ai_anti_malware/ml.h b/ai_anti_malware/ml.h
index cd6646d..631bce6 100644
--- a/ai_anti_malware/ml.h
+++ b/ai_anti_malware/ml.h
@@ -15,7 +15,18 @@
 struct PeInfo;
 struct SectionInfo;
 class BasicPeInfo;
+// One decoded Rich header record (a comp.id DWORD plus its use count).
+struct RichEntry {
+    uint16_t productId;  // tool/product identifier: high 16 bits of comp.id
+    uint16_t buildId;    // tool build number: low 16 bits of comp.id
+    uint32_t useCount;   // use count stored next to the comp.id
+};
 
+// Decoded Rich header: the XOR key plus the entries in on-disk order.
+struct RichHeaderInfo {
+    uint32_t checksum = 0;           // XOR key stored right after "Rich"
+    std::vector<RichEntry> entries;  // decoded Rich header entries
+};
 // RVA转换为内存中的指针的辅助函数
 inline BYTE* RvaToPtr(DWORD rva, BYTE* peBuffer) {
     if (!peBuffer || rva == 0) return nullptr;
@@ -61,7 +72,12 @@ class MachineLearning {
    public:
     MachineLearning();
     ~MachineLearning();
-
+    // Decodes the Rich header located between the DOS header and the PE
+    // header of peBuffer into richInfo. bufferSize is the readable size of
+    // peBuffer in bytes (0 = unknown, the scan is then bounded by e_lfanew).
+    // Returns false when no well-formed Rich header is found.
+    bool ParseRichHeader(const uint8_t* peBuffer, RichHeaderInfo& richInfo,
+                         size_t bufferSize = 0);
     // 提取特征并返回特征向量
     std::vector ExtractFeatures(const uint8_t* buffer,
                                 size_t bufferSize);