diff --git a/.vscode/settings.json b/.vscode/settings.json index bb019c5..c144282 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -56,6 +56,8 @@ "xtr1common": "cpp", "xtree": "cpp", "xutility": "cpp", - "functional": "cpp" + "functional": "cpp", + "array": "cpp", + "numeric": "cpp" } } \ No newline at end of file diff --git a/ai_anti_malware/ai_anti_malware.cpp b/ai_anti_malware/ai_anti_malware.cpp index ab3089c..85bf1f1 100644 --- a/ai_anti_malware/ai_anti_malware.cpp +++ b/ai_anti_malware/ai_anti_malware.cpp @@ -46,7 +46,9 @@ int main() { if (peBuffer) { printf("peBuffer: %p\n", peBuffer.get()); printf("peSize: %d\n", peSize); - peconv::dump_to_file("z:\\dumped_main.exe", peBuffer.get(), peSize); + // peconv::dump_to_file("z:\\dumped_main.exe", peBuffer.get(), peSize); + MachineLearning ml; + ml.ExtractFeatures(peBuffer.get(), peSize, "z:\\features.txt"); } peBuffer.release(); system("pause"); diff --git a/ai_anti_malware/ai_anti_malware.vcxproj b/ai_anti_malware/ai_anti_malware.vcxproj index 78124b2..4d6a08a 100644 --- a/ai_anti_malware/ai_anti_malware.vcxproj +++ b/ai_anti_malware/ai_anti_malware.vcxproj @@ -170,6 +170,7 @@ + @@ -178,6 +179,7 @@ + diff --git a/ai_anti_malware/ai_anti_malware.vcxproj.filters b/ai_anti_malware/ai_anti_malware.vcxproj.filters index 0deb7f8..c5f746d 100644 --- a/ai_anti_malware/ai_anti_malware.vcxproj.filters +++ b/ai_anti_malware/ai_anti_malware.vcxproj.filters @@ -22,6 +22,12 @@ {38ea362d-55dc-410e-92f1-3a44ced4dc2d} + + {2b38b24a-cb8f-41db-bd53-4a25f8152c17} + + + {65a79261-ea29-4842-b41c-7983eddbdc85} + @@ -117,6 +123,9 @@ 源文件\sandbox + + 源文件\machine_learning + @@ -137,5 +146,8 @@ 头文件\sandbox + + 头文件\machine_learning + \ No newline at end of file diff --git a/ai_anti_malware/head.h b/ai_anti_malware/head.h index adc6ef8..dda12c1 100644 --- a/ai_anti_malware/head.h +++ b/ai_anti_malware/head.h @@ -29,3 +29,4 @@ struct BasicPeInfo { PIMAGE_NT_HEADERS32 ntHead32; }; #include "sandbox.h" +#include "ml.h" \ No newline at end of file diff --git a/ai_anti_malware/ml.cpp b/ai_anti_malware/ml.cpp new file mode 100644 index 0000000..3ac220b --- /dev/null +++ b/ai_anti_malware/ml.cpp @@ -0,0 +1,591 @@ +#include "ml.h" +#include +#include +#include +#include +#include +#include +#include +#include + +// 确保std命名空间中的函数可用 +using std::max; +using std::min; + +MachineLearning::MachineLearning() { + // 初始化属性列表 + _properties = {"has_configuration", "has_debug", "has_exceptions", + "has_exports", "has_imports", "has_nx", + "has_relocations", "has_resources", "has_signatures", + "has_tls", "has_entry_iat", "has_image_base", + "has_delay_imports", "has_rich"}; + + // 初始化库列表 + _libraries = {"libssp-0", + "kernel32", + "user32", + "advapi32", + "oleaut32", + "shell32", + "ole32", + "gdi32", + "comctl32", + "version", + "msvcrt", + "comdlg32", + "shlwapi", + "wininet", + "ws2_32", + "winmm", + "winspool.drv", + "wsock32", + "msvbvm60", + "rpcrt4", + "mpr", + "psapi", + "iphlpapi", + "ntdll", + "msimg32", + "mscoree", + "crypt32", + "gdiplus", + "userenv", + "crtdll", + "oledlg", + "mfc42", + "urlmon", + "imm32", + "rtl100.bpl", + "netapi32", + "wintrust", + "vcl100.bpl", + "vcl50.bpl", + "uxtheme", + "setupapi", + "ntoskrnl.pe", + "msi", + "msvcp60", + "lz32", + "winhttp", + "hal", + "core.bpl", + "rbrcl1416.bpl", + "dbghelp", + "api-ms-win-crt-runtime-l1-1-0", + "api-ms-win-crt-heap-l1-1-0", + "api-ms-win-crt-math-l1-1-0", + "api-ms-win-crt-stdio-l1-1-0", + "api-ms-win-crt-locale-l1-1-0", + "oleacc", + "komponentyd17.bpl", + "job.bpl", + "cam.bpl", + "vcruntime140", + "secur32", + "msvcr100", + "cxeditorsrs17.bpl", + "rasapi32", + "api-ms-win-crt-string-l1-1-0", + "wtsapi32", + "imagehlp", + "msvcp140", + "cnc.bpl", + "indyprotocols190.bpl", + "api-ms-win-crt-convert-l1-1-0", + "msvcr120", + "vcl60.bpl", + "rbrcl210.bpl", + "rtl170.bpl", + "rbide1416.bpl", + "rtl60.bpl", + "vcl170.bpl", + "wldap32", + "shfolder", + "cxlibraryrs17.bpl", + "msvcirt", + "report.bpl", + "rtl190.bpl", + "msvcr90", + "api-ms-win-crt-filesystem-l1-1-0", + "cxeditorsrs16.bpl", + "avifil32", + "api-ms-win-crt-time-l1-1-0", + "jli", + "graphic.bpl", + "olepro32", + "rtl160.bpl", + "spmmachine.bpl", + "cabinet", + "indycore190.bpl", + "sacom210.bpl", + "rbrtl1416.bpl", + "api-ms-win-crt-utility-l1-1-0", + "vcl160.bpl", + "api-ms-win-crt-environment-l1-1-0", + "zcomponent170.bpl", + "msvfw32", + "libadm_coreutils6", + "rbsha", + "dxpscorers16.bpl", + "msacm32", + "vcl70.bpl", + "applicationmanagement.bpl", + "jobgui.bpl", + "indyprotocols170.bpl", + "rtl70.bpl", + "cxed210.bpl", + "msvcr80", + "libadm_coretinypy6", + "ucrtbased", + "vcruntime140d", + "msvcp120", + "msvcp140d", + "dinput8", + "gui.bpl", + "maincontrols.bpl", + "rtl120.bpl", + "jcl170.bpl", + "frx17.bpl", + "fs17.bpl", + "vcl190.bpl", + "sdl2", + "machine.bpl", + "mfc42u", + "normaliz", + "sdl2_gfx", + "sdl2_ttf", + "sdl2_mixer", + "msvcp80", + "cxgridrs17.bpl", + "cxeditorsvcld7.bpl", + "libeay32", + "cxlibraryd11.bpl", + "vcl120.bpl", + "gr32_d6.bpl", + "cxlibraryrs16.bpl", + "cxgridrs16.bpl", + "vcl40.bpl", + "opengl32", + "qt5core", + "qtcore4", + "wdfldr.sys", + "nesting.bpl", + "fltmgr.sys"}; +} + +MachineLearning::~MachineLearning() { + // 析构函数,清理资源(如有必要) +} + +bool MachineLearning::ExtractFeatures(const uint8_t* buffer, size_t bufferSize, + const std::string& outputPath) { + // 使用libpeconv解析PE文件 + size_t v_size = 0; + BYTE* peBuffer = peconv::load_pe_module(const_cast(buffer), + bufferSize, v_size, false, false); + if (!peBuffer) { + std::cerr << "无法加载PE文件" << std::endl; + return false; + } + + // 解析PE信息 + PeInfo peInfo; + std::vector sections; + std::vector importedLibraries; + std::vector entrypointBytes; + + // 检查是否为64位PE + peInfo.isX64 = peconv::is64bit(peBuffer); + + // 获取PE头信息 + PIMAGE_NT_HEADERS ntHeaders = + (PIMAGE_NT_HEADERS)peconv::get_nt_hdrs(peBuffer); + if (!ntHeaders) { + peconv::free_pe_buffer(peBuffer); + return false; + } + + // 从NT头部获取信息 + if (peInfo.isX64) { + // 64位PE文件 + PIMAGE_NT_HEADERS64 ntHeaders64 = (PIMAGE_NT_HEADERS64)ntHeaders; + peInfo.addressOfEntryPoint = + ntHeaders64->OptionalHeader.AddressOfEntryPoint; + peInfo.baseOfCode = ntHeaders64->OptionalHeader.BaseOfCode; + peInfo.sizeOfCode = ntHeaders64->OptionalHeader.SizeOfCode; + peInfo.sizeOfImage = ntHeaders64->OptionalHeader.SizeOfImage; + peInfo.sizeOfHeaders = ntHeaders64->OptionalHeader.SizeOfHeaders; + peInfo.characteristics = ntHeaders64->FileHeader.Characteristics; + peInfo.dllCharacteristics = + ntHeaders64->OptionalHeader.DllCharacteristics; + } else { + // 32位PE文件 + PIMAGE_NT_HEADERS32 ntHeaders32 = (PIMAGE_NT_HEADERS32)ntHeaders; + peInfo.addressOfEntryPoint = + ntHeaders32->OptionalHeader.AddressOfEntryPoint; + peInfo.baseOfCode = ntHeaders32->OptionalHeader.BaseOfCode; + peInfo.sizeOfCode = ntHeaders32->OptionalHeader.SizeOfCode; + peInfo.sizeOfImage = ntHeaders32->OptionalHeader.SizeOfImage; + peInfo.sizeOfHeaders = ntHeaders32->OptionalHeader.SizeOfHeaders; + peInfo.characteristics = ntHeaders32->FileHeader.Characteristics; + peInfo.dllCharacteristics = + ntHeaders32->OptionalHeader.DllCharacteristics; + } + + // 检查PE目录 + IMAGE_DATA_DIRECTORY* dataDir = peconv::get_directory_entry( + peBuffer, IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR); + peInfo.hasConfiguration = dataDir && dataDir->VirtualAddress != 0; + + dataDir = + peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_DEBUG); + peInfo.hasDebug = dataDir && dataDir->VirtualAddress != 0; + + dataDir = + peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_EXCEPTION); + peInfo.hasExceptions = dataDir && dataDir->VirtualAddress != 0; + + dataDir = + peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_EXPORT); + peInfo.hasExports = dataDir && dataDir->VirtualAddress != 0; + + dataDir = + peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_IMPORT); + peInfo.hasImports = dataDir && dataDir->VirtualAddress != 0; + + // NX标志检查 + peInfo.hasNx = + (peInfo.dllCharacteristics & IMAGE_DLLCHARACTERISTICS_NX_COMPAT) != 0; + + dataDir = + peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_BASERELOC); + peInfo.hasRelocations = dataDir && dataDir->VirtualAddress != 0; + + dataDir = + peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_RESOURCE); + peInfo.hasResources = dataDir && dataDir->VirtualAddress != 0; + + dataDir = + peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_SECURITY); + peInfo.hasSignatures = dataDir && dataDir->VirtualAddress != 0; + + dataDir = peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_TLS); + peInfo.hasTls = dataDir && dataDir->VirtualAddress != 0; + + dataDir = peconv::get_directory_entry(peBuffer, + IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT); + peInfo.hasDelayImports = dataDir && dataDir->VirtualAddress != 0; + + peInfo.hasImageBase = true; // PE文件都有ImageBase + + dataDir = peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_IAT); + peInfo.hasEntryIat = dataDir && dataDir->VirtualAddress != 0; + + // Rich头部检测 + peInfo.hasRich = false; + PIMAGE_DOS_HEADER dosHeader = reinterpret_cast(peBuffer); + if (dosHeader && dosHeader->e_magic == IMAGE_DOS_SIGNATURE) { + const uint32_t* richPtr = reinterpret_cast( + peBuffer + sizeof(IMAGE_DOS_HEADER)); + size_t maxLen = dosHeader->e_lfanew - sizeof(IMAGE_DOS_HEADER); + for (size_t i = 0; i < maxLen / 4 - 1; i++) { + if (richPtr[i] == 0x68636952) { // "Rich" + peInfo.hasRich = true; + break; + } + } + } + + // 获取导入DLL列表 + if (peInfo.hasImports) { + size_t impRva = 0; + IMAGE_DATA_DIRECTORY* impDir = + peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_IMPORT); + if (impDir) { + impRva = impDir->VirtualAddress; + IMAGE_IMPORT_DESCRIPTOR* impDesc = + reinterpret_cast( + RvaToPtr(impRva, peBuffer)); + while (impDesc && impDesc->Name != 0) { + char* libName = + reinterpret_cast(RvaToPtr(impDesc->Name, peBuffer)); + if (libName) { + std::string libNameStr = libName; + std::transform(libNameStr.begin(), libNameStr.end(), + libNameStr.begin(), [](unsigned char c) { + return std::tolower(c); + }); + importedLibraries.push_back(libNameStr); + } + impDesc++; + } + } + } + + // 获取节区信息 + size_t sectionsCount = peconv::get_sections_count(peBuffer, bufferSize); + for (size_t i = 0; i < sectionsCount; i++) { + PIMAGE_SECTION_HEADER section = + peconv::get_section_hdr(peBuffer, bufferSize, i); + if (!section) continue; + + SectionInfo secInfo; + secInfo.characteristics = section->Characteristics; + secInfo.sizeOfRawData = section->SizeOfRawData; + secInfo.virtualSize = section->Misc.VirtualSize; + + // 计算节区熵 + BYTE* sectionData = RvaToPtr(section->VirtualAddress, peBuffer); + secInfo.entropy = + (sectionData && section->SizeOfRawData > 0) + ? CalculateEntropy(sectionData, section->SizeOfRawData) + : 0.0; + + sections.push_back(secInfo); + } + + // 获取入口点前255字节 + if (peInfo.addressOfEntryPoint > 0) { + BYTE* epPtr = RvaToPtr(peInfo.addressOfEntryPoint, peBuffer); + if (epPtr) { + // 确保不会越界 + size_t maxBytes = + std::min(255, bufferSize - (epPtr - peBuffer)); + entrypointBytes.assign(epPtr, epPtr + maxBytes); + } + } + + // 提取所有特征 + std::vector allFeatures; + + // 1. PE段属性 + std::vector propFeatures = + EncodeProperties(peInfo, importedLibraries); + allFeatures.insert(allFeatures.end(), propFeatures.begin(), + propFeatures.end()); + + // 2. 导入DLL检测 + std::vector libFeatures = EncodeLibraries(importedLibraries); + allFeatures.insert(allFeatures.end(), libFeatures.begin(), + libFeatures.end()); + + // 3. 文件熵 + double fileEntropy = CalculateEntropy(buffer, bufferSize); + allFeatures.push_back(fileEntropy); + + // 4. 入口点前255字节 + std::vector epFeatures = EncodeEntrypoint(entrypointBytes); + allFeatures.insert(allFeatures.end(), epFeatures.begin(), epFeatures.end()); + + // 5. 节区信息 + std::vector secFeatures = EncodeSections(sections, peInfo.isX64); + allFeatures.insert(allFeatures.end(), secFeatures.begin(), + secFeatures.end()); + + // 6. 文件和代码段的比率 + double codeRatio = + (peInfo.sizeOfCode > 0 && peInfo.sizeOfImage > 0) + ? static_cast(peInfo.sizeOfCode) / peInfo.sizeOfImage + : 0.0; + allFeatures.push_back(codeRatio); + + // 7. 节区数量 + allFeatures.push_back(static_cast(sections.size())); + + // 导出特征到CSV + bool result = ExportToCSV(allFeatures, outputPath); + + // 清理资源 + peconv::free_pe_buffer(peBuffer); + + return result; +} + +std::vector MachineLearning::EncodeProperties( + const PeInfo& peInfo, const std::vector& dllTables) { + std::vector features; + + // 添加各属性的布尔值(转为double: 1.0=true, 0.0=false) + features.push_back(peInfo.hasConfiguration ? 1.0 : 0.0); + features.push_back(peInfo.hasDebug ? 1.0 : 0.0); + features.push_back(peInfo.hasExceptions ? 1.0 : 0.0); + features.push_back(peInfo.hasExports ? 1.0 : 0.0); + features.push_back(peInfo.hasImports ? 1.0 : 0.0); + features.push_back(peInfo.hasNx ? 1.0 : 0.0); + features.push_back(peInfo.hasRelocations ? 1.0 : 0.0); + features.push_back(peInfo.hasResources ? 1.0 : 0.0); + features.push_back(peInfo.hasSignatures ? 1.0 : 0.0); + features.push_back(peInfo.hasTls ? 1.0 : 0.0); + features.push_back(peInfo.hasEntryIat ? 1.0 : 0.0); + features.push_back(peInfo.hasImageBase ? 1.0 : 0.0); + features.push_back(peInfo.hasDelayImports ? 1.0 : 0.0); + features.push_back(peInfo.hasRich ? 1.0 : 0.0); + + return features; +} + +std::vector MachineLearning::EncodeEntrypoint( + const std::vector& epBytes) { + std::vector features; + + // 原始字节转为浮点值(按Python代码中的normalize处理) + for (const auto& byte : epBytes) { + features.push_back(static_cast(byte) / 255.0); + } + + // 填充至64字节长度 + while (features.size() < 64) { + features.push_back(0.0); + } + + return features; +} + +std::vector MachineLearning::EncodeHistogram(const uint8_t* data, + size_t size) { + std::vector features(256, 0.0); + + if (data && size > 0) { + // 统计字节频率 + for (size_t i = 0; i < size; i++) { + features[data[i]]++; + } + + // 归一化频率 + for (auto& freq : features) { + freq /= static_cast(size); + } + } + + return features; +} + +std::vector MachineLearning::EncodeLibraries( + const std::vector& importedLibraries) { + std::vector features(_libraries.size(), 0.0); + + // 检查每个库是否被导入 + for (size_t i = 0; i < _libraries.size(); i++) { + const std::string& lib = _libraries[i]; + for (const auto& imported : importedLibraries) { + if (imported.find(lib) != std::string::npos) { + features[i] = 1.0; + break; + } + } + } + + return features; +} + +std::vector MachineLearning::EncodeSections( + const std::vector& sections, bool isX64) { + std::vector features; + size_t numSections = sections.size(); + if (numSections == 0) { + return std::vector(5, 0.0); // 返回全零特征 + } + + // 计算熵特征 + double totalEntropy = 0.0; + double maxEntropy = 0.0; + for (const auto& sec : sections) { + totalEntropy += sec.entropy; + if (sec.entropy > maxEntropy) { + maxEntropy = sec.entropy; + } + } + double avgEntropy = totalEntropy / numSections; + double normAvgEntropy = (maxEntropy > 0) ? avgEntropy / maxEntropy : 0.0; + + // 计算大小比率 + double maxSize = 0.0; + double minVSize = DBL_MAX; + for (const auto& sec : sections) { + if (static_cast(sec.sizeOfRawData) > maxSize) { + maxSize = static_cast(sec.sizeOfRawData); + } + if (sec.virtualSize > 0 && + static_cast(sec.virtualSize) < minVSize) { + minVSize = static_cast(sec.virtualSize); + } + } + + // 根据PE文件类型调整计算方式 + double normSize = 0.0; + if (minVSize > 0 && minVSize != DBL_MAX) { + if (isX64) { + // 64位PE文件可能有更大的对齐要求 + normSize = maxSize / (minVSize * 2.0); + } else { + // 32位PE文件的处理方式 + normSize = maxSize / minVSize; + } + } + + // 返回特征 + features.push_back(static_cast(numSections)); + features.push_back(avgEntropy); + features.push_back(maxEntropy); + features.push_back(normAvgEntropy); + features.push_back(normSize); + + return features; +} + +double MachineLearning::CalculateEntropy(const uint8_t* data, size_t size) { + if (!data || size == 0) { + return 0.0; + } + + std::array frequencies = {}; + + // 统计每个字节的频率 + for (size_t i = 0; i < size; i++) { + frequencies[data[i]] += 1.0; + } + + // 计算香农熵 + double entropy = 0.0; + for (const auto& freq : frequencies) { + if (freq > 0) { + double p = freq / static_cast(size); + entropy -= p * std::log2(p); + } + } + + return entropy; +} + +bool MachineLearning::ExportToCSV(const std::vector& features, + const std::string& outputPath) { + std::ofstream outFile(outputPath); + if (!outFile.is_open()) { + std::cerr << "无法打开输出文件: " << outputPath << std::endl; + return false; + } + + // 写入特征 + for (size_t i = 0; i < features.size(); i++) { + outFile << std::fixed << std::setprecision(6) << features[i]; + if (i < features.size() - 1) { + outFile << ","; + } + } + outFile << std::endl; + + outFile.close(); + return true; +} + +int MachineLearning::GetOpcodeType(const void* code, bool isX64) { + // 此函数未使用,但保留实现接口 + return 0; +} + +std::tuple, std::vector> +MachineLearning::GetOpcodeStatistics(const uint8_t* data, size_t dataSize, + bool isX64, const PeInfo& peInfo) { + // 此函数未使用,但保留实现接口 + return std::make_tuple(std::vector(), std::vector()); +} \ No newline at end of file diff --git a/ai_anti_malware/ml.h b/ai_anti_malware/ml.h new file mode 100644 index 0000000..4dbed93 --- /dev/null +++ b/ai_anti_malware/ml.h @@ -0,0 +1,128 @@ +#pragma once +#include "head.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// 前向声明 +struct PeInfo; +struct SectionInfo; +class BasicPeInfo; + +// RVA转换为内存中的指针的辅助函数 +inline BYTE* RvaToPtr(DWORD rva, BYTE* peBuffer) { + if (!peBuffer || rva == 0) return nullptr; + + PIMAGE_NT_HEADERS ntHeaders = + (PIMAGE_NT_HEADERS)peconv::get_nt_hdrs(peBuffer); + if (!ntHeaders) return nullptr; + + PIMAGE_SECTION_HEADER section = IMAGE_FIRST_SECTION(ntHeaders); + WORD numSections = ntHeaders->FileHeader.NumberOfSections; + + for (WORD i = 0; i < numSections; i++, section++) { + // 检查RVA是否在这个节区范围内 + if (rva >= section->VirtualAddress && + rva < section->VirtualAddress + section->Misc.VirtualSize) { + // 计算文件偏移 + DWORD offset = + rva - section->VirtualAddress + section->PointerToRawData; + return peBuffer + offset; + } + } + + // 如果RVA在PE头部内 + DWORD sizeOfHeaders = 0; + bool isX64 = peconv::is64bit(peBuffer); + + if (isX64) { + PIMAGE_NT_HEADERS64 ntHeaders64 = (PIMAGE_NT_HEADERS64)ntHeaders; + sizeOfHeaders = ntHeaders64->OptionalHeader.SizeOfHeaders; + } else { + PIMAGE_NT_HEADERS32 ntHeaders32 = (PIMAGE_NT_HEADERS32)ntHeaders; + sizeOfHeaders = ntHeaders32->OptionalHeader.SizeOfHeaders; + } + + if (rva < sizeOfHeaders) { + return peBuffer + rva; + } + + return nullptr; +} + +class MachineLearning { + public: + MachineLearning(); + ~MachineLearning(); + + // 主函数:提取特征并导出到CSV + bool ExtractFeatures(const uint8_t* buffer, size_t bufferSize, + const std::string& outputPath); + + private: + // 特征提取辅助函数 + std::vector EncodeProperties( + const PeInfo& peInfo, const std::vector& dllTables); + std::vector EncodeEntrypoint(const std::vector& epBytes); + std::vector EncodeHistogram(const uint8_t* data, size_t size); + std::vector EncodeLibraries( + const std::vector& dllTable); + std::vector EncodeSections(const std::vector& sections, + bool isX64); + std::tuple, std::vector> GetOpcodeStatistics( + const uint8_t* data, size_t dataSize, bool isX64, const PeInfo& peInfo); + int GetOpcodeType(const void* code, bool isX64); + double CalculateEntropy(const uint8_t* data, size_t size); + + // 将特征导出到CSV + bool ExportToCSV(const std::vector& features, + const std::string& outputPath); + + // 常量定义 + std::vector _properties; + std::vector _libraries; + std::unordered_map _opcodeTypeDict; +}; + +// PE文件信息结构 +struct PeInfo { + uint32_t addressOfEntryPoint; + uint32_t baseOfCode; + uint32_t sizeOfCode; + uint32_t sizeOfImage; + uint32_t sizeOfHeaders; + uint32_t characteristics; + uint32_t dllCharacteristics; + bool isX64; + + // PE目录标志 + bool hasConfiguration; + bool hasDebug; + bool hasExceptions; + bool hasExports; + bool hasImports; + bool hasNx; // NX兼容标志 + bool hasRelocations; + bool hasResources; + bool hasSignatures; + bool hasTls; + bool hasDelayImports; + bool hasImageBase; + bool hasEntryIat; + bool hasRich; +}; + +// 节区信息结构 +struct SectionInfo { + uint32_t characteristics; + double entropy; + uint32_t sizeOfRawData; + uint32_t virtualSize; +}; \ No newline at end of file diff --git a/ai_anti_malware/sandbox.cpp b/ai_anti_malware/sandbox.cpp index 8c5aa26..0fa8ebc 100644 --- a/ai_anti_malware/sandbox.cpp +++ b/ai_anti_malware/sandbox.cpp @@ -164,7 +164,44 @@ class cFixImprot : public peconv::t_function_resolver { }; Sandbox::Sandbox() {} -Sandbox::~Sandbox() {} +Sandbox::~Sandbox() { + // 1. 先清理高层资源 + m_crossSectionExecution.clear(); + envStrings.clear(); + api_map.clear(); + m_moduleList.clear(); + m_impFuncDict.clear(); + m_exportFuncDict.clear(); + + // 2. 清理内存映射 + if (m_ucEngine) { + uc_close(m_ucEngine); + m_ucEngine = nullptr; + } + + // 3. 清理堆内存 + for (auto& [address, segment] : m_heapSegments) { + HeapBlock* current = segment->blocks; + while (current) { + HeapBlock* next = current->next; + delete current; + current = next; + } + delete segment; + } + m_heapSegments.clear(); + + // 4. 清理栈内存 + if (m_stackBuffer) { + free(m_stackBuffer); + m_stackBuffer = nullptr; + } + + // 5. 最后清理底层资源 + if (m_csHandle) { + cs_close(&m_csHandle); + } +} auto Sandbox::PushModuleToVM(const char* dllName, uint64_t moduleBase) -> void { for (auto module : m_moduleList) { @@ -401,9 +438,9 @@ auto Sandbox::SetupVirtualMachine() -> void { /* 映射 m_KSharedUserDataBase */ - uint64_t m_KSharedUserDataBase = 0x7FFE0000; + m_KSharedUserDataBase = 0x7FFE0000; uint64_t m_KSharedUserDataEnd = 0x7FFE0FFF; // 0x7FFE2000 - uint64_t m_KSharedUserDataSize = AlignToSectionAlignment( + m_KSharedUserDataSize = AlignToSectionAlignment( m_KSharedUserDataEnd - m_KSharedUserDataBase, PAGE_SIZE); uc_mem_map(m_ucEngine, m_KSharedUserDataBase, m_KSharedUserDataSize, @@ -663,29 +700,9 @@ auto Sandbox::Run() -> void { InitApiHooks(); std::cout << "Starting execution at " << std::hex << entryPoint << std::endl; - err = uc_emu_start(m_ucEngine, entryPoint, m_peInfo->imageEnd, 0, 0); - if (err != UC_ERR_OK) { - std::cerr << "Emulation error: " << uc_strerror(err) << std::endl; - - // 32位环境下的错误处理 - if (!m_peInfo->isX64) { - uint32_t eip; - uc_reg_read(m_ucEngine, UC_X86_REG_EIP, &eip); - std::cerr << "Error occurred at EIP: 0x" << std::hex << eip - << std::endl; - - // 尝试读取当前指令 - uint8_t instruction[16]; - if (uc_mem_read(m_ucEngine, eip, instruction, - sizeof(instruction)) == UC_ERR_OK) { - std::cerr << "Instruction bytes: "; - for (int i = 0; i < 16; i++) { - printf("%02X ", instruction[i]); - } - std::cerr << std::endl; - } - } - } + uint64_t timeout = 60 * 1000; + err = uc_emu_start(m_ucEngine, entryPoint, m_peInfo->imageEnd, timeout, 0); + std::cerr << "Emulation error: " << uc_strerror(err) << std::endl; } auto Sandbox::GetEnvString() -> std::vector { @@ -909,11 +926,11 @@ auto Sandbox::DumpPE() -> std::pair, size_t> { reinterpret_cast(moduleBuffer.get()), module->base); } - //这里有一个严重的问题,就懒得处理了: - //壳里面吐出来的代码的导入表和壳的导入表不是同样一个. - //这个修的是壳的 导入表,所以导入表 修 不 全 - //有个很简单的办法,需要搜索IAT结构,然后修改脱壳后的IAT的字段到壳的字段里面,然后再执行一次fix_imports - //懒得写了,家庭作业.自己完成 + // 这里有一个严重的问题,就懒得处理了: + // 壳里面吐出来的代码的导入表和壳的导入表不是同样一个. + // 这个修的是壳的 导入表,所以导入表 修 不 全 + // 有个很简单的办法,需要搜索IAT结构,然后修改脱壳后的IAT的字段到壳的字段里面,然后再执行一次fix_imports + // 懒得写了,家庭作业.自己完成 bool importsFixed = peconv::fix_imports( resultBuffer.get(), virtualMemorySize, exportsMap, nullptr); if (importsFixed) { diff --git a/ai_anti_malware/sandbox.h b/ai_anti_malware/sandbox.h index eca9e49..16caa59 100644 --- a/ai_anti_malware/sandbox.h +++ b/ai_anti_malware/sandbox.h @@ -217,4 +217,6 @@ class Sandbox { auto InitCommandLine(std::string commandLine) -> void; std::vector m_crossSectionExecution; // 记录跨区段执行地址 uint64_t m_lastExecuteSectionIndex = 0; // 上次执行的区段索引 + uint64_t m_KSharedUserDataBase{0}; + uint64_t m_KSharedUserDataSize{0}; };