This commit is contained in:
Huoji's
2025-03-09 14:57:42 +08:00
parent 10c56952c6
commit 51f929abfa
5 changed files with 527 additions and 50 deletions

View File

@@ -340,6 +340,7 @@ std::vector<double> MachineLearning::ExtractFeatures(const uint8_t* buffer,
peInfo.characteristics = ntHeaders64->FileHeader.Characteristics;
peInfo.dllCharacteristics =
ntHeaders64->OptionalHeader.DllCharacteristics;
peInfo.hasImageBase = ntHeaders64->OptionalHeader.ImageBase != 0;
} else {
// 32位PE文件
PIMAGE_NT_HEADERS32 ntHeaders32 = (PIMAGE_NT_HEADERS32)ntHeaders;
@@ -352,6 +353,7 @@ std::vector<double> MachineLearning::ExtractFeatures(const uint8_t* buffer,
peInfo.characteristics = ntHeaders32->FileHeader.Characteristics;
peInfo.dllCharacteristics =
ntHeaders32->OptionalHeader.DllCharacteristics;
peInfo.hasImageBase = ntHeaders32->OptionalHeader.ImageBase != 0;
}
// 检查PE目录
@@ -398,8 +400,6 @@ std::vector<double> MachineLearning::ExtractFeatures(const uint8_t* buffer,
IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT);
peInfo.hasDelayImports = dataDir && dataDir->VirtualAddress != 0;
peInfo.hasImageBase = true; // PE文件都有ImageBase
dataDir = peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_IAT);
peInfo.hasEntryIat = dataDir && dataDir->VirtualAddress != 0;
@@ -544,9 +544,12 @@ std::vector<double> MachineLearning::EncodeEntrypoint(
const std::vector<uint8_t>& epBytes) {
std::vector<double> features;
// Use only the first 64 bytes so that the number of features stays fixed
size_t bytesToUse = std::min<size_t>(64, epBytes.size());
// Convert the raw bytes to floating-point values, normalized the same way as in the Python code
for (const auto& byte : epBytes) {
features.push_back(static_cast<double>(byte) / 255.0);
for (size_t i = 0; i < bytesToUse; i++) {
features.push_back(static_cast<double>(epBytes[i]) / 255.0);
}
// 填充至64字节长度
@@ -743,34 +746,49 @@ std::vector<uint8_t> MachineLearning::ReadFileToBuffer(
bool MachineLearning::ProcessDirectory(const std::string& directoryPath,
const std::string& outputCsvPath) {
// 打开CSV文件用于写入
std::ofstream csvFile(outputCsvPath);
// 检查文件是否已存在
bool fileExists = std::filesystem::exists(outputCsvPath);
// Open the CSV file for writing; if the file already exists, use append mode
std::ofstream csvFile;
if (fileExists) {
csvFile.open(outputCsvPath, std::ios::app);
} else {
csvFile.open(outputCsvPath);
}
if (!csvFile.is_open()) {
std::cerr << "无法创建CSV文件: " << outputCsvPath << std::endl;
std::cerr << "无法创建或打开CSV文件: " << outputCsvPath << std::endl;
return false;
}
// 仅在文件不存在时写入CSV标题行
/*
// 写入CSV标题行
csvFile << "文件路径";
for (size_t i = 0; i < _properties.size(); i++) {
csvFile << ",属性_" << i;
if (!fileExists) {
// 写入CSV标题行
csvFile << "文件路径";
for (size_t i = 0; i < _properties.size(); i++) {
csvFile << ",属性_" << i;
}
for (size_t i = 0; i < _libraries.size(); i++) {
csvFile << ",库_" << i;
}
csvFile << ",文件熵";
for (size_t i = 0; i < 64; i++) { // 前64个字节特征
csvFile << ",EP_" << i;
}
csvFile << ",节区数";
csvFile << ",平均熵";
csvFile << ",最大熵";
csvFile << ",归一化平均熵";
csvFile << ",节区大小比率";
csvFile << ",代码比率";
csvFile << ",节区计数";
csvFile << std::endl;
}
for (size_t i = 0; i < _libraries.size(); i++) {
csvFile << ",库_" << i;
}
csvFile << ",文件熵";
for (size_t i = 0; i < 64; i++) { // 前64个字节特征
csvFile << ",EP_" << i;
}
csvFile << ",节区数";
csvFile << ",平均熵";
csvFile << ",最大熵";
csvFile << ",归一化平均熵";
csvFile << ",节区大小比率";
csvFile << ",代码比率";
csvFile << ",节区计数";
csvFile << std::endl;
*/
*/
// 递归遍历目录
WIN32_FIND_DATAA findData;
std::string searchPath = directoryPath + "\\*";