1
This commit is contained in:
@@ -340,6 +340,7 @@ std::vector<double> MachineLearning::ExtractFeatures(const uint8_t* buffer,
|
||||
peInfo.characteristics = ntHeaders64->FileHeader.Characteristics;
|
||||
peInfo.dllCharacteristics =
|
||||
ntHeaders64->OptionalHeader.DllCharacteristics;
|
||||
peInfo.hasImageBase = ntHeaders64->OptionalHeader.ImageBase != 0;
|
||||
} else {
|
||||
// 32位PE文件
|
||||
PIMAGE_NT_HEADERS32 ntHeaders32 = (PIMAGE_NT_HEADERS32)ntHeaders;
|
||||
@@ -352,6 +353,7 @@ std::vector<double> MachineLearning::ExtractFeatures(const uint8_t* buffer,
|
||||
peInfo.characteristics = ntHeaders32->FileHeader.Characteristics;
|
||||
peInfo.dllCharacteristics =
|
||||
ntHeaders32->OptionalHeader.DllCharacteristics;
|
||||
peInfo.hasImageBase = ntHeaders32->OptionalHeader.ImageBase != 0;
|
||||
}
|
||||
|
||||
// 检查PE目录
|
||||
@@ -398,8 +400,6 @@ std::vector<double> MachineLearning::ExtractFeatures(const uint8_t* buffer,
|
||||
IMAGE_DIRECTORY_ENTRY_DELAY_IMPORT);
|
||||
peInfo.hasDelayImports = dataDir && dataDir->VirtualAddress != 0;
|
||||
|
||||
peInfo.hasImageBase = true; // PE文件都有ImageBase
|
||||
|
||||
dataDir = peconv::get_directory_entry(peBuffer, IMAGE_DIRECTORY_ENTRY_IAT);
|
||||
peInfo.hasEntryIat = dataDir && dataDir->VirtualAddress != 0;
|
||||
|
||||
@@ -544,9 +544,12 @@ std::vector<double> MachineLearning::EncodeEntrypoint(
|
||||
const std::vector<uint8_t>& epBytes) {
|
||||
std::vector<double> features;
|
||||
|
||||
// 只使用前64个字节,确保特征数量固定
|
||||
size_t bytesToUse = std::min<size_t>(64, epBytes.size());
|
||||
|
||||
// 原始字节转为浮点值(按Python代码中的normalize处理)
|
||||
for (const auto& byte : epBytes) {
|
||||
features.push_back(static_cast<double>(byte) / 255.0);
|
||||
for (size_t i = 0; i < bytesToUse; i++) {
|
||||
features.push_back(static_cast<double>(epBytes[i]) / 255.0);
|
||||
}
|
||||
|
||||
// 填充至64字节长度
|
||||
@@ -743,34 +746,49 @@ std::vector<uint8_t> MachineLearning::ReadFileToBuffer(
|
||||
|
||||
bool MachineLearning::ProcessDirectory(const std::string& directoryPath,
|
||||
const std::string& outputCsvPath) {
|
||||
// 打开CSV文件用于写入
|
||||
std::ofstream csvFile(outputCsvPath);
|
||||
// 检查文件是否已存在
|
||||
bool fileExists = std::filesystem::exists(outputCsvPath);
|
||||
|
||||
// 打开CSV文件用于写入,如果文件已存在则使用追加模式
|
||||
std::ofstream csvFile;
|
||||
if (fileExists) {
|
||||
csvFile.open(outputCsvPath, std::ios::app);
|
||||
} else {
|
||||
csvFile.open(outputCsvPath);
|
||||
}
|
||||
|
||||
if (!csvFile.is_open()) {
|
||||
std::cerr << "无法创建CSV文件: " << outputCsvPath << std::endl;
|
||||
std::cerr << "无法创建或打开CSV文件: " << outputCsvPath << std::endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// 仅在文件不存在时写入CSV标题行
|
||||
/*
|
||||
// 写入CSV标题行
|
||||
csvFile << "文件路径";
|
||||
for (size_t i = 0; i < _properties.size(); i++) {
|
||||
csvFile << ",属性_" << i;
|
||||
if (!fileExists) {
|
||||
|
||||
// 写入CSV标题行
|
||||
csvFile << "文件路径";
|
||||
for (size_t i = 0; i < _properties.size(); i++) {
|
||||
csvFile << ",属性_" << i;
|
||||
}
|
||||
for (size_t i = 0; i < _libraries.size(); i++) {
|
||||
csvFile << ",库_" << i;
|
||||
}
|
||||
csvFile << ",文件熵";
|
||||
for (size_t i = 0; i < 64; i++) { // 前64个字节特征
|
||||
csvFile << ",EP_" << i;
|
||||
}
|
||||
csvFile << ",节区数";
|
||||
csvFile << ",平均熵";
|
||||
csvFile << ",最大熵";
|
||||
csvFile << ",归一化平均熵";
|
||||
csvFile << ",节区大小比率";
|
||||
csvFile << ",代码比率";
|
||||
csvFile << ",节区计数";
|
||||
csvFile << std::endl;
|
||||
|
||||
}
|
||||
for (size_t i = 0; i < _libraries.size(); i++) {
|
||||
csvFile << ",库_" << i;
|
||||
}
|
||||
csvFile << ",文件熵";
|
||||
for (size_t i = 0; i < 64; i++) { // 前64个字节特征
|
||||
csvFile << ",EP_" << i;
|
||||
}
|
||||
csvFile << ",节区数";
|
||||
csvFile << ",平均熵";
|
||||
csvFile << ",最大熵";
|
||||
csvFile << ",归一化平均熵";
|
||||
csvFile << ",节区大小比率";
|
||||
csvFile << ",代码比率";
|
||||
csvFile << ",节区计数";
|
||||
csvFile << std::endl;
|
||||
*/
|
||||
*/
|
||||
// 递归遍历目录
|
||||
WIN32_FIND_DATAA findData;
|
||||
std::string searchPath = directoryPath + "\\*";
|
||||
|
||||
Reference in New Issue
Block a user