Update project documentation and enhance malware detection engine

- Completely rewrite README.md with comprehensive project overview and technical details
- Add detailed explanation of antivirus engine architecture and detection strategies
- Implement multi-stage malware detection with machine learning, sandbox, and PE structure analysis
- Update project configuration and add new source files for enhanced detection capabilities
- Integrate XGBoost machine learning model with C++ export functionality
- Improve sandbox environment with advanced module and LDR data table handling
- Remove legacy Python prediction and training scripts in favor of C++ implementation
This commit is contained in:
Huoji's
2025-03-09 21:59:22 +08:00
parent 51f929abfa
commit 60c4ef5f58
23 changed files with 46102 additions and 1717 deletions

View File

@@ -2,6 +2,13 @@
//
#include "head.h"
// Identifies which detection stage (if any) flagged a sample.
enum class DetectEngineType {
    kNone = 0,             // No stage flagged the sample.
    kMachineLearning = 1,  // Flagged by the machine-learning model.
    kSandbox = 2,          // Flagged by the sandbox run.
    kPeStruct = 3,         // Flagged by the static PE-structure checks.
    kYaraScan = 4          // Reserved for a YARA scan stage.
};
auto getPeInfo(std::string inputFilePath) -> std::shared_ptr<BasicPeInfo> {
auto sampleInfo = std::make_shared<BasicPeInfo>();
@@ -10,6 +17,9 @@ auto getPeInfo(std::string inputFilePath) -> std::shared_ptr<BasicPeInfo> {
sampleInfo->peBuffer =
peconv::load_pe_module((const char*)sampleInfo->inputFilePath.c_str(),
sampleInfo->peSize, false, false);
if (sampleInfo->peBuffer == nullptr) {
return nullptr;
}
sampleInfo->ntHead64 = peconv::get_nt_hdrs64((BYTE*)sampleInfo->peBuffer);
sampleInfo->ntHead32 = peconv::get_nt_hdrs32((BYTE*)sampleInfo->peBuffer);
sampleInfo->isX64 = peconv::is64bit((BYTE*)sampleInfo->peBuffer);
@@ -39,6 +49,7 @@ auto getPeInfo(std::string inputFilePath) -> std::shared_ptr<BasicPeInfo> {
sampleInfo->peSize = (sampleInfo->peSize + 0xFFF) & ~0xFFF;
return sampleInfo;
}
// Collects malware sample features.
int doMl(int argc, char* argv[]) {
// 检查命令行参数
if (argc < 3) {
@@ -98,31 +109,210 @@ int doMl(int argc, char* argv[]) {
}
return 0;
};
int main(int argc, char* argv[]) {
doMl(argc, argv);
/*
auto sampleInfo = getPeInfo(
"E:\\对战平台\\CrowAntiCheat\\CrowAntiCheat\\client\\Console_"
"Test\\Release\\Console_Test.exe");
// auto sampleInfo = getPeInfo("C:\\ConsoleApplication1.exe");
printf("input new file %s \n", sampleInfo->inputFilePath);
printf("is x64: %d\n", sampleInfo->isX64);
printf("is relocated: %d\n", sampleInfo->isRelocated);
printf("RecImageBase: %llx\n", sampleInfo->RecImageBase);
auto sandbox = std::make_shared<Sandbox>();
sandbox->InitEnv(sampleInfo);
sandbox->Run();
auto [peBuffer, peSize] = sandbox->DumpPE();
// Scores a single file with the machine-learning model and prints a verdict.
//
// argv[1] is the path of the file to score.
// Returns 1 when the path argument is missing or the file could not be
// analysed, and 0 once a verdict has been printed.
int doPredict(int argc, char* argv[]) {
    if (argc < 2) {
        std::cout << "用法: " << argv[0] << " <文件路径>" << std::endl;
        return 1;
    }

    std::string filePath = argv[1];
    MachineLearning ml;

    // The model output is inverted to obtain the malware score.
    // NOTE(review): if PredictMalwareFromFile reports failure with a negative
    // value, the inverted score becomes > 1 and the `score < 0` check below
    // never fires -- confirm the failure convention of the model API.
    const double score = 1 - ml.PredictMalwareFromFile(filePath);
    if (score < 0) {
        std::cout << "无法分析文件。" << std::endl;
        return 1;
    }

    std::cout << "文件 " << filePath << " 的恶意软件得分: " << score
              << std::endl;
    if (score > 0.5) {
        std::cout << "警告: 这个文件可能是恶意软件!" << std::endl;
    } else {
        std::cout << "这个文件可能是安全的。" << std::endl;
    }

    // Every path of a value-returning function must return; the original fell
    // off the end here, which is undefined behaviour.
    return 0;
}
// Static heuristics over a loaded PE image: flags a missing import directory
// and anomalies in the section table. Holds no state of its own.
class PeStructAnalyzer {
public:
PeStructAnalyzer() = default;
~PeStructAnalyzer() = default;
if (peBuffer) {
printf("peBuffer: %p\n", peBuffer.get());
printf("peSize: %d\n", peSize);
// peconv::dump_to_file("z:\\dumped_main.exe", peBuffer.get(), peSize);
MachineLearning ml;
ml.ExtractFeatures(peBuffer.get(), peSize);
}
peBuffer.release();
*/
system("pause");
bool AnalyzePe(const std::shared_ptr<BasicPeInfo>& peInfo) {
if (!peInfo || !peInfo->peBuffer) {
return false;
}
bool isSuspicious = false;
// 检查导入表
if (HasNoImports(peInfo)) {
std::cout << "警告: 未发现导入表,这是一个可疑特征" << std::endl;
isSuspicious = true;
}
// 检查节表异常
auto [hasSuspiciousSections, suspiciousReason] =
AnalyzeSections(peInfo);
if (hasSuspiciousSections) {
std::cout << "警告: " << suspiciousReason << std::endl;
isSuspicious = true;
}
return isSuspicious;
}
private:
// Thresholds used by the section-table heuristics.
static constexpr DWORD MAX_REASONABLE_SECTION_COUNT = 20; // Largest section count still considered plausible
static constexpr DWORD MAX_EXECUTABLE_SECTIONS = 3; // Largest number of executable sections still considered plausible
static constexpr DWORD MAX_SECTION_SIZE = 0x10000000; // 256MB
static constexpr DWORD SECTION_ALIGNMENT = 0x1000; // 4 KB alignment expected of section virtual addresses
static constexpr DWORD SUSPICIOUS_ENTROPY_THRESHOLD = 7; // Entropy threshold (not referenced by the methods visible in this class)
// Reports whether the image's import data directory is empty, i.e. its
// VirtualAddress or its Size is zero. The NT headers matching the image
// bitness (isX64) are consulted.
bool HasNoImports(const std::shared_ptr<BasicPeInfo>& peInfo) {
    const auto isEmptyDirectory = [](const auto& directory) {
        return directory.VirtualAddress == 0 || directory.Size == 0;
    };

    if (peInfo->isX64) {
        return isEmptyDirectory(
            peInfo->ntHead64->OptionalHeader
                .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]);
    }
    return isEmptyDirectory(
        peInfo->ntHead32->OptionalHeader
            .DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]);
}
std::pair<bool, std::string> AnalyzeSections(
const std::shared_ptr<BasicPeInfo>& peInfo) {
PIMAGE_SECTION_HEADER firstSection = nullptr;
WORD numberOfSections = 0;
if (peInfo->isX64) {
firstSection = IMAGE_FIRST_SECTION(peInfo->ntHead64);
numberOfSections = peInfo->ntHead64->FileHeader.NumberOfSections;
} else {
firstSection = IMAGE_FIRST_SECTION(peInfo->ntHead32);
numberOfSections = peInfo->ntHead32->FileHeader.NumberOfSections;
}
// 检查区段数量是否异常
if (numberOfSections > MAX_REASONABLE_SECTION_COUNT) {
return {true, "区段数量异常: " + std::to_string(numberOfSections) +
" > " +
std::to_string(MAX_REASONABLE_SECTION_COUNT)};
}
// 统计可执行区段数量
int executableSections = 0;
bool hasWritableExecutableSection = false;
bool hasZeroSizedSection = false;
bool hasOversizedSection = false;
bool hasMisalignedSection = false;
for (WORD i = 0; i < numberOfSections; i++) {
const auto& section = firstSection[i];
// 检查区段属性
if (section.Characteristics & IMAGE_SCN_MEM_EXECUTE) {
executableSections++;
// 检查是否同时具有可写和可执行属性
if (section.Characteristics & IMAGE_SCN_MEM_WRITE) {
hasWritableExecutableSection = true;
}
}
// 检查区段大小
if (section.SizeOfRawData == 0 && section.Misc.VirtualSize > 0) {
hasZeroSizedSection = true;
}
if (section.SizeOfRawData > MAX_SECTION_SIZE) {
hasOversizedSection = true;
}
// 检查对齐
if (section.VirtualAddress % SECTION_ALIGNMENT != 0) {
hasMisalignedSection = true;
}
}
// 返回检测结果
if (executableSections > MAX_EXECUTABLE_SECTIONS) {
return {true, "可执行区段数量过多: " +
std::to_string(executableSections)};
}
if (hasWritableExecutableSection) {
return {true, "发现同时具有可写和可执行属性的区段"};
}
if (hasZeroSizedSection) {
return {true, "发现大小异常的区段"};
}
if (hasOversizedSection) {
return {true, "发现过大的区段"};
}
if (hasMisalignedSection) {
return {true, "发现未正确对齐的区段"};
}
return {false, ""};
}
};
// Front end of the scanner: runs the detection stages over one file and
// reports which stage, if any, flagged it.
class DetectEngine {
public:
    DetectEngine();
    ~DetectEngine();

    // Scans the file at filePath and returns the stage that flagged it, or
    // DetectEngineType::kNone when no stage did.
    DetectEngineType DetectMalware(std::string filePath);
};

// The engine holds no state, so construction and destruction do nothing.
DetectEngine::DetectEngine() = default;
DetectEngine::~DetectEngine() = default;
// Runs the detection stages in order and stops at the first one that flags
// the file:
//   1. static PE-structure checks,
//   2. the machine-learning model (flagged when the inverted score is > 0.5),
//   3. the sandbox run (flagged on a suspicious or malware verdict).
// Returns kNone when the PE cannot be loaded or when no stage flags it.
DetectEngineType DetectEngine::DetectMalware(std::string filePath) {
    auto peInfo = getPeInfo(filePath);
    if (!peInfo) {
        return DetectEngineType::kNone;
    }

    // Stage 1: PE structure analysis.
    PeStructAnalyzer structAnalyzer;
    const bool structureSuspicious = structAnalyzer.AnalyzePe(peInfo);
    if (structureSuspicious) {
        return DetectEngineType::kPeStruct;
    }

    // Stage 2: machine-learning model. The model output is inverted to get
    // the malware score; a negative score is skipped without printing.
    MachineLearning model;
    const double score = 1 - model.PredictMalwareFromFile(filePath);
    if (score >= 0) {
        printf("machine learning score: %f\n", score);
    }
    if (score >= 0 && score > 0.5) {
        return DetectEngineType::kMachineLearning;
    }

    // Stage 3: sandbox run.
    Sandbox sandbox;
    sandbox.InitEnv(peInfo);
    sandbox.Run();
    const bool sandboxFlagged =
        sandbox.GetMalwareAnalysisType() == MalwareAnalysisType::kSuspicious ||
        sandbox.GetMalwareAnalysisType() == MalwareAnalysisType::kMalware;
    return sandboxFlagged ? DetectEngineType::kSandbox
                          : DetectEngineType::kNone;
}
auto doMalwareScan(int argc, char* argv[]) -> void {
DetectEngine scanner;
if (argc < 2) {
std::cout << "用法: " << argv[0] << " <文件路径>" << std::endl;
return;
}
std::string filePath = argv[1];
auto sampleType = scanner.DetectMalware(filePath);
printf("sample type: %d \n", sampleType);
}
// Program entry point: runs the multi-stage malware scan on the file named by
// the first command-line argument and always exits with status 0.
// The feature-collection (doMl) and ML-only (doPredict) modes are kept below,
// commented out.
int main(int argc, char* argv[]) {
// doMl(argc, argv);
// doPredict(argc, argv);
doMalwareScan(argc, argv);
return 0;
}