A deep learning-based malware detection system using Convolutional Neural Networks (CNNs) implemented in PyTorch. This project provides an end-to-end pipeline for training, validation, and evaluation of malware detection models with comprehensive visualization and metrics reporting.
torch>=2.0.0
torchvision>=0.15.0
numpy>=1.24.0
pandas>=2.0.0
scikit-learn>=1.3.0
matplotlib>=3.7.0
seaborn>=0.12.0
plotly>=5.15.0
tqdm>=4.65.0
git clone https://github.com/NahomMA/malware_detection.git
cd malware_detection
python3 -m venv .venv
source .venv/bin/activate # On Windows: .venv\Scripts\activate
pip install --upgrade pip
pip install -r requirements.txt
python main.py
All hyperparameters and settings are centralized in config.py. Key configuration options include:
Performance
from config import cfg
# Modify configuration for different experiments
cfg['model']['num_classes'] = 6
cfg['model']['input_length'] = 1024
cfg['training']['num_epochs'] = 50
cfg['training']['learning_rate'] = 0.0005
cfg['data']['batch_size'] = 32
# Access configuration values
print(f"Training for {cfg['training']['num_epochs']} epochs")
print(f"Model has {cfg['model']['num_classes']} classes")
import torch
from torch.utils.data import DataLoader
from torch import nn, optim
from src.data_loader import MalwareDataset
from src.model import MalwareDetector
from src.train import train_model
from config import cfg
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Load datasets
train_dataset = MalwareDataset(data_path=cfg['data']['train_path'], input_length=1024)
val_dataset = MalwareDataset(data_path=cfg['data']['val_path'], input_length=1024)
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=cfg['data']['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=cfg['data']['batch_size'], shuffle=False)
# Initialize model
model = MalwareDetector(
input_length=cfg['model']['input_length'],
num_classes=cfg['model']['num_classes']
).to(device)
# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=cfg['training']['learning_rate'])
# Train model
train_model(model, train_loader, val_loader, criterion, optimizer,
num_epochs=cfg['training']['num_epochs'], device=device)
from src.evaluate import ModelPerformance
# Get class names from dataset
class_names = train_dataset.classes
print(f"Detected classes: {class_names}")
# Initialize model performance evaluator
model_performance = ModelPerformance(
model=model,
data_loader=val_loader,
device=device,
class_names=class_names
)
# Evaluate model and calculate metrics
model_performance.evaluate_model()
model_performance.calculate_metrics()

If you use this project in your research, please cite:
@software{malware_detection_cnn,
author = {NahomMA},
title = {Malware Detection with CNNs (PyTorch)},
url = {https://github.com/NahomMA/malware_detection},
year = {2025}
}