Tested models with filtered and unfiltered training data

This commit is contained in:
Adog64 2024-03-05 13:31:22 -05:00
parent b36fd83da4
commit c0372d8e8f
24 changed files with 402 additions and 87 deletions

View File

@@ -10,7 +10,7 @@ from mnist import Net
from pykuwahara import kuwahara
epsilons = np.arange(0.05,0.35,0.05)
pretrained_model = "data/lenet_mnist_model.pth"
pretrained_model = "mnist_cnn_unfiltered.pt"
use_cuda=False
torch.manual_seed(69)
@@ -27,15 +27,22 @@ print("CUDA Available: ", torch.cuda.is_available())
device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")
model = Net().to(device)
print(type(model))
model.load_state_dict(torch.load(pretrained_model, map_location=device))
model.eval()
def fgsm_attack(image, epsilon, data_grad):
# Collect the element-wise sign of the data gradient
sign_data_grad = data_grad.sign()
# Create the perturbed image by adjusting each pixel of the input image
perturbed_image = image + epsilon*sign_data_grad
# Adding clipping to maintain [0, 1] range
perturbed_image = torch.clamp(perturbed_image, 0, 1)
return perturbed_image
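# In equation form, fgsm_attack above computes
#   x_adv = clamp(x + epsilon * sign(grad_x loss(theta, x, y)), 0, 1),
# i.e. a single signed-gradient step of size epsilon, clipped to the valid pixel range.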
def denorm(batch, mean=[0.1307], std=[0.3081]):
@@ -59,11 +66,14 @@ def denorm(batch, mean=[0.1307], std=[0.3081]):
return batch * std.view(1, -1, 1, 1) + mean.view(1, -1, 1, 1)
def test(model, device, test_loader, epsilon):
# Original dataset correct classifications
orig_correct = 0
# Attacked dataset correct classifications
attacked_correct = 0
# Filtered attacked dataset correct classifications
filtered_correct = 0
adv_examples = []
incorrect=0
for data, target in test_loader:
data, target = data.to(device), target.to(device)
@@ -72,29 +82,29 @@ def test(model, device, test_loader, epsilon):
output_orig = model(data)
orig_pred = output_orig.max(1, keepdim=True)[1]
# Calculate the loss
loss = F.nll_loss(output_orig, target)
# Zero all existing gradients
model.zero_grad()
# Calculate gradients of model in backward pass
loss.backward()
# Collect the gradient of the input data (`data_grad`)
data_grad = data.grad.data
# Restore the data to its original scale
data_denorm = denorm(data)
# Apply the FGSM attack
perturbed_data = fgsm_attack(data_denorm, epsilon, data_grad)
# Reapply normalization
perturbed_data_normalized = transforms.Normalize((0.1307,), (0.3081,))(perturbed_data)
# convert the perturbed data tensor to a cv2 readable image
image = perturbed_data_normalized.detach().numpy().transpose(0,2,3,1)
# apply the Kuwahara filter
filtered_image = np.ndarray((1,28,28,1))
filtered_image[0] = kuwahara(image[0], method='gaussian', radius=3, image_2d=image[0])
# convert the cv2 image back to a torch tensor
filtered_image = filtered_image.transpose(0,3,1,2)
perturbed_data_filtered = torch.tensor(filtered_image).float()
# Filter the attacked image
perturbed_data_filtered = filtered(perturbed_data_normalized, len(perturbed_data_normalized))
# evaluate the model on the attacked and filtered images
output_attacked = model(perturbed_data_normalized)
@@ -105,8 +115,6 @@ def test(model, device, test_loader, epsilon):
if orig_pred.item() == target.item():
orig_correct += 1
else:
incorrect += 1
if attacked_pred.item() == target.item():
attacked_correct += 1
@@ -129,13 +137,51 @@ def test(model, device, test_loader, epsilon):
print(f"Original Accuracy = {orig_correct} / {len(test_loader)} = {orig_acc}")
print(f"Attacked Accuracy = {attacked_correct} / {len(test_loader)} = {attacked_acc}")
print(f"Filtered Accuracy = {filtered_correct} / {len(test_loader)} = {filtered_acc}")
print(f"Filtered:Attacked = {filtered_acc} / {attacked_acc} = {filtered_acc/attacked_acc}")
return attacked_acc, adv_examples
return attacked_acc, filtered_acc, adv_examples
accuracies = []
def filtered(data, batch_size=64):
# Convert the NCHW tensor batch to NHWC images for filtering
images = None
try:
images = data.numpy().transpose(0,2,3,1)
except RuntimeError:
images = data.detach().numpy().transpose(0,2,3,1)
# Apply the Kuwahara filter
filtered_images = np.ndarray((batch_size,28,28,1))
for i in range(batch_size):
filtered_images[i] = kuwahara(images[i], method='gaussian', radius=5, image_2d=images[i])
# Transpose back to NCHW and return as a float tensor
filtered_images = filtered_images.transpose(0,3,1,2)
return torch.tensor(filtered_images).float()
attacked_accuracies = []
filtered_accuracies = []
ratios = []
examples = []
print(f"Model: {pretrained_model}")
for eps in epsilons:
acc, ex = test(model, device, test_loader, eps)
accuracies.append(acc)
aacc, facc, ex = test(model, device, test_loader, eps)
attacked_accuracies.append(aacc)
filtered_accuracies.append(facc)
ratios.append(facc/aacc)
examples.append(ex)
# Plot the results
plt.subplot(121)
plt.plot(epsilons, attacked_accuracies, label="Attacked Accuracy")
plt.plot(epsilons, filtered_accuracies, label="Filtered Accuracy")
plt.legend()
plt.subplot(122)
plt.plot(epsilons, ratios, label="Filtered:Attacked")
plt.legend()
plt.show()

Binary file not shown.


View File

@@ -1,5 +1,6 @@
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
@@ -7,6 +8,13 @@ import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
import numpy as np
import matplotlib.pyplot as plt
import cv2
from pykuwahara import kuwahara
class Net(nn.Module):
def __init__(self):
@@ -37,14 +45,120 @@ def train(args, model, device, train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
data, target = data.to(device), target.to(device)
# Apply Kuwahara filter to training data on a batch-by-batch basis
if args.filter:
data = filtered(data, len(data))
optimizer.zero_grad()
output = model(data)
loss = F.nll_loss(output, target)
loss.backward()
optimizer.step()
if batch_idx % args.log_interval == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch.idx*len(data), len(train_loader.dataset), 100.*batch_idx/len(train_loader), loss.item()))
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx*len(data), len(train_loader.dataset), 100.*batch_idx/len(train_loader), loss.item()))
if args.dry_run:
break
def test(args, model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
data, target = data.to(device), target.to(device)
# Apply Kuwahara filter to test data on a batch-by-batch basis
if args.filter:
data = filtered(data, len(data))
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
def main():
parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
parser.add_argument('--batch-size', type=int, default=64, metavar='N',
help='input batch size for training (default: 64)')
parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
help='input batch size for testing (default: 1000)')
parser.add_argument('--epochs', type=int, default=14, metavar='N',
help='number of epochs to train (default: 14)')
parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
help='learning rate (default: 1.0)')
parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
help='Learning rate step gamma (default: 0.7)')
parser.add_argument('--no-cuda', action='store_true', default=False,
help='disables CUDA training')
parser.add_argument('--no-mps', action='store_true', default=False,
help='disables macOS GPU training')
parser.add_argument('--dry-run', action='store_true', default=False,
help='quickly check a single pass')
parser.add_argument('--seed', type=int, default=1, metavar='S',
help='random seed (default: 1)')
parser.add_argument('--log-interval', type=int, default=10, metavar='N',
help='how many batches to wait before logging training status')
parser.add_argument('--save-model', action='store_true', default=False,
help='For Saving the current Model')
parser.add_argument('--filter', action='store_true', default=False,
help='Apply Kuwahara filter at runtime')
args = parser.parse_args()
train_kwargs = {'batch_size': args.batch_size}
test_kwargs = {'batch_size': args.test_batch_size}
torch.manual_seed(args.seed)
device = torch.device("cpu")
transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] )
dataset1 = datasets.MNIST('../data', train=True, download=True, transform=transform)
dataset2 = datasets.MNIST('../data', train=False, transform=transform)
train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
print(f'Kuwahara filter: {args.filter}')
model = Net().to(device)
optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
for epoch in range(1, args.epochs + 1):
print(f"===== EPOCH {epoch}/{args.epochs} =====")
train(args, model, device, train_loader, optimizer, epoch)
test(args, model, device, test_loader)
scheduler.step()
if args.save_model:
if args.filter:
torch.save(model.state_dict(), "mnist_cnn_filtered.pt")
else:
torch.save(model.state_dict(), "mnist_cnn_unfiltered.pt")
def filtered(data, batch_size=64):
# Convert the NCHW tensor batch to NHWC images for filtering
images = data.numpy().transpose(0,2,3,1)
# Apply the Kuwahara filter
filtered_images = np.ndarray((batch_size,28,28,1))
for i in range(batch_size):
filtered_images[i] = kuwahara(images[i], method='gaussian', radius=5, image_2d=images[i])
# Transpose back to NCHW and return as a float tensor
filtered_images = filtered_images.transpose(0,3,1,2)
return torch.tensor(filtered_images).float()
if __name__ == "__main__":
main()

Binary file not shown.

Binary file not shown.

Binary file not shown.


View File

@@ -1,4 +1,4 @@
= The Approach =
# The Approach
The goal is to use a filtering algorithm such as the [[https://en.wikipedia.org/wiki/Kuwahara_filter#|Kuwahara Filter]] to

View File

@@ -0,0 +1,8 @@
# Mitigating Gradient Attacks using Denoising Filters
## Contents
- [[Tests]]
- [[Approach]]
- [[Rationale]]
- [[Notes]]
- [[Timeline]]

View File

@@ -1,7 +0,0 @@
= Halting Gradient Attacks with Non-Gradient Defenses =
== Contents ==
- [[Tests]]
- [[Approach]]
- [[Rationale]]
- [[Notes]]

View File

@@ -0,0 +1,32 @@
# Notes on Filter-Based Defenses
## Engineering Design Principles
1. Clearly defined problem
- Defending against gradient-based attacks by using denoising filters as a buffer between an attacked image and the classifier
2. Requirements
3. Constraints
- Computing power
4. Engineering standards
- [[https://peps.python.org/pep-0008/|PEP 8]]
5. Cite applicable references
- [[https://pytorch.org/tutorials/beginner/fgsm_tutorial.html|FGSM Attack]]
- [[https://github.com/pytorch/examples/blob/main/mnist/main.py|MNIST Model]]
- [[https://www.cs.toronto.edu/~kriz/cifar.html|CIFAR-10]]
6. Considered alternatives
a) Iterate on the design
i) Advantages
- Potentially more computationally efficient than an ML approach
ii) Disadvantages
- Potentially less effective than an ML approach
iii) Risks
- A conventional algorithm may be more vulnerable to reverse engineering
7. Evaluation process
- Cross validation
- Effectiveness will be measured as the percent of correct classifications (see the sketch after this list)
- Testing clean vs. filtered training data
- Ablation variables:
- Different models
- Different datasets
- Different filters
8. Deliverables and timeline
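
A minimal sketch of the effectiveness metric from item 7 (the percent of correct classifications), assuming a classifier like the one in mnist.py that returns log-probabilities:

```python
import torch

@torch.no_grad()
def accuracy(model, loader, device="cpu"):
    # Fraction of correctly classified examples over a data loader.
    model.eval()
    correct = total = 0
    for data, target in loader:
        data, target = data.to(device), target.to(device)
        pred = model(data).argmax(dim=1)  # index of the max log-probability
        correct += (pred == target).sum().item()
        total += target.numel()
    return correct / total
```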

View File

@@ -1,18 +0,0 @@
= Notes on Filter-Based Defenses =
== Engineering Design Principles ==
1. Clearly defined problem
a) Defending gradient-based attacks using denoising filters as a buffer between an attacked image and a classifier
2. Requirements
3. Constraints
4. Engineering standards
5. Cite applicable references
6. Considered alternatives
a) Iterate on the design
i) Advantages
ii) Disadvantages
iii) Risks
7. Evaluation process
a) Validation
8. Deliverables and timeline
9.

View File

@@ -0,0 +1,156 @@
# Test Process for Non-Gradient Filter Pipeline
For each attack, the following train/test combinations are to be evaluated. The performance of each should be measured using cross validation with $k=5$; a minimal sketch of this loop follows the table below.
| Training | Test |
|----------|-------------------------|
| Clean | Clean |
| Clean | Attacked |
| Clean | Filtered (Not Attacked) |
| Clean | Filtered (Attacked) |
| Filtered | Filtered (Not Attacked) |
| Filtered | Filtered (Attacked) |
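
A minimal sketch of the $k=5$ cross-validation loop (the `make_model`, `train_fold`, and `evaluate` helpers are hypothetical placeholders for the training and evaluation code in mnist.py):

```python
import numpy as np
from sklearn.model_selection import KFold
from torch.utils.data import Subset

def cross_validate(dataset, make_model, train_fold, evaluate, k=5):
    # Split the dataset indices into k folds; train on k-1, test on the held-out fold.
    kf = KFold(n_splits=k, shuffle=True, random_state=0)
    scores = []
    for train_idx, test_idx in kf.split(np.arange(len(dataset))):
        model = make_model()
        train_fold(model, Subset(dataset, train_idx))
        scores.append(evaluate(model, Subset(dataset, test_idx)))
    return float(np.mean(scores)), float(np.std(scores))
```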
## Testing on Pretrained Model Trained on Unfiltered Data
Epsilon: 0.05
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 9605 / 10000 = 0.9605
Filtered Accuracy = 9522 / 10000 = 0.9522
Epsilon: 0.1
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 8743 / 10000 = 0.8743
Filtered Accuracy = 9031 / 10000 = 0.9031
Epsilon: 0.15000000000000002
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 7107 / 10000 = 0.7107
Filtered Accuracy = 8138 / 10000 = 0.8138
Epsilon: 0.2
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 4876 / 10000 = 0.4876
Filtered Accuracy = 6921 / 10000 = 0.6921
Epsilon: 0.25
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 2714 / 10000 = 0.2714
Filtered Accuracy = 5350 / 10000 = 0.535
Epsilon: 0.3
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 1418 / 10000 = 0.1418
Filtered Accuracy = 3605 / 10000 = 0.3605
### Observations
| $\epsilon$ | Attacked Accuracy | Filtered Accuracy | Ratio (Filtered/Attacked) |
|------------|-------------------|-------------------|---------------------------|
| 0.05 | 0.9605 | 0.9522 | 0.9914 |
| 0.1 | 0.8743 | 0.9031 | 1.0329 |
| 0.15 | 0.7107 | 0.8138 | 1.1451 |
| 0.2 | 0.4876 | 0.6921 | 1.4194 |
| 0.25 | 0.2714 | 0.5350 | 1.9713 |
| 0.3 | 0.1418 | 0.3605 | 2.5423 |
- The filter seems to consistently increase accuracy
- When epsilon is too low to have a significant impact on accuracy, the filter seems to be counterproductive
- This may be avoidable by training on filtered data
- Low values of epsilon will be tested on the filtered model to test this hypothesis
## Testing on Model Trained with Filtered Data
A CNN classifier trained on the MNIST dataset for 14 epochs, with the Kuwahara filter applied at runtime to each batch of training and test data.
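This run corresponds to `python mnist.py --filter --save-model` with the defaults above (14 epochs), which saves the weights to `mnist_cnn_filtered.pt`. A minimal sketch of pointing the attack script at those weights, using the same loading pattern as at the top of that script:

```python
import torch
from mnist import Net

# Weights produced by the filtered training run (see the --save-model branch in mnist.py)
pretrained_model = "mnist_cnn_filtered.pt"
device = torch.device("cpu")

model = Net().to(device)
model.load_state_dict(torch.load(pretrained_model, map_location=device))
model.eval()  # switch to evaluation mode before running the FGSM sweep
```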
### Hypothesis
Adding a denoising filter will increase accuracy against the FGSM attack.
### Results
Epsilon: 0.05
Original Accuracy = 9793 / 10000 = 0.9793
Attacked Accuracy = 7288 / 10000 = 0.7288
Filtered Accuracy = 9575 / 10000 = 0.9575
Filtered:Attacked = 0.9575 / 0.7288 = 1.3138035126234906
Epsilon: 0.1
Original Accuracy = 9793 / 10000 = 0.9793
Attacked Accuracy = 2942 / 10000 = 0.2942
Filtered Accuracy = 8268 / 10000 = 0.8268
Filtered:Attacked = 0.8268 / 0.2942 = 2.8103331067301154
Epsilon: 0.15000000000000002
Original Accuracy = 9793 / 10000 = 0.9793
Attacked Accuracy = 1021 / 10000 = 0.1021
Filtered Accuracy = 5253 / 10000 = 0.5253
Filtered:Attacked = 0.5253 / 0.1021 = 5.144955925563173
Epsilon: 0.2
Original Accuracy = 9793 / 10000 = 0.9793
Attacked Accuracy = 404 / 10000 = 0.0404
Filtered Accuracy = 2833 / 10000 = 0.2833
Filtered:Attacked = 0.2833 / 0.0404 = 7.012376237623762
Epsilon: 0.25
Original Accuracy = 9793 / 10000 = 0.9793
Attacked Accuracy = 234 / 10000 = 0.0234
Filtered Accuracy = 1614 / 10000 = 0.1614
Filtered:Attacked = 0.1614 / 0.0234 = 6.897435897435897
Epsilon: 0.3
Original Accuracy = 9793 / 10000 = 0.9793
Attacked Accuracy = 161 / 10000 = 0.0161
Filtered Accuracy = 959 / 10000 = 0.0959
Filtered:Attacked = 0.0959 / 0.0161 = 5.956521739130435
### Observations
- The model is more susceptible to FGSM than the pretrained model
- The model responds much better to filtered data than the pretrained model
- Even for $\epsilon = 0.25$, the model does better than random guessing (1 in 10 for 10 classes)
- Potential for a boosting algorithm
- The filter is proportionally more effective for higher values of $\epsilon$ until $\epsilon=0.3$
## Testing on Model Trained with Unfiltered Data
CNN classifier, same as above, trained on 14 epochs of MNIST dataset without Kuwahara filtering.
### Hypothesis
Given how the attacked model trained on filtered data performed against the FGSM attack, we expect that the model trained on unfiltered data will perform poorly.
### Results
Epsilon: 0.05
Original Accuracy = 9920 / 10000 = 0.992
Attacked Accuracy = 9600 / 10000 = 0.96
Filtered Accuracy = 8700 / 10000 = 0.87
Filtered:Attacked = 0.87 / 0.96 = 0.90625
Epsilon: 0.1
Original Accuracy = 9920 / 10000 = 0.992
Attacked Accuracy = 8753 / 10000 = 0.8753
Filtered Accuracy = 8123 / 10000 = 0.8123
Filtered:Attacked = 0.8123 / 0.8753 = 0.9280246772535131
Epsilon: 0.15000000000000002
Original Accuracy = 9920 / 10000 = 0.992
Attacked Accuracy = 7229 / 10000 = 0.7229
Filtered Accuracy = 7328 / 10000 = 0.7328
Filtered:Attacked = 0.7328 / 0.7229 = 1.013694840226864
Epsilon: 0.2
Original Accuracy = 9920 / 10000 = 0.992
Attacked Accuracy = 5008 / 10000 = 0.5008
Filtered Accuracy = 6301 / 10000 = 0.6301
Filtered:Attacked = 0.6301 / 0.5008 = 1.2581869009584663
Epsilon: 0.25
Original Accuracy = 9920 / 10000 = 0.992
Attacked Accuracy = 2922 / 10000 = 0.2922
Filtered Accuracy = 5197 / 10000 = 0.5197
Filtered:Attacked = 0.5197 / 0.2922 = 1.7785763175906915
Epsilon: 0.3
Original Accuracy = 9920 / 10000 = 0.992
Attacked Accuracy = 1599 / 10000 = 0.1599
Filtered Accuracy = 3981 / 10000 = 0.3981
Filtered:Attacked = 0.3981 / 0.1599 = 2.4896810506566607
### Observations
- The ratio of filtered to attacked performance is strictly increasing
- The unfiltered model seems to be less susceptible to the FGSM attack

View File

@@ -1,42 +0,0 @@
= Test Process for Non-Gradient Filter Pipeline =
For each attack, the following tests are to be evaluated. The performance of each attack should be evaluated using cross validation with $k=5$.
| Training | Test |
|----------|-------------------------|
| Clean | Clean |
| Clean | Attacked |
| Clean | Filtered (Not Attacked) |
| Clean | Filtered (Attacked) |
| Filtered | Filtered (Not Attacked) |
| Filtered | Filtered (Attacked) |
Epsilon: 0.05
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 9605 / 10000 = 0.9605
Filtered Accuracy = 9522 / 10000 = 0.9522
Epsilon: 0.1
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 8743 / 10000 = 0.8743
Filtered Accuracy = 9031 / 10000 = 0.9031
Epsilon: 0.15000000000000002
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 7107 / 10000 = 0.7107
Filtered Accuracy = 8138 / 10000 = 0.8138
Epsilon: 0.2
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 4876 / 10000 = 0.4876
Filtered Accuracy = 6921 / 10000 = 0.6921
Epsilon: 0.25
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 2714 / 10000 = 0.2714
Filtered Accuracy = 5350 / 10000 = 0.535
Epsilon: 0.3
Original Accuracy = 9912 / 10000 = 0.9912
Attacked Accuracy = 1418 / 10000 = 0.1418
Filtered Accuracy = 3605 / 10000 = 0.3605

View File

@@ -0,0 +1,26 @@
# Timeline of Progress
## Tuesday, February 27th, 2024
- Determined that lack of effectiveness for low values of epsilon for the FGSM attack is normal ([[https://pytorch.org/tutorials/beginner/fgsm_tutorial.html#accuracy-vs-epsilon|PyTorch Example Results]]).
- Finished trainable, saveable MNIST model
- Working on manipulating the MNIST dataset for cross validation and filtering
- Looking into implementing [[https://www.cs.toronto.edu/~kriz/cifar.html|CIFAR-10]] due to the model architecture and the nature of the images being classified
## Thursday, February 29th, 2024
- Created functionality for Kuwahara filtering of batches of 64 images at runtime
- Encountering crash in last batch
## Monday, March 4th, 2024
- The last batch of an epoch doesn't have 64 images, so the batch size is now variable
- Encountered crash when testing at end of epoch
- Fixed crash, testing required specifying batch size
- All 14 epochs train successfully on filtered data
- Added `--filter` option to enable filtering on training and test data
- Encountered crash, `args` not passed to `test` function
- `args` variable now passed to `test` function
- Filtered and unfiltered models saved to different files
- Tested filtered model with FGSM attack
- Got results in line with the unfiltered model
- Realized that I forgot to save the filtered model
- Tested the actually-filtered model with the FGSM attack
- Got strong results, in line with the hypothesis

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.