diff --git a/Filter_Analysis/mnist.py b/Filter_Analysis/mnist.py
index 08931b1..9413d54 100644
--- a/Filter_Analysis/mnist.py
+++ b/Filter_Analysis/mnist.py
@@ -47,8 +47,8 @@ def train(args, model, device, train_loader, optimizer, epoch):
         data, target = data.to(device), target.to(device)
 
         # Apply Kuwahara filter to training data on a batch-by-batch basis
-        if args.filter:
-            data = filtered(data, len(data))
+        if args.filter != 'none':
+            data = filtered(data, len(data), args.filter)
 
         optimizer.zero_grad()
         output = model(data)
@@ -70,8 +70,8 @@ def test(args, model, device, test_loader):
             data, target = data.to(device), target.to(device)
 
             # Apply Kuwahara filter to test data on a batch-by-batch basis
-            if args.filter:
-                data = filtered(data, len(data))
+            if args.filter != 'none':
+                data = filtered(data, len(data), args.filter)
 
             output = model(data)
             test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
@@ -108,8 +108,8 @@ def main():
                         help='how many batches to wait before logging training status')
     parser.add_argument('--save-model', action='store_true', default=False,
                         help='For Saving the current Model')
-    parser.add_argument('--filter', action='store_true', default=False,
-                        help='Apply Kuwahara filter at runtime')
+    parser.add_argument('--filter', type=str, metavar='S', default='none',
+                        help='Filter to apply at runtime (none, kuwahara, aniso_diff, noise, gaussian_blur, bilateral)')
     args = parser.parse_args()
 
     train_kwargs = {'batch_size': args.batch_size}
@@ -127,7 +127,7 @@ def main():
     train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
     test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
 
-    print(f'Kuwahara filter: {args.filter}')
+    print(f'Filter Type: {args.filter}')
 
     model = Net().to(device)
     optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
@@ -140,21 +140,40 @@ def main():
         scheduler.step()
 
     if args.save_model:
-        if args.filter:
-            torch.save(model.state_dict(), "mnist_cnn_filtered.pt")
-        else:
+        if args.filter == 'none':
             torch.save(model.state_dict(), "mnist_cnn_unfiltered.pt")
+        else:
+            torch.save(model.state_dict(), f"mnist_cnn_{args.filter}.pt")
 
 
-def filtered(data, batch_size=64):
+def filtered(data, batch_size=64, filter="kuwahara"):
     # Turn the tensor into an image
     images = data.numpy().transpose(0,2,3,1)
 
     # Apply the Kuwahara filter
     filtered_images = np.ndarray((batch_size,28,28,1))
-    for i in range(batch_size):
-        filtered_images[i] = kuwahara(images[i], method='gaussian', radius=5, image_2d=images[i])
+    if filter == "kuwahara":
+        for i in range(batch_size):
+            filtered_images[i] = kuwahara(images[i], method='gaussian', radius=5, image_2d=images[i])
+    elif filter == "aniso_diff":
+        for i in range(batch_size):
+            img_3ch = np.zeros((images[i].shape[0], images[i].shape[1], 3))
+            img_3ch[:,:,0] = images[i][:,:,0]
+            img_3ch[:,:,1] = images[i][:,:,0]
+            img_3ch[:,:,2] = images[i][:,:,0]
+            # anisotropicDiffusion requires an 8-bit, 3-channel image
+            img_3ch_uint8 = (img_3ch * 255).clip(0, 255).astype(np.uint8)
+            img_3ch_filtered = cv2.ximgproc.anisotropicDiffusion(img_3ch_uint8, alpha=0.2, K=0.5, niters=5)
+            filtered_images[i] = (cv2.cvtColor(img_3ch_filtered, cv2.COLOR_RGB2GRAY) / 255.0).reshape(filtered_images[i].shape)
+    elif filter == "noise":
+        pass
+    elif filter == "gaussian_blur":
+        for i in range(batch_size):
+            filtered_images[i] = cv2.GaussianBlur(images[i], ksize=(5,5), sigmaX=0).reshape(filtered_images[i].shape)
+    elif filter == "bilateral":
+        for i in range(batch_size):
+            filtered_images[i] = cv2.bilateralFilter(images[i], 5, 50, 50).reshape(filtered_images[i].shape)
 
     # Modify the data with the filtered image
     filtered_images = filtered_images.transpose(0,3,1,2)
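As a quick sanity check of the reworked `filtered()` dispatch, a minimal sketch (assuming `mnist.py` is importable, that its `cv2` and `pykuwahara` imports are in place, and that `filtered()` returns a batch shaped like its input, as `train()` and `test()` require; the `aniso_diff` branch additionally needs `opencv-contrib-python`):

import torch
from mnist import filtered  # the batch filter defined in the diff above

# Stand-in for one MNIST DataLoader batch of shape (N, 1, 28, 28)
batch = torch.rand(8, 1, 28, 28)
for name in ("kuwahara", "gaussian_blur", "bilateral"):
    out = filtered(batch, len(batch), name)   # same call train()/test() now make
    assert tuple(out.shape) == tuple(batch.shape), f"{name} changed the batch shape"

With the flag wired through, a baseline and each filtered model can be trained from the command line, e.g. `python mnist.py --filter gaussian_blur --save-model`, which saves `mnist_cnn_gaussian_blur.pt` as committed below.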
diff --git a/Filter_Analysis/mnist_cnn_bilateral.pt b/Filter_Analysis/mnist_cnn_bilateral.pt
new file mode 100644
index 0000000..26d2620
Binary files /dev/null and b/Filter_Analysis/mnist_cnn_bilateral.pt differ
diff --git a/Filter_Analysis/mnist_cnn_gaussian_blur.pt b/Filter_Analysis/mnist_cnn_gaussian_blur.pt
new file mode 100644
index 0000000..0b53e4a
Binary files /dev/null and b/Filter_Analysis/mnist_cnn_gaussian_blur.pt differ
diff --git a/Filter_Analysis/wiki/DesignImpact.md b/Filter_Analysis/wiki/DesignImpact.md
new file mode 100644
index 0000000..bca5e3b
--- /dev/null
+++ b/Filter_Analysis/wiki/DesignImpact.md
@@ -0,0 +1,36 @@
+# Engineering Design Principles
+1. Clearly defined problem
+    - Assess the efficacy of various denoising filters in preserving the accuracy of image classifier models under a noise-based attack.
+2. Requirements
+    - Defense must use only an algorithmic (non-ML) approach
+    - Must be faster than an auto-encoder
+3. Constraints
+    - Computing power
+    - Memory usage
+    - No way to know who will attack a model, or how
+4. Engineering standards
+    - [[https://peps.python.org/pep-0008/|PEP 8]]
+    -
+5. Cite applicable references
+    - [[https://pytorch.org/tutorials/beginner/fgsm_tutorial.html|FGSM Attack]]
+    - [[https://github.com/pytorch/examples/blob/main/mnist/main.py|MNIST Model]]
+    - [[https://www.cs.toronto.edu/~kriz/cifar.html|CIFAR-10]]
+6. Considered alternatives
+    a) Iterate on the design
+        i) Advantages
+            - Potentially more computationally efficient than an ML approach
+            - Will likely use less memory than a model used to clean inputs
+            - No training stage (which is very computationally intense)
+        ii) Disadvantages
+            - Potentially less effective than an ML approach
+        iii) Risks
+            - A conventional algorithm may be more vulnerable to reverse engineering
+7. Evaluation process
+    - Cross validation
+    - Effectiveness will be measured as the percentage of correct classifications
+    - Testing clean vs. filtered training data
+    - Ablation variables:
+        - Different models
+        - Different datasets
+        - Different filters
+8. Deliverables and timeline
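The evaluation described in item 7 measures accuracy under the FGSM attack cited in item 5. A rough sketch of that attack step, following the linked PyTorch FGSM tutorial rather than code in this repository (the function name `fgsm_perturb` is hypothetical, and `model` is assumed to return log-probabilities as the MNIST example does):

import torch
import torch.nn.functional as F

def fgsm_perturb(model, data, target, epsilon):
    # Fast Gradient Sign Method: one epsilon-sized step along the sign of the
    # loss gradient w.r.t. the input; epsilon is what the Results tables vary.
    data = data.clone().detach().requires_grad_(True)
    loss = F.nll_loss(model(data), target)
    loss.backward()
    perturbed = data + epsilon * data.grad.sign()
    return torch.clamp(perturbed, 0, 1)  # keep pixels in the valid [0, 1] range

Larger epsilon means a stronger perturbation, which is why every accuracy column in the results below falls as epsilon grows.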
diff --git a/Filter_Analysis/wiki/FilterAnalysis.md b/Filter_Analysis/wiki/FilterAnalysis.md
index 5eb3c2f..6e7fe46 100644
--- a/Filter_Analysis/wiki/FilterAnalysis.md
+++ b/Filter_Analysis/wiki/FilterAnalysis.md
@@ -4,5 +4,5 @@
 - [[Tests]]
 - [[Approach]]
 - [[Rationale]]
-- [[Notes]]
+- [[DesignImpact]]
 - [[Timeline]]
diff --git a/Filter_Analysis/wiki/Results.wiki b/Filter_Analysis/wiki/Results.wiki
new file mode 100644
index 0000000..0f85588
--- /dev/null
+++ b/Filter_Analysis/wiki/Results.wiki
@@ -0,0 +1,42 @@
+= Experimental Results =
+
+== Model Trained on Unfiltered MNIST Dataset ==
+| Epsilon | Attacked Accuracy |
+|---------|-------------------|
+| 0.05    | 0.9600            |
+| 0.10    | 0.8753            |
+| 0.15    | 0.7228            |
+| 0.20    | 0.5008            |
+| 0.25    | 0.2922            |
+| 0.30    | 0.1599            |
+
+== Model Trained on Kuwahara (R=5) Filtered MNIST Dataset ==
+| Epsilon | Attacked Accuracy | Filtered Accuracy | Ratio  |
+|---------|-------------------|-------------------|--------|
+| 0.05    | 0.9605            | 0.9522            | 0.9914 |
+| 0.10    | 0.8743            | 0.9031            | 1.0329 |
+| 0.15    | 0.7107            | 0.8138            | 1.1451 |
+| 0.20    | 0.4876            | 0.6921            | 1.4194 |
+| 0.25    | 0.2714            | 0.5350            | 1.9713 |
+| 0.30    | 0.1418            | 0.3605            | 2.5423 |
+
+== Model Trained on Gaussian Blurred (K-Size=5x5) MNIST Dataset ==
+| Epsilon | Attacked Accuracy | Filtered Accuracy | Ratio |
+|---------|-------------------|-------------------|-------|
+| 0.05    | 0.9192            | 0.9325            | 1.014 |
+| 0.10    | 0.7629            | 0.8802            | 1.154 |
+| 0.15    | 0.4871            | 0.7865            | 1.615 |
+| 0.20    | 0.2435            | 0.6556            | 2.692 |
+| 0.25    | 0.1093            | 0.5024            | 4.596 |
+| 0.30    | 0.0544            | 0.3522            | 6.474 |
+
+== Model Trained on Bilateral Filtered (d=5) MNIST Dataset ==
+| Epsilon | Attacked Accuracy | Filtered Accuracy | Ratio |
+|---------|-------------------|-------------------|-------|
+| 0.05    | 0.9078            | 0.9287            | 1.023 |
+| 0.10    | 0.7303            | 0.8611            | 1.179 |
+| 0.15    | 0.4221            | 0.7501            | 1.777 |
+| 0.20    | 0.1927            | 0.6007            | 3.117 |
+| 0.25    | 0.0873            | 0.4433            | 5.078 |
+| 0.30    | 0.0525            | 0.3023            | 5.758 |
+
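Reading the tables: the Ratio column matches Filtered Accuracy divided by Attacked Accuracy at the same epsilon (e.g. 0.3522 / 0.0544 is roughly 6.47 for the Gaussian-blur model at epsilon 0.30), i.e. how much accuracy the filter recovers under attack. A hedged sketch of how one row could be computed, reusing the `fgsm_perturb` sketch above and `filtered()` from `mnist.py` (the helper name `accuracy_row` and the exact test-time pipeline are assumptions, not repository code):

def accuracy_row(model, loader, epsilon, filter_name):
    # Attacked accuracy: classify FGSM-perturbed inputs directly.
    # Filtered accuracy: run the same perturbed inputs through the filter first.
    attacked_correct, filtered_correct, total = 0, 0, 0
    for data, target in loader:
        adv = fgsm_perturb(model, data, target, epsilon)
        attacked_correct += (model(adv).argmax(dim=1) == target).sum().item()
        cleaned = filtered(adv.detach(), len(adv), filter_name)
        filtered_correct += (model(cleaned).argmax(dim=1) == target).sum().item()
        total += len(target)
    attacked, filt = attacked_correct / total, filtered_correct / total
    return attacked, filt, filt / attacked  # e.g. ~0.0544, ~0.3522, ~6.47 for gaussian_blur at epsilon 0.30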