import torch

# Fix the global RNG seed so that later random draws are reproducible.
torch.manual_seed(0)

# Two scalar leaf tensors that autograd is asked to track.
x = torch.tensor(1.0, requires_grad=True)
y = torch.tensor(2.0, requires_grad=True)

# z = x^2 + 2*y^2 is computed from tracked tensors, so it is tracked as well.
z = x.pow(2) + 2 * y.pow(2)

print(
    f"x.requires_grad: {x.requires_grad}",
    f"y.requires_grad: {y.requires_grad}",
    f"z.requires_grad: {z.requires_grad}",
    sep="\n",
)

x.requires_grad: True
y.requires_grad: True
z.requires_grad: True

# Back-propagate from z; the resulting gradients are stored on the leaf
# tensors x and y in their `.grad` attribute.
z.backward()
print(f"Gradient of x: {x.grad}", f"Gradient of y: {y.grad}", sep="\n")

Gradient of x: 2.0
Gradient of y: 8.0

# Rebuild the small graph and append one more operation: a = sin(z).
x = torch.tensor(1.0, requires_grad=True)
y = torch.tensor(2.0, requires_grad=True)
z = x.pow(2) + 2 * y.pow(2)
a = z.sin()

print(f"a.requires_grad: {a.requires_grad}")
# Walk the recorded graph: the node that produced `a`, that node's inputs,
# the node that produced `z`, and finally the leaf `x` (which has no grad_fn).
print(a.grad_fn, a.grad_fn.next_functions, z.grad_fn, x.grad_fn, sep="\n")

a.requires_grad: True
<SinBackward0 object at 0x7fcd1f42fdc0>
((<AddBackward0 object at 0x7fcd1f42e5f0>, 0),)
<AddBackward0 object at 0x7fcd1f42fdc0>
None

x = torch.tensor(1.0, requires_grad=True)
y = torch.tensor(2.0, requires_grad=True)
z = x.pow(2) + 2 * y.pow(2)

# `detach()` returns a tensor that is not tracked by autograd.
x_detached = x.detach()
z.backward()

# The original leaf keeps tracking and receives a gradient; the detached
# tensor does neither.
print(
    f"x.requires_grad: {x.requires_grad}",
    f"x_detached.requires_grad: {x_detached.requires_grad}",
    sep="\n",
)

print(
    f"x.grad: {x.grad}",
    f"x_detached.grad: {x_detached.grad}",
    sep="\n",
)

x.requires_grad: True
x_detached.requires_grad: False
x.grad: 2.0
x_detached.grad: None

x = torch.tensor(1.0, requires_grad=True)
y = torch.tensor(2.0, requires_grad=True)
z = x.pow(2) + 2 * y.pow(2)

x_clone = x.clone()
# NOTE: the next line rebinds the name `x_clone` to a brand-new tensor, so
# the clone created above is discarded. Everything printed below describes
# the new tensor (which was created without requires_grad), not the clone.
x_clone = torch.tensor(5.0)
print(f"x_clone: {x_clone}", f"x: {x}", sep="\n")

print(f"x_clone.requires_grad: {x_clone.requires_grad}")

x_clone: 5.0
x: 1.0
x_clone.requires_grad: False

# Copy x and cut the copy loose from the autograd graph (detach first, then
# clone yields the same untracked copy).
x_clone = x.detach().clone()
print(f"x_clone.requires_grad: {x_clone.requires_grad}")

x_clone.requires_grad: False

x = torch.tensor(1.0, requires_grad=True)
y = torch.tensor(2.0, requires_grad=True)
z = x.pow(2) + 2 * y.pow(2)

# Operations executed inside `torch.no_grad()` are not recorded by autograd,
# so the result does not require grad even though x does.
with torch.no_grad():
    a = x.sin()

print(f"a.requires_grad: {a.requires_grad}")

a.requires_grad: False

import torch.nn as nn

class ShallowNet(nn.Module):
    """One-hidden-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        n_x: Number of input features.
        n_h: Width of the hidden layer.
        n_y: Number of output features.
    """

    def __init__(self, n_x, n_h, n_y):
        super().__init__()
        self.fc1 = nn.Linear(n_x, n_h)
        self.fc2 = nn.Linear(n_h, n_y)
        self.act = nn.ReLU()

    def forward(self, x):
        """Apply the network to `x`.

        Args:
            x: Tensor whose last dimension has size `n_x`.

        Returns:
            Tensor with the same leading dimensions as `x` and a last
            dimension of size `n_y`.
        """
        # Forward pass: x -> linear() -> act() -> linear()
        hidden = self.act(self.fc1(x))
        return self.fc2(hidden)

# Example: build a small network and print its layer summary.
n_x = 4   # Input size
n_h = 10  # Hidden layer width
n_y = 1   # Output size

model = ShallowNet(n_x=n_x, n_h=n_h, n_y=n_y)
print(model)

ShallowNet(
  (fc1): Linear(in_features=4, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=1, bias=True)
  (act): ReLU()
)

# Push one random batch through the untrained network.
num_samples = 8
x = torch.randn((num_samples, n_x))
output = model(x)
print(output)

tensor([[ 0.3817],
        [ 0.3093],
        [ 0.2687],
        [ 0.1068],
        [-0.0068],
        [ 0.0091],
        [ 0.1560],
        [ 0.1721]], grad_fn=<AddmmBackward0>)

# Loss function: mean squared error
criterion = nn.MSELoss()

# Optimizer: plain stochastic gradient descent over all model parameters
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# Random full-batch regression data; the seed is reset first for reproducibility
torch.manual_seed(0)

x = torch.randn((num_samples, n_x))
y = torch.randn((num_samples, 1))

# Full-batch gradient descent: every epoch uses all of (x, y) in one step.
num_epochs = 10
for epoch in range(num_epochs):
    optimizer.zero_grad()  # Clear previous gradients

    # Forward pass: model prediction and its loss against the targets
    outputs = model(x)
    loss = criterion(outputs, y)

    # Backward pass and optimization: compute gradients, then update weights
    loss.backward()
    optimizer.step()

    # Print the loss
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}")

Epoch [1/10], Loss: 0.9464
Epoch [2/10], Loss: 0.9357
Epoch [3/10], Loss: 0.9255
Epoch [4/10], Loss: 0.9158
Epoch [5/10], Loss: 0.9066
Epoch [6/10], Loss: 0.8978
Epoch [7/10], Loss: 0.8893
Epoch [8/10], Loss: 0.8812
Epoch [9/10], Loss: 0.8734
Epoch [10/10], Loss: 0.8660

import torch
from torch.utils.data import DataLoader, TensorDataset

# Synthetic regression data (plain tensors)
num_samples = 1000
n_x = 10  # Number of features
n_y = 1   # Number of output features
x = torch.randn((num_samples, n_x))  # Input data
y = torch.randn((num_samples, n_y))  # Target output

# Wrap the tensors in a dataset and serve them as shuffled mini-batches
batch_size = 32
dataset = TensorDataset(x, y)
data_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

torch.manual_seed(0)  # Reset the seed so the weight initialization is reproducible
# Model, loss function, and optimizer for the mini-batch experiment
n_h = 10  # Hidden layer width
learning_rate = 0.01
model = ShallowNet(n_x=n_x, n_h=n_h, n_y=n_y)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

# Training loop with mini-batch SGD
num_epochs = 10

for epoch in range(num_epochs):
    running_loss = 0.0

    # Loop over mini-batches
    for inputs, targets in data_loader:
        optimizer.zero_grad()  # Clear previous gradients

        # Forward pass: compute the model's output for the mini-batch
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Accumulate loss for printing
        running_loss += loss.item()

    # Print the average loss for the current epoch
    avg_loss = running_loss / len(data_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

Epoch [1/10], Loss: 1.1037
Epoch [2/10], Loss: 1.0542
Epoch [3/10], Loss: 1.0580
Epoch [4/10], Loss: 1.0350
Epoch [5/10], Loss: 1.0737
Epoch [6/10], Loss: 1.0209
Epoch [7/10], Loss: 1.0275
Epoch [8/10], Loss: 1.0241
Epoch [9/10], Loss: 1.0418
Epoch [10/10], Loss: 1.0044

import torch
import torchvision

# Load the MNIST train and test splits (downloaded into ./data if missing);
# every image is converted to a tensor by the transform.
to_tensor = torchvision.transforms.ToTensor()
train_dataset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=to_tensor
)
test_dataset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=to_tensor
)

first_image, _first_label = train_dataset[0]
print(f"Number of training samples: {len(train_dataset)}")
print(f"Number of test samples: {len(test_dataset)}")
print(f"Input shape: {first_image.shape}")

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz

100%|██████████| 9.91M/9.91M [00:02<00:00, 4.12MB/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz

100%|██████████| 28.9k/28.9k [00:00<00:00, 132kB/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz

100%|██████████| 1.65M/1.65M [00:01<00:00, 1.26MB/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [Errno 111] Connection refused>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz

100%|██████████| 4.54k/4.54k [00:00<00:00, 4.22MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Number of training samples: 60000
Number of test samples: 10000
Input shape: torch.Size([1, 28, 28])

import matplotlib.pyplot as plt
def plot_images(dataset, num_images=5):
    """Show `num_images` randomly chosen samples of `dataset` in one row.

    Args:
        dataset: Indexable collection whose items are `(image, label)` pairs,
            where `image` is a tensor (assumed channel-first when 3-D, e.g.
            1x28x28 for MNIST - TODO confirm for other datasets).
        num_images: Number of images to draw.
    """
    # Randomly shuffle indices and pick num_images
    random_indices = torch.randperm(len(dataset))[:num_images]
    # squeeze=False keeps `axes` a 2-D array, so indexing also works when
    # num_images == 1 (otherwise plt.subplots returns a bare Axes object).
    fig, axes = plt.subplots(1, num_images, figsize=(12, 4), squeeze=False)

    for ax, idx in zip(axes[0], random_indices):
        # Get image and label; index with a plain int rather than a tensor
        image, label = dataset[int(idx)]
        if image.ndim == 3 and image.shape[0] != 1:
            # Multi-channel image: rearrange (C, H, W) -> (H, W, C)
            image = image.permute(1, 2, 0)
        else:
            # Single-channel image: drop the channel dimension (1x28x28 -> 28x28)
            image = image.squeeze()

        # Plot the image
        ax.imshow(image, cmap='gray')
        ax.set_title(f"Label: {label}")
        ax.axis('off')  # Turn off the axis

    plt.show()

# Plot 5 randomly chosen images from `train_dataset`
plot_images(train_dataset, num_images=5)

# Create DataLoaders: shuffle the training data every epoch, keep the test
# data in a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

from torch.nn.functional import one_hot

torch.manual_seed(0)  # For reproducibility

n_y = 10  # 10 classes
model = ShallowNet(n_x=28*28, n_h=10, n_y=n_y)
criterion = nn.MSELoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

train_losses = []
test_losses = []
num_epochs = 10
for epoch in range(num_epochs):
    train_running_loss = 0.0
    test_running_loss = 0.0

    # Test loop (runs before this epoch's training, so the test loss of
    # epoch k is measured on the weights from the end of epoch k-1)
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        for inputs, targets in test_loader:
            # Flatten each image to a vector and one-hot encode the labels so
            # they match the shape and dtype of the network output for MSE
            inputs = inputs.view(-1, 28 * 28)
            targets = one_hot(targets, num_classes=n_y).float()

            outputs = model(inputs)
            test_loss = criterion(outputs, targets)
            test_running_loss += test_loss.item()

    # Training loop
    model.train()  # Set the model to training mode
    for inputs, targets in train_loader:
        optimizer.zero_grad()  # Clear previous gradients
        # Same preprocessing as in the test loop
        inputs = inputs.view(-1, 28 * 28)
        targets = one_hot(targets, num_classes=n_y).float()

        outputs = model(inputs)
        train_loss = criterion(outputs, targets)
        train_running_loss += train_loss.item()
        train_loss.backward()
        optimizer.step()

    train_losses.append(train_running_loss / len(train_loader))
    test_losses.append(test_running_loss / len(test_loader))
    print(f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {train_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}")

Epoch [1/10], Train Loss: 0.0874, Test Loss: 0.1305
Epoch [2/10], Train Loss: 0.0730, Test Loss: 0.0769
Epoch [3/10], Train Loss: 0.0661, Test Loss: 0.0688
Epoch [4/10], Train Loss: 0.0612, Test Loss: 0.0629
Epoch [5/10], Train Loss: 0.0575, Test Loss: 0.0587
Epoch [6/10], Train Loss: 0.0546, Test Loss: 0.0554
Epoch [7/10], Train Loss: 0.0525, Test Loss: 0.0529
Epoch [8/10], Train Loss: 0.0508, Test Loss: 0.0510
Epoch [9/10], Train Loss: 0.0495, Test Loss: 0.0495
Epoch [10/10], Train Loss: 0.0485, Test Loss: 0.0484

import matplotlib.pyplot as plt

def plot_losses(train_losses, test_losses):
    """Plot training and test loss curves on one figure.

    Args:
        train_losses: Sequence of per-epoch training losses.
        test_losses: Sequence of per-epoch test losses.
    """
    plt.figure(figsize=(10, 6))

    # Plot training losses (blue dashed line)
    plt.plot(train_losses, label='Training Loss', color='blue', linestyle='--')

    # Plot test losses (red solid line)
    plt.plot(test_losses, label='Test Loss', color='red', linestyle='-')

    # Add labels and title
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Training and Test Loss Over Epochs')

    # Add a legend
    plt.legend()

    # Show the plot
    plt.show()

# Draw the loss curves collected during training
plot_losses(train_losses=train_losses, test_losses=test_losses)

def predict_and_accuracy(model, data_loader):
    """Predict a class for every sample and measure classification accuracy.

    Args:
        model: Module mapping a batch of flattened inputs to per-class scores.
        data_loader: Iterable yielding `(inputs, targets)` batches, where
            `targets` holds integer class indices.

    Returns:
        A tuple `(predictions, accuracy)`: the predicted class indices as a
        flat list (in iteration order) and the fraction of samples predicted
        correctly. The accuracy is 0.0 if the loader yields no samples.
    """
    model.eval()  # Set the model to evaluation mode
    correct = 0
    total = 0
    predictions = []

    with torch.no_grad():  # Disable gradient computation during prediction
        for inputs, targets in data_loader:
            # Flatten each sample to a vector (28x28 images -> 784 features)
            inputs = inputs.reshape(inputs.size(0), -1)
            outputs = model(inputs)
            # The predicted class is the index of the largest output score
            predicted = outputs.argmax(dim=1)
            predictions.extend(predicted.tolist())
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    # Guard against an empty loader to avoid a division by zero
    accuracy = correct / total if total else 0.0
    return predictions, accuracy

# Accuracy on the training split
predictions, accuracy = predict_and_accuracy(model=model, data_loader=train_loader)
print(f"Training Accuracy:: {accuracy:.4f}")

Training Accuracy:: 0.8137

# Accuracy on the held-out test split
predictions, accuracy = predict_and_accuracy(model=model, data_loader=test_loader)
print(f"Testing Accuracy: {accuracy:.4f}")

Testing Accuracy: 0.8259

torch.manual_seed(0)  # For reproducibility

model = ShallowNet(n_x=28*28, n_h=8, n_y=n_y)
# Cross-entropy takes the raw network outputs and integer class labels, so no
# one-hot encoding of the targets is needed here.
criterion = nn.CrossEntropyLoss()
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

train_losses = []
test_losses = []
num_epochs = 10
for epoch in range(num_epochs):
    train_running_loss = 0.0
    test_running_loss = 0.0
    # Test loop (runs before this epoch's training step)
    model.eval()
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.view(-1, 28 * 28)  # Flatten the images
            outputs = model(inputs)
            test_loss = criterion(outputs, targets)
            test_running_loss += test_loss.item()

    # Training loop
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()  # Clear previous gradients
        inputs = inputs.view(-1, 28 * 28)  # Flatten the images
        outputs = model(inputs)
        train_loss = criterion(outputs, targets)
        train_running_loss += train_loss.item()
        train_loss.backward()
        optimizer.step()

    train_losses.append(train_running_loss / len(train_loader))
    test_losses.append(test_running_loss / len(test_loader))
    print(f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {train_losses[-1]:.4f}, Test Loss: {test_losses[-1]:.4f}")

Epoch [1/10], Train Loss: 1.3421, Test Loss: 2.3165
Epoch [2/10], Train Loss: 0.5694, Test Loss: 0.6852
Epoch [3/10], Train Loss: 0.4494, Test Loss: 0.4663
Epoch [4/10], Train Loss: 0.4003, Test Loss: 0.4021
Epoch [5/10], Train Loss: 0.3716, Test Loss: 0.3670
Epoch [6/10], Train Loss: 0.3530, Test Loss: 0.3458
Epoch [7/10], Train Loss: 0.3402, Test Loss: 0.3344
Epoch [8/10], Train Loss: 0.3308, Test Loss: 0.3231
Epoch [9/10], Train Loss: 0.3237, Test Loss: 0.3176
Epoch [10/10], Train Loss: 0.3177, Test Loss: 0.3128

# Loss curves of the cross-entropy run
plot_losses(train_losses, test_losses)

# Only the accuracy (second return value) is needed; the per-sample
# predictions are dropped.
train_accuracy = predict_and_accuracy(model, train_loader)[1]
test_accuracy = predict_and_accuracy(model, test_loader)[1]
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

Training Accuracy: 0.9115
Testing Accuracy: 0.9132

from torch.utils.data import random_split
# Hold out 10% of the original training set for validation
train_size = int(0.9 * len(train_dataset))
val_size = len(train_dataset) - train_size

# Randomly partition train_dataset into two non-overlapping subsets
train_data, val_data = random_split(train_dataset, [train_size, val_size])
print(f"Number of training samples: {len(train_data)}")
print(f"Number of validation samples: {len(val_data)}")

# Redefine the training loader and define the validation loader
batch_size = 64
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_data, batch_size=batch_size, shuffle=False)

Number of training samples: 54000
Number of validation samples: 6000

def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train `model` in place and print the mean batch loss of every epoch.

    Args:
        model: Module to train; it receives each batch flattened to
            `(batch, features)`.
        train_loader: Iterable with a length that yields `(inputs, targets)`
            batches.
        criterion: Callable `criterion(outputs, targets)` returning a scalar
            loss tensor.
        optimizer: Optimizer that updates the parameters of `model`.
        num_epochs: Number of passes over `train_loader`.
    """
    model.train()  # Make sure the model is in training mode
    # Avoid a division by zero when the loader has no batches
    num_batches = max(len(train_loader), 1)

    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()  # Clear previous gradients

            # Flatten each sample to a vector, then run the forward pass
            inputs = inputs.reshape(inputs.size(0), -1)
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # Backward pass and parameter update
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {running_loss / num_batches:.4f}")

# Candidate hyperparameters: 5 learning rates spaced logarithmically between
# 10^-3 and 10^0, and 4 hidden-layer widths.
learning_rates = torch.logspace(start=-3, end=0, steps=5)
widths = [2, 16, 128, 1024]
print(f"Learning Rate: {learning_rates}", f"Width: {widths}", sep="\n")

Learning Rate: tensor([0.0010, 0.0056, 0.0316, 0.1778, 1.0000])
Width: [2, 16, 128, 1024]

import random
random.seed(4)  # For reproducibility
# Draw 4 random (learning rate, width) pairs; each pair samples the learning
# rate first and the width second.
random_combinations = []
for _ in range(4):
    sampled_lr = random.choice(learning_rates)
    sampled_width = random.choice(widths)
    random_combinations.append((sampled_lr, sampled_width))
print(f"Random Combinations: {random_combinations}")

Random Combinations: [(tensor(0.0056), 128), (tensor(0.0010), 1024), (tensor(0.1778), 16), (tensor(0.0010), 2)]

# Random search: train one model per sampled (learning rate, width) pair and
# keep the pair with the highest validation accuracy.
best_params = None
best_val_accuracy = 0.0

for lr, width in random_combinations:
    print(f"Training with Learning Rate = {lr:.6f}, Width = {width}")

    torch.manual_seed(0)  # For reproducibility
    # Build and initialize the model with the given width
    model = ShallowNet(n_x=28*28, n_h=width, n_y=n_y)

    # Define the optimizer with the sampled learning rate; float() turns a
    # 0-dim tensor learning rate into a plain Python number
    optimizer = torch.optim.SGD(model.parameters(), lr=float(lr))

    # Define loss criterion
    criterion = nn.CrossEntropyLoss()

    # Train the model on the training set
    train_model(model, train_loader, criterion, optimizer)

    # Validate the model on the validation set
    _, val_accuracy = predict_and_accuracy(model, val_loader)

    # Track the best-performing combination
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_params = (lr, width)

print(f"Best parameters found: Learning Rate = {best_params[0]}, Width = {best_params[1]}")

Training with Learning Rate = 0.005623, Width = 128
Epoch [1/10], Loss: 1.6581
Epoch [2/10], Loss: 0.7577
Epoch [3/10], Loss: 0.5312
Epoch [4/10], Loss: 0.4482
Epoch [5/10], Loss: 0.4049
Epoch [6/10], Loss: 0.3779
Epoch [7/10], Loss: 0.3588
Epoch [8/10], Loss: 0.3442
Epoch [9/10], Loss: 0.3323
Epoch [10/10], Loss: 0.3223
Training with Learning Rate = 0.001000, Width = 1024
Epoch [1/10], Loss: 2.1743
Epoch [2/10], Loss: 1.8915
Epoch [3/10], Loss: 1.5737
Epoch [4/10], Loss: 1.2786
Epoch [5/10], Loss: 1.0545
Epoch [6/10], Loss: 0.8986
Epoch [7/10], Loss: 0.7903
Epoch [8/10], Loss: 0.7127
Epoch [9/10], Loss: 0.6548
Epoch [10/10], Loss: 0.6100
Training with Learning Rate = 0.177828, Width = 16
Epoch [1/10], Loss: 0.4211
Epoch [2/10], Loss: 0.2501
Epoch [3/10], Loss: 0.2155
Epoch [4/10], Loss: 0.1946
Epoch [5/10], Loss: 0.1815
Epoch [6/10], Loss: 0.1732
Epoch [7/10], Loss: 0.1668
Epoch [8/10], Loss: 0.1619
Epoch [9/10], Loss: 0.1551
Epoch [10/10], Loss: 0.1495
Training with Learning Rate = 0.001000, Width = 2
Epoch [1/10], Loss: 2.2896
Epoch [2/10], Loss: 2.1741
Epoch [3/10], Loss: 2.0789
Epoch [4/10], Loss: 1.9932
Epoch [5/10], Loss: 1.9205
Epoch [6/10], Loss: 1.8617
Epoch [7/10], Loss: 1.8131
Epoch [8/10], Loss: 1.7711
Epoch [9/10], Loss: 1.7336
Epoch [10/10], Loss: 1.6996
Best parameters found: Learning Rate = 0.17782793939113617, Width = 16
Epoch [1/10], Loss: 0.4183
Epoch [2/10], Loss: 0.2366
Epoch [3/10], Loss: 0.2000
Epoch [4/10], Loss: 0.1784
Epoch [5/10], Loss: 0.1650
Epoch [6/10], Loss: 0.1533
Epoch [7/10], Loss: 0.1476
Epoch [8/10], Loss: 0.1402
Epoch [9/10], Loss: 0.1340
Epoch [10/10], Loss: 0.1309

# Retrain from the same seed using the best (learning rate, width) pair.
# `criterion` is the loss object left over from the search loop.
best_lr, best_width = best_params
torch.manual_seed(0)
model = ShallowNet(n_x=28*28, n_h=best_width, n_y=n_y)
optimizer = torch.optim.SGD(model.parameters(), lr=best_lr)
train_model(model, train_loader, criterion, optimizer)

Epoch [1/10], Loss: 0.4211
Epoch [2/10], Loss: 0.2501
Epoch [3/10], Loss: 0.2155
Epoch [4/10], Loss: 0.1946
Epoch [5/10], Loss: 0.1815
Epoch [6/10], Loss: 0.1732
Epoch [7/10], Loss: 0.1668
Epoch [8/10], Loss: 0.1619
Epoch [9/10], Loss: 0.1551
Epoch [10/10], Loss: 0.1495

# Accuracy of the retrained model on all three splits (predictions dropped)
train_accuracy = predict_and_accuracy(model, train_loader)[1]
val_accuracy = predict_and_accuracy(model, val_loader)[1]
test_accuracy = predict_and_accuracy(model, test_loader)[1]
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

Training Accuracy: 0.9624
Validation Accuracy: 0.9495
Testing Accuracy: 0.9538

torch.manual_seed(0)  # For reproducibility
width = 2048  # Large hidden width
learning_rate = 1.0
model = ShallowNet(n_x=28*28, n_h=width, n_y=10)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)
print(f"Training with Learning Rate = {learning_rate}, Width = {width}")
train_model(model, train_loader, criterion, optimizer, num_epochs=10)

Training with Learning Rate = 1.0, Width = 2048
Epoch [1/10], Loss: 0.2541
Epoch [2/10], Loss: 0.0846
Epoch [3/10], Loss: 0.0558
Epoch [4/10], Loss: 0.0360
Epoch [5/10], Loss: 0.0240
Epoch [6/10], Loss: 0.0158
Epoch [7/10], Loss: 0.0094
Epoch [8/10], Loss: 0.0066
Epoch [9/10], Loss: 0.0038
Epoch [10/10], Loss: 0.0015

# Accuracy of the wide model on all three splits (predictions dropped)
train_accuracy = predict_and_accuracy(model, train_loader)[1]
val_accuracy = predict_and_accuracy(model, val_loader)[1]
test_accuracy = predict_and_accuracy(model, test_loader)[1]
print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")

Training Accuracy: 1.0000
Validation Accuracy: 0.9852
Testing Accuracy: 0.9846

Learning Rate \ Width	2	16	128	1024
1	val.	val.	val.	val.
0.1	val.	val.	val.	val.
0.01	val.	val.	val.	val.
0.001	val.	val.	val.	val.

Homework 4: Generalization and regularization¶

0 - Introduction to PyTorch and Autograd¶

1 - Build a Neural Network and Train using `autograd`¶

1.1 - Define `ShallowNet` [10/10]¶

1.2 - Define Loss Function and Optimizer [10/10]¶

1.3 - Use `DataLoader` for Mini-Batch SGD [20/20]¶

1.4 - Train on a realistic dataset using Pytorch [40/40]¶

2 - Tuning Hyperparameters¶

2.1 - Train with Cross Entropy Loss [10/10]¶

2.2 - Tune network width and learning rate [10/10]¶

3 - Overparameterization¶

Homework 4: Generalization and regularization¶

0 - Introduction to PyTorch and Autograd¶

1 - Build a Neural Network and Train using autograd¶

1.1 - Define ShallowNet [10/10]¶

1.2 - Define Loss Function and Optimizer [10/10]¶

1.3 - Use DataLoader for Mini-Batch SGD [20/20]¶

1.4 - Train on a realistic dataset using Pytorch [40/40]¶

2 - Tuning Hyperparameters¶

2.1 - Train with Cross Entropy Loss [10/10]¶

2.2 - Tune network width and learning rate [10/10]¶

3 - Overparameterization¶

1 - Build a Neural Network and Train using `autograd`¶

1.1 - Define `ShallowNet` [10/10]¶

1.3 - Use `DataLoader` for Mini-Batch SGD [20/20]¶