Neural Nets and Deep Learning for Image Classification
Neural Nets
Imagine this box is a decision function we want to learn for classification.
We can also view the problem as trying to approximate the box function using logistic regression:
If we attempt a linear regression turned into a logistic regression, we will still get errors for the higher x values.
Shown here:
Reminder: the logistic (sigmoid) function used in this regression is acting as an activation function.
We could draw another line though:
This too perhaps:
Again though not perfect:
However, we can make a band-pass-filter-like shape by subtracting one logistic curve from the other, getting our prediction to match the desired square pulse from earlier (kind of):
Pretty square pulse we are aiming for:
As you can see, with two different neurons each performing a logistic regression with a different line, we can effectively get our square pulse.
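Here is a minimal sketch of that subtraction (the steepness and offsets below are hand-picked illustrative values, not learned parameters):
import numpy as np
import matplotlib.pyplot as plt
# Logistic (sigmoid) function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
x = np.linspace(-5, 5, 200)
# Two logistic regressions on different lines: one rises near x = -1, the other near x = 1
a1 = sigmoid(10 * (x + 1))
a2 = sigmoid(10 * (x - 1))
# Subtracting one from the other leaves a band-pass-like square pulse between -1 and 1
plt.plot(x, a1, '--', label='neuron 1')
plt.plot(x, a2, '--', label='neuron 2')
plt.plot(x, a1 - a2, label='difference (pulse)')
plt.legend()
plt.show()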
This first layer of neurons and activation functions is called the hidden layer. Each neuron is an artificial neuron. The output layer has one artificial neuron.
Shown below are the activation outputs. You can see red and blue colors, but that is not the output of the activation layers; imagine the dots are black, and the red and blue just tell us the ground truth. The final layer, which fits a black line (decision boundary), is what the output layer has to figure out.
Recall that all these logistic regressions are a series of weights and biases that can be learned. These are feedforward neural networks and are also called fully connected networks.
With many neurons we can create complex decision boundaries.
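As a rough sketch of what that means (the weights and biases below are placeholders picked for illustration, not learned values), a one-hidden-layer fully connected network is just two layers of weights, biases, and sigmoids:
import numpy as np
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
# Illustrative parameters for 1 input, 2 hidden neurons, 1 output
W1 = np.array([[10.0], [10.0]])  # hidden-layer weights, shape (2, 1)
b1 = np.array([10.0, -10.0])     # hidden-layer biases
W2 = np.array([1.0, -1.0])       # output-layer weights
b2 = 0.0                         # output-layer bias
def forward(x):
    # Hidden layer: each row of W1 is one logistic regression on the input
    h = sigmoid(W1 @ np.atleast_1d(x) + b1)
    # Output layer: one more logistic regression over the hidden activations
    return sigmoid(W2 @ h + b2)
# Inside the pulse (x = 0) the output is high; outside it (x = 3) it is lower
print(forward(0.0), forward(3.0))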
Lab
# Setup Environment
cd ~/Desktop; rm -r temp; # To remove
cd ~/Desktop; mkdir temp; cd temp; pyenv activate venv3.10.4;
# Import the libraries we need for this lab
# Allows us to use arrays to manipulate and store data
import numpy as np
# PyTorch Library
import torch
# PyTorch Neural Network
import torch.nn as nn
# Allows us to use activation functions
import torch.nn.functional as F
# Used to graph data and loss curves
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Used to help create the dataset and perform mini-batch
from torch.utils.data import Dataset, DataLoader
# Plot the data
def plot_decision_regions_2class(model, data_set):
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#00AAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#00AAFF'])
    X = data_set.x.numpy()
    y = data_set.y.numpy()
    h = 0.02
    x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
    y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    XX = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])
    yhat = np.logical_not((model(XX)[:, 0] > 0.5).numpy()).reshape(xx.shape)
    plt.pcolormesh(xx, yy, yhat, cmap=cmap_light, shading='auto')
    plt.plot(X[y[:, 0] == 0, 0], X[y[:, 0] == 0, 1], 'o', label='y=0')
    plt.plot(X[y[:, 0] == 1, 0], X[y[:, 0] == 1, 1], 'ro', label='y=1')
    plt.title("decision region")
    plt.legend()
# Calculate the accuracy
def accuracy(model, data_set):
    # Rounds each prediction to the nearest integer, 0 or 1, by thresholding at 0.5
    # Checks whether each prediction matches the actual value and returns the accuracy rate
    return np.mean(data_set.y.view(-1).numpy() == (model(data_set.x)[:, 0] > 0.5).numpy())
# Define the class Net with one hidden layer
class Net(nn.Module):
    # Constructor
    def __init__(self, D_in, H, D_out):
        super(Net, self).__init__()
        # D_in is the input size of the first layer (size of input layer)
        # H is the output size of the first layer and the input size of the second layer (size of hidden layer)
        # D_out is the output size of the second layer (size of output layer)
        self.linear1 = nn.Linear(D_in, H)
        self.linear2 = nn.Linear(H, D_out)

    # Prediction
    def forward(self, x):
        # Puts x through the first layer, then the sigmoid function
        x = torch.sigmoid(self.linear1(x))
        # Puts the result of the previous line through the second layer, then the sigmoid function
        x = torch.sigmoid(self.linear2(x))
        # The output is a number between 0 and 1 due to the sigmoid; whichever it is closer to, 0 or 1, is the class prediction
        return x
# Function to Train the Model
def train(data_set, model, criterion, train_loader, optimizer, epochs=5):
    # Lists to keep track of cost and accuracy
    COST = []
    ACC = []
    # Number of times we train on the entire dataset
    for epoch in range(epochs):
        # Total loss over the epoch
        total = 0
        # For each batch in the train loader
        for x, y in train_loader:
            # Resets the calculated gradient values; this must be done each time because gradients accumulate if we do not reset them
            optimizer.zero_grad()
            # Makes a prediction based on the x values
            yhat = model(x)
            # Measures the loss between the prediction and the actual y values
            loss = criterion(yhat, y)
            # Calculates the gradient value with respect to each weight and bias
            loss.backward()
            # Updates the weights and biases according to the calculated gradient values
            optimizer.step()
            # Accumulates the loss
            total += loss.item()
        # Saves cost and accuracy for this epoch
        ACC.append(accuracy(model, data_set))
        COST.append(total)
    # Plots the Cost vs Epoch graph
    fig, ax1 = plt.subplots()
    color = 'tab:red'
    ax1.plot(COST, color=color)
    ax1.set_xlabel('epoch', color=color)
    ax1.set_ylabel('total loss', color=color)
    ax1.tick_params(axis='y', color=color)
    # Plots the Accuracy vs Epoch graph on a second y-axis
    ax2 = ax1.twinx()
    color = 'tab:blue'
    ax2.set_ylabel('accuracy', color=color)  # we already handled the x-label with ax1
    ax2.plot(ACC, color=color)
    ax2.tick_params(axis='y', color=color)
    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.show()
    return COST
# Make some data
# Define the class XOR_Data
class XOR_Data(Dataset):
    # Constructor
    # N_s is the size of the dataset
    def __init__(self, N_s=100):
        # Create an N_s by 2 tensor for the x values representing the coordinates
        self.x = torch.zeros((N_s, 2))
        # Create an N_s by 1 tensor for the class each x value belongs to
        self.y = torch.zeros((N_s, 1))
        # Split the dataset into 4 sections
        for i in range(N_s // 4):
            # Create data centered around (0,0) of class 0
            self.x[i, :] = torch.Tensor([0.0, 0.0])
            self.y[i, 0] = torch.Tensor([0.0])
            # Create data centered around (0,1) of class 1
            self.x[i + N_s // 4, :] = torch.Tensor([0.0, 1.0])
            self.y[i + N_s // 4, 0] = torch.Tensor([1.0])
            # Create data centered around (1,0) of class 1
            self.x[i + N_s // 2, :] = torch.Tensor([1.0, 0.0])
            self.y[i + N_s // 2, 0] = torch.Tensor([1.0])
            # Create data centered around (1,1) of class 0
            self.x[i + 3 * N_s // 4, :] = torch.Tensor([1.0, 1.0])
            self.y[i + 3 * N_s // 4, 0] = torch.Tensor([0.0])
        # Add some noise to the x values so the points are not identical
        self.x = self.x + 0.01 * torch.randn((N_s, 2))
        self.len = N_s

    # Getter
    def __getitem__(self, index):
        return self.x[index], self.y[index]

    # Get length
    def __len__(self):
        return self.len

    # Plot the data
    def plot_stuff(self):
        plt.plot(self.x[self.y[:, 0] == 0, 0].numpy(), self.x[self.y[:, 0] == 0, 1].numpy(), 'o', label="y=0")
        plt.plot(self.x[self.y[:, 0] == 1, 0].numpy(), self.x[self.y[:, 0] == 1, 1].numpy(), 'ro', label="y=1")
        plt.legend()
# Create dataset object
data_set = XOR_Data()
data_set.plot_stuff()
plt.show()
Quiz question:
Create a neural network model with one neuron in the hidden layer. Then, use the following code to train it:
# Each x has two coordinates, so D_in is 2; y in XOR is either class 0 or 1, so D_out is 1.
model = Net(D_in=2, H=1, D_out=1)
# Train the model
learning_rate = 0.1
# We create a criterion which will measure loss
criterion = nn.BCELoss()
# Create an optimizer that updates model parameters using the learning rate and gradient
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Create a Data Loader for the training data with a batch size of 1
train_loader = DataLoader(dataset=data_set, batch_size=1)
# Use the training function to train the model for 500 epochs
LOSS12 = train(data_set, model, criterion, train_loader, optimizer, epochs=500)
# Plot the data with decision boundaries
plot_decision_regions_2class(model, data_set)
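With only one hidden neuron, the output is a monotonic function of a single logistic regression, so the decision boundary is effectively one line; we should expect the model to fail to separate the XOR classes, which is why we try more hidden neurons next.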
Two Neurons
model = Net(D_in=2, H=2, D_out=1)
# Train the model
learning_rate = 0.1
# We create a criterion which will measure loss
criterion = nn.BCELoss()
# Create an optimizer that updates model parameters using the learning rate and gradient
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Create a Data Loader for the training data with a batch size of 1
train_loader = DataLoader(dataset=data_set, batch_size=1)
# Use the training function to train the model for 500 epochs
LOSS12 = train(data_set, model, criterion, train_loader, optimizer, epochs=500)
# Plot the data with decision boundaries
plot_decision_regions_2class(model, data_set)
Three Neurons
model = Net(D_in=2, H=3, D_out=1)
# Train the model
learning_rate = 0.1
# We create a criterion which will measure loss
criterion = nn.BCELoss()
# Create an optimizer that updates model parameters using the learning rate and gradient
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Create a Data Loader for the training data with a batch size of 1
train_loader = DataLoader(dataset=data_set, batch_size=1)
# Use the training function to train the model for 500 epochs
LOSS12 = train(data_set, model, criterion, train_loader, optimizer, epochs=500)
# Plot the data with decision boundaries
plot_decision_regions_2class(model, data_set)