Friday, April 26, 2019

Units in Hidden Layer and its Shapes




In [47]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn as nn
%matplotlib inline
import torch
from sklearn.datasets import make_blobs,make_circles
In [2]:
# Two noisy concentric circles (inner radius = 0.2 x outer radius).
# random_state pins the sample so re-running the notebook draws the same points.
X, y = make_circles(n_samples=500, noise=0.1, factor=0.2, random_state=2)
In [3]:
def scatter_plot():
    """Scatter the module-level ``X`` as two labelled series, one per class in ``y``."""
    for cls, name in ((0, "Class 0"), (1, "Class 1")):
        members = X[y == cls]  # rows of X whose label equals cls
        plt.scatter(members[:, 0], members[:, 1], label=name)
    plt.legend()

scatter_plot()
  
In [4]:
# Convert the NumPy arrays to float tensors for PyTorch.
X_data = torch.Tensor(X)
# Targets become a single column so they line up with the model's output;
# -1 lets reshape infer the row count instead of hard-coding 500.
y_data = torch.Tensor(y.reshape(-1, 1))
Data is ready
In [5]:
class LR(nn.Module):
    """Feed-forward network with one sigmoid hidden layer and a sigmoid output.

    Args:
        inp: number of input features.
        H1: number of units in the hidden layer.
        op: number of output units.
    """

    def __init__(self, inp, H1, op):
        super().__init__()
        # Input -> hidden, then hidden -> output.
        self.linear = nn.Linear(inp, H1)
        self.linear2 = nn.Linear(H1, op)

    def forward(self, x):
        """Apply both linear layers, each followed by a sigmoid, and return the result."""
        hidden = self.linear(x).sigmoid()
        return self.linear2(hidden).sigmoid()
Experiment 1: Two neurons in the hidden layer
In [6]:
# Seed PyTorch's RNG so the randomly initialised weights are the same on every run.
torch.manual_seed(2)
# 2 input features -> 2 hidden units -> 1 output.
model = LR(2,2,1)
In [7]:
list(model.parameters())
Out[7]:
[Parameter containing:
 tensor([[ 0.1622, -0.1683],
         [ 0.1939, -0.0361]], requires_grad=True), Parameter containing:
 tensor([0.3021, 0.1683], requires_grad=True), Parameter containing:
 tensor([[-0.0813, -0.5717]], requires_grad=True), Parameter containing:
 tensor([0.1614], requires_grad=True)]
In [8]:
# Binary cross-entropy on the model's sigmoid output.
criterion = nn.BCELoss()
# Adam over every parameter of the current model, learning rate 0.05.
optimizer = torch.optim.Adam(model.parameters(), lr = 0.05)
In [9]:
# Full-batch training: each epoch runs one forward/backward pass over the
# whole dataset and records the loss for plotting.
losses = []
for epoch in range(1000):
    # Call the module itself rather than .forward() so registered hooks run.
    y_pred = model(X_data)
    loss = criterion(y_pred, y_data)
    losses.append(loss.item())
    # Clear any stale gradients before computing new ones.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  
  
In [10]:
# Take the x-axis from the recorded losses so it cannot drift from the epoch count.
plt.plot(range(len(losses)), losses)
Out[10]:
[<matplotlib.lines.Line2D at 0x11bb64a90>]
In [11]:
list(model.parameters())
Out[11]:
[Parameter containing:
 tensor([[-10.8445,   9.3445],
         [ 14.6513, -11.6698]], requires_grad=True), Parameter containing:
 tensor([-6.2115, -7.8234], requires_grad=True), Parameter containing:
 tensor([[-8.7913, -9.8806]], requires_grad=True), Parameter containing:
 tensor([1.8178], requires_grad=True)]
In [12]:
optimizer.state
Out[12]:
defaultdict(dict, {Parameter containing:
             tensor([[-10.8445,   9.3445],
                     [ 14.6513, -11.6698]], requires_grad=True): {'exp_avg': tensor([[ 5.1652e-05, -4.7676e-05],
                      [-1.4190e-04,  1.1948e-04]]),
              'exp_avg_sq': tensor([[1.2006e-06, 9.9462e-07],
                      [8.4483e-07, 1.0953e-06]]),
              'step': 1000},
             Parameter containing:
             tensor([-6.2115, -7.8234], requires_grad=True): {'exp_avg': tensor([7.8085e-06, 6.7004e-05]),
              'exp_avg_sq': tensor([6.8437e-08, 7.5955e-07]),
              'step': 1000},
             Parameter containing:
             tensor([[-8.7913, -9.8806]], requires_grad=True): {'exp_avg': tensor([[9.2521e-05, 2.2201e-04]]),
              'exp_avg_sq': tensor([[2.0147e-05, 1.5855e-05]]),
              'step': 1000},
             Parameter containing:
             tensor([1.8178], requires_grad=True): {'exp_avg': tensor([6.1678e-06]),
              'exp_avg_sq': tensor([3.9356e-06]),
              'step': 1000}})
In [13]:
optimizer.defaults
Out[13]:
{'amsgrad': False,
 'betas': (0.9, 0.999),
 'eps': 1e-08,
 'lr': 0.05,
 'weight_decay': 0}
In [14]:
optimizer.defaults
Out[14]:
{'amsgrad': False,
 'betas': (0.9, 0.999),
 'eps': 1e-08,
 'lr': 0.05,
 'weight_decay': 0}
In [15]:
optimizer.param_groups
Out[15]:
[{'amsgrad': False,
  'betas': (0.9, 0.999),
  'eps': 1e-08,
  'lr': 0.05,
  'params': [Parameter containing:
   tensor([[-10.8445,   9.3445],
           [ 14.6513, -11.6698]], requires_grad=True), Parameter containing:
   tensor([-6.2115, -7.8234], requires_grad=True), Parameter containing:
   tensor([[-8.7913, -9.8806]], requires_grad=True), Parameter containing:
   tensor([1.8178], requires_grad=True)],
  'weight_decay': 0}]
Testing
In [16]:
def plot_decision_boundary(X, y):
    """Draw filled contours of the model's output over the range spanned by X.

    Reads the module-level ``model`` at call time, so it plots whichever
    network is currently bound to that name.

    Parameters
    ----------
    X : 2-D array with at least two columns
        Only used to derive the plotting extent: the min/max of columns
        0 and 1, each padded by 0.25.
    y : unused
        Not read by the function body; kept so existing calls keep working.
    """
    x_span = np.linspace(X[:, 0].min() - 0.25, X[:, 0].max() + 0.25)
    y_span = np.linspace(X[:, 1].min() - 0.25, X[:, 1].max() + 0.25)
    xx, yy = np.meshgrid(x_span, y_span)
    # One row per grid node: (x, y) coordinates flattened in meshgrid order.
    grid = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])
    # Inference only: skip autograd bookkeeping, and call the module itself
    # (not .forward) so any registered hooks run.
    with torch.no_grad():
        pred_func = model(grid)
    z = pred_func.view(xx.shape).numpy()
    plt.contourf(xx, yy, z)
In [17]:
plot_decision_boundary(X, y)
scatter_plot()
In [18]:
# Classify a single point close to the origin.
# The coordinates get their own names: binding them to `x`/`y` would overwrite
# the label array `y` that scatter_plot() reads.
point_x, point_y = 0.025, 0.025
point = torch.Tensor([point_x, point_y])
prob = model(point)  # call the module (not .forward) so registered hooks run
prediction = 1 if prob > 0.5 else 0  # threshold the sigmoid output at 0.5
plt.plot([point_x], [point_y], marker='o', markersize=10, color="red")
print("Prediction is", prediction)
plot_decision_boundary(X, y)
Prediction is 1
In [19]:
# Classify a single point far from the origin.
# The coordinates get their own names: binding them to `x`/`y` would overwrite
# the label array `y` that scatter_plot() reads.
point_x, point_y = -1, 1
point = torch.Tensor([point_x, point_y])
prob = model(point)  # call the module (not .forward) so registered hooks run
prediction = 1 if prob > 0.5 else 0  # threshold the sigmoid output at 0.5
plt.plot([point_x], [point_y], marker='o', markersize=10, color="red")
print("Prediction is", prediction)
plot_decision_boundary(X, y)
Prediction is 0
Two neurons in the hidden layer are not enough to classify our dataset. Looking at the data, we need at least three neurons (whose boundaries form a triangle) to classify it correctly. Four neurons will form a square shape that classifies the points correctly.

Three neurons in the hidden layer

In [20]:
# Two noisy concentric circles (inner radius = 0.2 x outer radius).
# random_state pins the sample so re-running the notebook draws the same points.
X, y = make_circles(n_samples=500, noise=0.1, factor=0.2, random_state=2)
X_data = torch.Tensor(X)
# -1 lets reshape infer the row count instead of hard-coding 500.
y_data = torch.Tensor(y.reshape(-1, 1))
In [21]:
# Seed PyTorch's RNG so the randomly initialised weights are the same on every run.
torch.manual_seed(2)
# 2 input features -> 3 hidden units -> 1 output.
model = LR(2,3,1)
In [22]:
list(model.parameters())
Out[22]:
[Parameter containing:
 tensor([[ 0.1622, -0.1683],
         [ 0.1939, -0.0361],
         [ 0.3021,  0.1683]], requires_grad=True), Parameter containing:
 tensor([-0.0813, -0.5717,  0.1614], requires_grad=True), Parameter containing:
 tensor([[-0.5112,  0.0759,  0.0384]], requires_grad=True), Parameter containing:
 tensor([-0.1270], requires_grad=True)]
In [23]:
# Binary cross-entropy on the model's sigmoid output.
criterion = nn.BCELoss()
# Adam over every parameter of the current model, learning rate 0.05.
optimizer = torch.optim.Adam(model.parameters(), lr = 0.05)
In [24]:
# Full-batch training: each epoch runs one forward/backward pass over the
# whole dataset and records the loss for plotting.
losses = []
for epoch in range(1000):
    # Call the module itself rather than .forward() so registered hooks run.
    y_pred = model(X_data)
    loss = criterion(y_pred, y_data)
    losses.append(loss.item())
    # Clear any stale gradients before computing new ones.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  
  
In [25]:
# Take the x-axis from the recorded losses so it cannot drift from the epoch count.
plt.plot(range(len(losses)), losses)
Out[25]:
[<matplotlib.lines.Line2D at 0x11bdef278>]
In [26]:
list(model.parameters())
Out[26]:
[Parameter containing:
 tensor([[  8.2821, -12.1045],
         [ -5.6142, -14.2126],
         [ 13.7425,   1.4241]], requires_grad=True), Parameter containing:
 tensor([-6.7384,  6.9589,  6.4378], requires_grad=True), Parameter containing:
 tensor([[-12.1918,  11.0617,  10.9448]], requires_grad=True), Parameter containing:
 tensor([-15.7837], requires_grad=True)]
In [ ]:
 
In [27]:
def plot_decision_boundary(X, y):
    """Draw filled contours of the model's output over the range spanned by X.

    Reads the module-level ``model`` at call time, so it plots whichever
    network is currently bound to that name.

    Parameters
    ----------
    X : 2-D array with at least two columns
        Only used to derive the plotting extent: the min/max of columns
        0 and 1, each padded by 0.25.
    y : unused
        Not read by the function body; kept so existing calls keep working.
    """
    x_span = np.linspace(X[:, 0].min() - 0.25, X[:, 0].max() + 0.25)
    y_span = np.linspace(X[:, 1].min() - 0.25, X[:, 1].max() + 0.25)
    xx, yy = np.meshgrid(x_span, y_span)
    # One row per grid node: (x, y) coordinates flattened in meshgrid order.
    grid = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])
    # Inference only: skip autograd bookkeeping, and call the module itself
    # (not .forward) so any registered hooks run.
    with torch.no_grad():
        pred_func = model(grid)
    z = pred_func.view(xx.shape).numpy()
    plt.contourf(xx, yy, z)
In [28]:
plot_decision_boundary(X, y)
scatter_plot()

Four units in the hidden layer

In [29]:
# Two noisy concentric circles (inner radius = 0.2 x outer radius).
# random_state pins the sample so re-running the notebook draws the same points.
X, y = make_circles(n_samples=500, noise=0.1, factor=0.2, random_state=2)
X_data = torch.Tensor(X)
# -1 lets reshape infer the row count instead of hard-coding 500.
y_data = torch.Tensor(y.reshape(-1, 1))
In [30]:
# Seed PyTorch's RNG so the randomly initialised weights are the same on every run.
torch.manual_seed(2)
# 2 input features -> 4 hidden units -> 1 output.
model = LR(2,4,1)
In [31]:
list(model.parameters())
Out[31]:
[Parameter containing:
 tensor([[ 0.1622, -0.1683],
         [ 0.1939, -0.0361],
         [ 0.3021,  0.1683],
         [-0.0813, -0.5717]], requires_grad=True), Parameter containing:
 tensor([ 0.1614, -0.6260,  0.0929,  0.0470], requires_grad=True), Parameter containing:
 tensor([[-0.1099,  0.4088,  0.0334,  0.2073]], requires_grad=True), Parameter containing:
 tensor([0.2116], requires_grad=True)]
In [32]:
# Binary cross-entropy on the model's sigmoid output.
criterion = nn.BCELoss()
# Adam over every parameter of the current model, learning rate 0.05.
optimizer = torch.optim.Adam(model.parameters(), lr = 0.05)
In [33]:
# Full-batch training: each epoch runs one forward/backward pass over the
# whole dataset and records the loss for plotting.
losses = []
for epoch in range(1000):
    # Call the module itself rather than .forward() so registered hooks run.
    y_pred = model(X_data)
    loss = criterion(y_pred, y_data)
    losses.append(loss.item())
    # Clear any stale gradients before computing new ones.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  
  
In [34]:
# Take the x-axis from the recorded losses so it cannot drift from the epoch count.
plt.plot(range(len(losses)), losses)
Out[34]:
[<matplotlib.lines.Line2D at 0x1199c15f8>]
In [35]:
list(model.parameters())
Out[35]:
[Parameter containing:
 tensor([[ -1.0902, -12.8122],
         [-10.8484,  14.9042],
         [ 13.0257,   4.0187],
         [ 12.3682,  -2.9821]], requires_grad=True), Parameter containing:
 tensor([-6.7193, -8.5359, -6.6661,  7.2223], requires_grad=True), Parameter containing:
 tensor([[-12.3217, -10.7492, -12.2166,   9.3663]], requires_grad=True), Parameter containing:
 tensor([-2.6503], requires_grad=True)]
In [36]:
def plot_decision_boundary(X, y):
    """Draw filled contours of the model's output over the range spanned by X.

    Reads the module-level ``model`` at call time, so it plots whichever
    network is currently bound to that name.

    Parameters
    ----------
    X : 2-D array with at least two columns
        Only used to derive the plotting extent: the min/max of columns
        0 and 1, each padded by 0.25.
    y : unused
        Not read by the function body; kept so existing calls keep working.
    """
    x_span = np.linspace(X[:, 0].min() - 0.25, X[:, 0].max() + 0.25)
    y_span = np.linspace(X[:, 1].min() - 0.25, X[:, 1].max() + 0.25)
    xx, yy = np.meshgrid(x_span, y_span)
    # One row per grid node: (x, y) coordinates flattened in meshgrid order.
    grid = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])
    # Inference only: skip autograd bookkeeping, and call the module itself
    # (not .forward) so any registered hooks run.
    with torch.no_grad():
        pred_func = model(grid)
    z = pred_func.view(xx.shape).numpy()
    plt.contourf(xx, yy, z)
In [37]:
plot_decision_boundary(X, y)
scatter_plot()

More units in the hidden layer

Let's increase the number of hidden units to 10. The boundary should form a circular shape that fits our training data very well (overfitting).
In [38]:
# Two noisy concentric circles (inner radius = 0.2 x outer radius).
# random_state pins the sample so re-running the notebook draws the same points.
X, y = make_circles(n_samples=500, noise=0.1, factor=0.2, random_state=2)
X_data = torch.Tensor(X)
# -1 lets reshape infer the row count instead of hard-coding 500.
y_data = torch.Tensor(y.reshape(-1, 1))
In [39]:
# Seed PyTorch's RNG so the randomly initialised weights are the same on every run.
torch.manual_seed(2)
# 2 input features -> 10 hidden units -> 1 output.
model = LR(2,10,1)
In [40]:
list(model.parameters())
Out[40]:
[Parameter containing:
 tensor([[ 0.1622, -0.1683],
         [ 0.1939, -0.0361],
         [ 0.3021,  0.1683],
         [-0.0813, -0.5717],
         [ 0.1614, -0.6260],
         [ 0.0929,  0.0470],
         [-0.1555,  0.5782],
         [ 0.0472,  0.2932],
         [ 0.2992, -0.4171],
         [-0.2718,  0.6800]], requires_grad=True), Parameter containing:
 tensor([-0.6926, -0.0480, -0.0560,  0.5016, -0.0672,  0.1862, -0.0339, -0.3959,
         -0.4008, -0.3435], requires_grad=True), Parameter containing:
 tensor([[-0.2873, -0.2052,  0.0744,  0.2081,  0.0156, -0.1450,  0.1390, -0.1214,
          -0.0701, -0.1734]], requires_grad=True), Parameter containing:
 tensor([-0.0993], requires_grad=True)]
In [41]:
# Binary cross-entropy on the model's sigmoid output.
criterion = nn.BCELoss()
# Adam over every parameter of the current model, learning rate 0.05.
optimizer = torch.optim.Adam(model.parameters(), lr = 0.05)
In [42]:
# Full-batch training: each epoch runs one forward/backward pass over the
# whole dataset and records the loss for plotting.
losses = []
for epoch in range(1000):
    # Call the module itself rather than .forward() so registered hooks run.
    y_pred = model(X_data)
    loss = criterion(y_pred, y_data)
    losses.append(loss.item())
    # Clear any stale gradients before computing new ones.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  
  
In [43]:
# Take the x-axis from the recorded losses so it cannot drift from the epoch count.
plt.plot(range(len(losses)), losses)
Out[43]:
[<matplotlib.lines.Line2D at 0x1049877f0>]
In [44]:
list(model.parameters())
Out[44]:
[Parameter containing:
 tensor([[ 6.6365, -6.0255],
         [10.6103,  0.0485],
         [ 8.2460, -0.0346],
         [-2.7792, -7.7013],
         [12.2997,  0.4020],
         [-8.1266, -5.1448],
         [ 5.3756,  8.5375],
         [ 2.9836,  8.6678],
         [ 4.4349, -7.9975],
         [-3.8674,  8.4983]], requires_grad=True), Parameter containing:
 tensor([-4.3390, -4.8931,  3.8393,  3.7049,  5.9222, -4.4001,  4.9860, -4.2407,
         -4.4089, -4.0788], requires_grad=True), Parameter containing:
 tensor([[-6.4468, -6.3571,  3.6258,  3.7723,  3.3041, -6.4906,  3.8125, -7.0982,
          -6.0789, -6.0252]], requires_grad=True), Parameter containing:
 tensor([-4.2891], requires_grad=True)]
In [45]:
def plot_decision_boundary(X, y):
    """Draw filled contours of the model's output over the range spanned by X.

    Reads the module-level ``model`` at call time, so it plots whichever
    network is currently bound to that name.

    Parameters
    ----------
    X : 2-D array with at least two columns
        Only used to derive the plotting extent: the min/max of columns
        0 and 1, each padded by 0.25.
    y : unused
        Not read by the function body; kept so existing calls keep working.
    """
    x_span = np.linspace(X[:, 0].min() - 0.25, X[:, 0].max() + 0.25)
    y_span = np.linspace(X[:, 1].min() - 0.25, X[:, 1].max() + 0.25)
    xx, yy = np.meshgrid(x_span, y_span)
    # One row per grid node: (x, y) coordinates flattened in meshgrid order.
    grid = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])
    # Inference only: skip autograd bookkeeping, and call the module itself
    # (not .forward) so any registered hooks run.
    with torch.no_grad():
        pred_func = model(grid)
    z = pred_func.view(xx.shape).numpy()
    plt.contourf(xx, yy, z)
In [46]:
plot_decision_boundary(X, y)
scatter_plot()

No comments :

Post a Comment