Saturday, May 5, 2018

Deep Learning Model Building using Pytorch Version 0.4



In [1]:
import numpy as np
import torch
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader,Dataset
from torch.utils.data.sampler import RandomSampler
In [2]:
data = pd.read_csv("diabetes.csv", header = None)
In [3]:
data.head()
Out[3]:
0 1 2 3 4 5 6 7 8
0 -0.294118 0.487437 0.180328 -0.292929 0.000000 0.001490 -0.531170 -0.033333 0
1 -0.882353 -0.145729 0.081967 -0.414141 0.000000 -0.207153 -0.766866 -0.666667 1
2 -0.058824 0.839196 0.049180 0.000000 0.000000 -0.305514 -0.492741 -0.633333 0
3 -0.882353 -0.105528 0.081967 -0.535354 -0.777778 -0.162444 -0.923997 0.000000 1
4 0.000000 0.376884 -0.344262 -0.292929 -0.602837 0.284650 0.887276 -0.600000 0
In [4]:
torch.__version__
Out[4]:
'0.4.0'
In [10]:
class DiabetesDataSet(Dataset):
    def __init__(self):
        super(DiabetesDataSet, self).__init__()
        self.data = pd.read_csv("diabetes.csv", header= None)
        self.len = len(self.data)
        self.X = torch.tensor(torch.from_numpy(data.iloc[:,0:-1].values), dtype=torch.float)
        self.y = torch.tensor(torch.from_numpy(data.iloc[:,-1].values), dtype = torch.float).reshape(-1,1)
    def __len__(self):
        return self.len
    def __getitem__(self, index):
        return self.X[index], self.y[index]
In [11]:
dataset = DiabetesDataSet()
In [12]:
data_len = len(data); data_len
Out[12]:
759
In [13]:
val_size = 100
In [14]:
indices = np.arange(data_len)
In [15]:
valid_index = np.random.choice(indices, val_size, replace = False); valid_index
Out[15]:
array([628, 100, 144, 538, 122, 567, 347, 741, 447, 569, 249, 675, 280,
       163, 278, 341, 237,  10, 676, 721, 189, 332, 710, 697, 181, 391,
       625, 726, 343,  18, 247, 407, 308, 622, 312, 180,  22, 354, 268,
       754,  35, 286, 518, 367, 476, 433,  74, 356, 573,  36, 591, 145,
       629, 219, 223, 484, 271, 112, 120, 287, 749,  64, 702, 690, 640,
       446, 608, 605, 142, 345, 692, 563, 269, 359, 129, 201, 148, 192,
       218, 470, 196, 127, 103, 290, 229,  59, 187, 172, 736,  80, 128,
       576, 552, 633, 753, 146, 141, 594, 684, 330])
In [16]:
train_index = list(set(indices) - set(valid_index))
In [17]:
train_sampler = RandomSampler(train_index)
valid_sampler = RandomSampler(valid_index)
In [18]:
train_loader = torch.utils.data.DataLoader(dataset, batch_size=64, sampler=train_sampler )
valid_loader = torch.utils.data.DataLoader(dataset, batch_size=1, sampler=valid_sampler )
In [19]:
list(valid_loader)[0:5]
Out[19]:
[[tensor([[-0.1765,  0.0050,  0.0000,  0.0000,  0.0000, -0.1058, -0.6533,
           -0.6333]]), tensor([[ 0.]])],
 [tensor([[-0.1765,  0.5075,  0.0820, -0.1515, -0.1915,  0.0343, -0.4535,
           -0.3000]]), tensor([[ 1.]])],
 [tensor([[-0.6471,  0.8090,  0.0492, -0.4949, -0.8345,  0.0134, -0.8352,
           -0.8333]]), tensor([[ 1.]])],
 [tensor([[-0.1765,  0.1457,  0.0820,  0.0000,  0.0000, -0.0224, -0.8463,
           -0.3000]]), tensor([[ 0.]])],
 [tensor([[-0.8824,  0.0754,  0.1148, -0.6162,  0.0000, -0.2101, -0.9257,
           -0.9000]]), tensor([[ 1.]])]]

Model building

In [87]:
class LogisticRegression(torch.nn.Module):
    def __init__(self):
        super(LogisticRegression, self).__init__()
        self.l1 = torch.nn.Linear(8,6)
        self.l2 = torch.nn.Linear(6,1)
        self.tanh = torch.nn.Tanh()
        self.sigmoid = torch.nn.Sigmoid()
        self.relu = torch.nn.ReLU()
    def forward(self, x):
        o1 = self.tanh(self.l1(x))
        o2 = self.tanh(self.l2(o1))
        return self.sigmoid(o2)
        
In [96]:
model = LogisticRegression()
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
In [97]:
list(model.parameters())
Out[97]:
[Parameter containing:
 tensor([[-0.0757,  0.1741,  0.3242, -0.0835, -0.2505, -0.1529,  0.0892,
           0.1268],
         [-0.0962,  0.1949, -0.0479, -0.1036,  0.0793,  0.0808,  0.2273,
          -0.2280],
         [-0.0430,  0.0878,  0.2377, -0.2757,  0.0803, -0.2573,  0.3431,
           0.0895],
         [-0.0682, -0.1610,  0.0420, -0.1509, -0.2129,  0.1504, -0.0468,
          -0.1280],
         [-0.1197,  0.0876, -0.0403, -0.2835,  0.1486, -0.2251,  0.0741,
           0.1799],
         [ 0.0142,  0.0600, -0.0389,  0.1680,  0.2521,  0.2423,  0.3453,
           0.2976]]), Parameter containing:
 tensor([ 0.1646, -0.1738, -0.0404, -0.1957, -0.3481, -0.0875]), Parameter containing:
 tensor([[ 0.1094,  0.0086, -0.3964, -0.3861, -0.2615,  0.2997]]), Parameter containing:
 tensor([ 0.3987])]
In [98]:
tr_loss = []
vr_loss = []
for epoch in range(100):
    train_loss = []
    val_loss = []
    for i, (inputs,labels) in enumerate(train_loader):
        #inputs, labels = torch.tensor(inputs), torch.tensor(labels)
        pred = model(inputs)
        loss = criterion(pred, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        train_loss.append(loss.item())
    tr_loss.append(np.mean(train_loss))
    for i, (inputs, labels) in enumerate(valid_loader):
        #inputs, labels = torch.tensor(inputs), torch.tensor(labels)
        pred = model(inputs)
        loss = criterion(pred, labels)
        val_loss.append(loss.item())
    vr_loss.append(np.mean(val_loss))
        
In [99]:
plt.plot(tr_loss)
plt.plot(vr_loss)
Out[99]:
[<matplotlib.lines.Line2D at 0x11f0d33c8>]
In [100]:
pred_all = []
labels_all = []
for i, (inputs, labels) in enumerate(valid_loader):
        #inputs, labels = torch.tensor(inputs), torch.tensor(labels)
        pred = model(inputs)
        loss = criterion(pred, labels)
        pred_all.append(pred.item())
        labels_all.append(labels.item())
In [101]:
pred_all = np.array(pred_all) > 0.5
In [102]:
pred_all
Out[102]:
array([ True, False,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True, False,  True, False,
        True, False,  True,  True,  True,  True,  True, False, False,
        True,  True,  True,  True,  True, False, False,  True,  True,
       False, False,  True,  True,  True,  True,  True,  True,  True,
        True, False,  True,  True,  True, False, False,  True,  True,
        True,  True,  True, False,  True,  True, False,  True,  True,
       False, False,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True, False,
        True,  True, False,  True,  True, False, False,  True,  True,
       False])
In [103]:
np.mean(pred_all == labels_all)
Out[103]:
0.77

No comments :

Post a Comment