from torch.autograd import Variable
import torch
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import loadmat
%matplotlib inline
M = loadmat("mnist_all.mat")
def get_test(M):
    # Assemble the full test set: flattened 28x28 images scaled to [0, 1],
    # together with the matching one-hot label rows.
    batch_xs = np.zeros((0, 28*28))
    batch_y_s = np.zeros((0, 10))
    test_k = ["test"+str(i) for i in range(10)]
    for k in range(10):
        batch_xs = np.vstack((batch_xs, np.array(M[test_k[k]])[:]/255.))
        one_hot = np.zeros(10)
        one_hot[k] = 1
        batch_y_s = np.vstack((batch_y_s, np.tile(one_hot, (len(M[test_k[k]]), 1))))
    return batch_xs, batch_y_s
def get_train(M):
    # Same as get_test, but for the training portion of the .mat file
    batch_xs = np.zeros((0, 28*28))
    batch_y_s = np.zeros((0, 10))
    train_k = ["train"+str(i) for i in range(10)]
    for k in range(10):
        batch_xs = np.vstack((batch_xs, np.array(M[train_k[k]])[:]/255.))
        one_hot = np.zeros(10)
        one_hot[k] = 1
        batch_y_s = np.vstack((batch_y_s, np.tile(one_hot, (len(M[train_k[k]]), 1))))
    return batch_xs, batch_y_s
train_x, train_y = get_train(M)
test_x, test_y = get_test(M)
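As a quick sanity check, we can look at the shapes (the exact row counts depend on the contents of mnist_all.mat; for standard MNIST we'd expect 60,000 training and 10,000 test images):
train_x.shape, train_y.shape
test_x.shape, test_y.shape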
dim_x = 28*28
dim_h = 20
dim_out = 10
Let's display some of the training data
plt.imshow(train_x[100].reshape((28,28)), cmap = plt.cm.gray)
train_y[100]
plt.imshow(train_x[101].reshape((28,28)), cmap = plt.cm.gray)
train_y[101]
plt.imshow(train_x[50000].reshape((28,28)), cmap = plt.cm.gray)
train_y[50000]
Now, let's define Variables containing the training data
dtype_float = torch.FloatTensor
x = Variable(torch.from_numpy(train_x), requires_grad=False).type(dtype_float)
y = Variable(torch.from_numpy(train_y.astype(float)), requires_grad=False).type(dtype_float)
# Randomly initialize the parameters of a one-hidden-layer network
b0 = Variable(torch.randn((1, dim_h)), requires_grad=True)
W0 = Variable(torch.randn((dim_x, dim_h)), requires_grad=True)
b1 = Variable(torch.randn((1, dim_out)), requires_grad=True)
W1 = Variable(torch.randn((dim_h, dim_out)), requires_grad=True)
Note that everything is accessible right away: for example, the shapes below are read straight off the Variables' .data attributes.
Let's now define the model. Note that since we'll want to reuse it for different inputs, we'll want it to be in a function (or really in a class -- we'll show how to do that later). First, we'll remind ourselves of the dimensions of the data
x.data.shape, b0.data.shape, W0.data.shape
b1.data.shape, W1.data.shape, y.data.shape
def model(x, b0, W0, b1, W1):
    # Hidden layer: affine map followed by ReLU
    h = torch.nn.ReLU()(torch.matmul(x, W0) + b0.repeat(x.data.shape[0], 1))
    # Output layer: affine map; the softmax is folded into the loss below
    out = torch.matmul(h, W1) + b1.repeat(h.data.shape[0], 1)
    return out
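As an aside, the same architecture can be written with torch.nn building blocks -- a minimal sketch of the class-based route mentioned above, not the model we actually train below:
seq_model = torch.nn.Sequential(
    torch.nn.Linear(dim_x, dim_h),    # plays the role of W0 and b0
    torch.nn.ReLU(),
    torch.nn.Linear(dim_h, dim_out),  # plays the role of W1 and b1
)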
y_out = model(x, b0, W0, b1, W1)
################################################################################
# Subsample the training set for faster training
train_idx = np.random.permutation(range(train_x.shape[0]))[:1000]
x = Variable(torch.from_numpy(train_x[train_idx]), requires_grad=False).type(dtype_float)
y = Variable(torch.from_numpy(train_y[train_idx].astype(float)), requires_grad=False).type(dtype_float)
y_out = model(x, b0, W0, b1, W1)
#################################################################################
logSoftMax = torch.nn.LogSoftmax(dim=1) # We'll be too lazy to define this one by hand; dim=1 is the class dimension
# Average negative log-probability of the correct class (i.e., cross-entropy)
loss = -torch.mean(torch.sum(y * logSoftMax(y_out), 1))
loss
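For reference, this hand-written loss is the same quantity PyTorch's built-in cross-entropy computes; the built-in version takes integer class labels rather than one-hot rows (a sketch using the subsampled data above):
y_classes = Variable(torch.from_numpy(np.argmax(train_y[train_idx], 1)), requires_grad=False)
torch.nn.CrossEntropyLoss()(y_out, y_classes) # should agree with the loss above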
learning_rate = 1e-1
for t in range(1000):
    y_out = model(x, b0, W0, b1, W1)
    loss = -torch.mean(torch.sum(y * logSoftMax(y_out), 1))
    loss.backward()
    # Take a gradient-descent step on every parameter
    b0.data -= learning_rate * b0.grad.data
    W0.data -= learning_rate * W0.grad.data
    b1.data -= learning_rate * b1.grad.data
    W1.data -= learning_rate * W1.grad.data
    # Zero out the gradients: .backward() accumulates rather than overwrites
    b0.grad.data.zero_()
    W0.grad.data.zero_()
    b1.grad.data.zero_()
    W1.grad.data.zero_()
    #print(loss.data.numpy())
x_test_all_var = Variable(torch.from_numpy(test_x), requires_grad=False).type(dtype_float)
y_test_out = model(x_test_all_var, b0, W0, b1, W1).data.numpy()
np.argmax(y_test_out, 1)            # predicted classes for the whole test set
np.argmax(y_test_out[:100], 1)      # predictions for the first 100 test images...
np.argmax(test_y[:100], 1)          # ...compared to their true labels
np.argmax(y_test_out[1000:1100], 1) # predictions for test images 1000-1099...
np.argmax(test_y[1000:1100], 1)     # ...compared to their true labels
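Spot-checking a hundred labels at a time only goes so far; a one-liner gives the overall test accuracy:
np.mean(np.argmax(y_test_out, 1) == np.argmax(test_y, 1))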