In [None]:
"""Extracting feature using caffe and train / test SVM.
"""

__author__ = 'yukun@cs.toronto.edu'

import matplotlib.colors as clr
import matplotlib.pyplot as plt
import numpy as np

from PIL import Image

from sklearn import svm
from sklearn import datasets

%matplotlib inline
plt.style.use('ggplot')

In [None]:
# Configure caffe
# The pycaffe package is imported from a local checkout, so that directory is
# put at the front of sys.path before `import caffe` runs.
import sys
sys.path.insert(0, '/Users/yukun/Documents/Projects/caffe/python')
import caffe
# Select CPU mode for caffe.
caffe.set_mode_cpu()

# Extracting features using Caffe

In [None]:
# Locations of the pre-trained model, its prototxt and the mean file.
# All three files sit directly inside MODEL_DIR (which ends with a slash).
MODEL_DIR = '/Users/yukun/Documents/Projects/caffe/models/bvlc_reference_caffenet/'
MODEL_PROTO_PATH = '%sdeploy.prototxt' % MODEL_DIR
MODEL_PATH = '%sbvlc_reference_caffenet.caffemodel' % MODEL_DIR
MEAN_FILE_PATH = '%silsvrc_2012_mean.npy' % MODEL_DIR

In [None]:
# Prototxt file for CaffeNet
!cat '/Users/yukun/Documents/Projects/caffe/models/bvlc_reference_caffenet/deploy.prototxt'

In [None]:
ls /Users/yukun/Documents/Projects/caffe/models/bvlc_reference_caffenet/

In [None]:
# Build the network from the prototxt definition and the pre-trained model
# file, passing caffe.TEST as the phase.
net = caffe.Net(MODEL_PROTO_PATH, MODEL_PATH, caffe.TEST)

In [None]:
# The network takes an input tensor of shape (BATCH_SIZE, DEPTH, HEIGHT, WIDTH) with BGR ordering
data_shape = net.blobs['data'].shape
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print('input data shape: (%d, %d, %d ,%d)' % (data_shape[0], data_shape[1], data_shape[2], data_shape[3]))

In [None]:
# Each input image has to be converted to (DEPTH, HEIGHT, WIDTH) before it can
# be fed to the network; the transformer below performs that conversion.
# Reduce the stored mean array over its last two axes, leaving one value per
# entry of the first axis.
channel_mean = np.load(MEAN_FILE_PATH).mean(1).mean(1)

transformer = caffe.io.Transformer({'data': data_shape})
# Axis order (2, 0, 1): presumably moves the channel axis of an
# (HEIGHT, WIDTH, DEPTH) image to the front -- TODO confirm.
transformer.set_transpose('data', (2, 0, 1))
# Reverse the channel order (index 2 first, index 0 last).
transformer.set_channel_swap('data', (2, 1, 0))
transformer.set_mean('data', channel_mean)

In [None]:
# Load the complete mean array stored in the mean file.
mean_tensor = np.load(MEAN_FILE_PATH)

In [None]:
# Show the shape of the stored mean array.
print(mean_tensor.shape)

In [None]:
# Load a cat image
# Image from http://demo.caffe.berkeleyvision.org
pil_img = Image.open('/Users/yukun/Documents/Projects/caffe_tutorial/cat_1.jpg')
# Display the image with the axes hidden.
plt.imshow(pil_img)
plt.axis('off')

In [None]:
# View the PIL image as a NumPy array and report its shape.
np_image = np.asarray(pil_img)
print(np_image.shape)

In [None]:
# Transform image into input tensor
# np.float64 is what the old `np.float` alias resolved to; the alias itself no
# longer exists in recent NumPy releases.
input_tensor = transformer.preprocess('data', np.asarray(pil_img).astype(np.float64))
print('input tensor shape: %s' % (input_tensor.shape, ))

In [None]:
# Plot depth channel
# One subplot per channel of the preprocessed tensor; the value range of each
# channel is printed as well.
figure = plt.figure(figsize=(10, 5))
for c in range(3):
    plt.subplot(1, 3, c + 1)
    plt.imshow(input_tensor[c])
    plt.axis('off')
    plt.title('Channel %d' % c, fontsize=20)
    print('depth channel %d, max: %.2f, min %.2f' % (c, np.max(input_tensor[c]), np.min(input_tensor[c])))

In [None]:
# Read 1000 classes from ImageNet
with open('/Users/yukun/Documents/Projects/caffe/data/ilsvrc12/synset_words.txt') as f:
    content = f.readlines()

# For every line: keep the text before the first comma, then drop its first
# space-separated token and re-join the rest.
class_name = [' '.join(line.strip('\n').split(',')[0].split(' ')[1:]) for line in content]
print(class_name[:5])

In [None]:
# Extracting features
# Reshape data blob to fit image input size (a batch holding one image)
net.blobs['data'].reshape(1, *input_tensor.shape)
# Write the preprocessed image into the blob's memory.
net.blobs['data'].data[...] = input_tensor

# Forwarding
response = net.forward()
print(response['prob'].shape)

In [None]:
# Plot predicted probs
# Flatten the 'prob' output to one curve and put the top-scoring class, with
# its score as a percentage, in the title.
prob = response['prob'].flatten()
cls_id = np.argmax(prob)
plt.plot(prob, lw=2)
plt.title(
    'max prob: %s (%.2f%%)' % (class_name[cls_id], prob[cls_id] * 100),
    fontsize=20)

In [None]:
# Get intermediate layer response
# `.data` exposes the blob's own memory (the notebook writes inputs through the
# same attribute), so take a copy: otherwise later forward passes or reshapes
# would change what `fc7_data` refers to.
fc7_data = net.blobs['fc7'].data.copy()
print('fully connected layer 7 response has shape: %s' % (fc7_data.shape, ))

In [None]:
# Plot the fc7 response as one flattened curve.
plt.plot(fc7_data.flatten())
plt.title('Fully connected layer 7 response', fontsize=20)

# Why CNN features

In [None]:
# Load images
# Images from caffe.berkeleyvision.org, wallpaperbeta.com
photo_list = ['cat_1.jpg', 'cat_2.jpg', 'dog.jpg']
pil_img_list = []
figure = plt.figure(figsize=(14, 7))
for i, photo in enumerate(photo_list):
    # Open each photo and resize it to 227 x 227 before storing it.
    pil_img = Image.open('/Users/yukun/Documents/Projects/caffe_tutorial/' + photo).resize((227, 227))
    pil_img_list.append(pil_img)
    # Show the resized photo in its own panel of a 1 x 3 grid.
    plt.subplot(1, 3, i + 1)
    plt.imshow(pil_img)
    plt.axis('off')

In [None]:
# Compute similarity of images
# Every image is flattened to a float vector and divided by its L2 norm, so the
# dot product of two vectors is their cosine similarity.
# (np.float64 replaces the `np.float` alias, which recent NumPy no longer has.)
pixel_space_feat = [np.asarray(img).flatten().astype(np.float64) for img in pil_img_list]
pixel_space_feat = [(feat / np.linalg.norm(feat)) for feat in pixel_space_feat]
# Plot similarity matrix
similarity_matrix = np.zeros((3, 3))
for i in range(3):
    # Row i holds the similarity of image i to every image.
    similarity_matrix[i] = np.dot(pixel_space_feat, pixel_space_feat[i])
plt.imshow(similarity_matrix, interpolation='nearest', cmap=plt.get_cmap('OrRd'))
# Pass a real boolean: the string 'off' is truthy and switches the grid ON in
# matplotlib releases that no longer parse 'on'/'off' strings.
plt.grid(False)
plt.xticks([0, 1, 2], ['cat', 'cat', 'dog'], fontsize=20)
plt.yticks([0, 1, 2], ['cat', 'cat', 'dog'], fontsize=20)
plt.colorbar()


In [None]:
# Image representation using CNN prob
# Construct an input batch with 3 images
input_tensor = []
for image in pil_img_list:
    # np.float64 replaces the `np.float` alias, which recent NumPy no longer has.
    input_tensor.append(transformer.preprocess('data', np.asarray(image).astype(np.float64)))
input_tensor = np.asarray(input_tensor)

# Reshape data blob to fit image input size
net.blobs['data'].reshape(*input_tensor.shape)
net.blobs['data'].data[...] = input_tensor

# Forwarding
response = net.forward()
print(response['prob'].shape)

In [None]:
# Plot prob
# One curve per image in the batch; the legend entry names the top-scoring
# class and its score as a percentage.
for prob in response['prob']:
    cls_id = np.argmax(prob)
    plt.plot(prob, lw=2,
             label='%s (%.2f%%)' % (class_name[cls_id], prob[cls_id] * 100))

plt.legend(fontsize=15)

In [None]:
# Plot similarity matrix using CNN prob
# Each 'prob' row is divided by its L2 norm, so dot products are cosine
# similarities.
cnn_prob_feat = [(feat / np.linalg.norm(feat)) for feat in response['prob']]

similarity_matrix = np.zeros((3, 3))
for i in range(3):
    # Row i holds the similarity of image i to every image.
    similarity_matrix[i] = np.dot(cnn_prob_feat, cnn_prob_feat[i])
plt.imshow(similarity_matrix, interpolation='nearest', cmap=plt.get_cmap('OrRd'))
# Pass a real boolean: the string 'off' is truthy and switches the grid ON in
# matplotlib releases that no longer parse 'on'/'off' strings.
plt.grid(False)
plt.xticks([0, 1, 2], ['cat', 'cat', 'dog'], fontsize=20)
plt.yticks([0, 1, 2], ['cat', 'cat', 'dog'], fontsize=20)
plt.colorbar()

In [None]:
# Report the shape of the fc7 blob after the batched forward pass.
print('fc7 layer has tensor with shape %s' % (net.blobs['fc7'].data.shape, ))

In [None]:
# Plot similarity matrix using CNN fc7
# Each fc7 row is divided by its L2 norm, so dot products are cosine
# similarities.
cnn_fc7_feat = [(feat / np.linalg.norm(feat)) for feat in net.blobs['fc7'].data]

similarity_matrix = np.zeros((3, 3))
for i in range(3):
    # Row i holds the similarity of image i to every image.
    similarity_matrix[i] = np.dot(cnn_fc7_feat, cnn_fc7_feat[i])
plt.imshow(similarity_matrix, interpolation='nearest', cmap=plt.get_cmap('OrRd'))
# Pass a real boolean: the string 'off' is truthy and switches the grid ON in
# matplotlib releases that no longer parse 'on'/'off' strings.
plt.grid(False)
plt.xticks([0, 1, 2], ['cat', 'cat', 'dog'], fontsize=20)
plt.yticks([0, 1, 2], ['cat', 'cat', 'dog'], fontsize=20)
plt.colorbar()

# Why intermediate responses?

In [None]:
# Load images
# Images from www.theartwolf.com, en.wikipedia.org
photo_list = ['impressionism_1.jpg', 'impressionism_2.jpg', 'cubism.jpg']
pil_img_list = []
figure = plt.figure(figsize=(14, 7))
for i, photo in enumerate(photo_list):
    # Open each painting and resize it to 227 x 227 before storing it.
    pil_img = Image.open('/Users/yukun/Documents/Projects/caffe_tutorial/' + photo).resize((227, 227))
    pil_img_list.append(pil_img)
    # Show the resized painting in its own panel of a 1 x 3 grid.
    plt.subplot(1, 3, i + 1)
    plt.imshow(pil_img)
    plt.axis('off')

In [None]:
# Image representation using CNN
# Construct an input batch with 3 images
input_tensor = []
for image in pil_img_list:
    # np.float64 replaces the `np.float` alias, which recent NumPy no longer has.
    input_tensor.append(transformer.preprocess('data', np.asarray(image).astype(np.float64)))
input_tensor = np.asarray(input_tensor)

# Reshape data blob to fit image input size
net.blobs['data'].reshape(*input_tensor.shape)
net.blobs['data'].data[...] = input_tensor

# Forwarding
response = net.forward()
print(response['prob'].shape)

In [None]:
# Plot prob
# One curve per painting in the batch; the legend entry names the top-scoring
# class and its score as a percentage.
for prob in response['prob']:
    cls_id = np.argmax(prob)
    plt.plot(prob, lw=2,
             label='%s (%.2f%%)' % (class_name[cls_id], prob[cls_id] * 100))

plt.legend(fontsize=15)

In [None]:
# Plot similarity matrix using CNN prob
# Each 'prob' row is divided by its L2 norm, so dot products are cosine
# similarities.
cnn_prob_feat = [(feat / np.linalg.norm(feat)) for feat in response['prob']]

similarity_matrix = np.zeros((3, 3))
for i in range(3):
    # Row i holds the similarity of painting i to every painting.
    similarity_matrix[i] = np.dot(cnn_prob_feat, cnn_prob_feat[i])
plt.imshow(similarity_matrix, interpolation='nearest', cmap=plt.get_cmap('OrRd'))
# Pass a real boolean: the string 'off' is truthy and switches the grid ON in
# matplotlib releases that no longer parse 'on'/'off' strings.
plt.grid(False)
plt.xticks([0, 1, 2], ['impressionism', 'impressionism', 'cubism'], fontsize=20, rotation='vertical')
plt.yticks([0, 1, 2], ['impressionism', 'impressionism', 'cubism'], fontsize=20)
plt.colorbar()

In [None]:
# Plot similarity matrix using CNN fc7
# Each fc7 row is divided by its L2 norm, so dot products are cosine
# similarities.
cnn_fc7_feat = [(feat / np.linalg.norm(feat)) for feat in net.blobs['fc7'].data]

similarity_matrix = np.zeros((3, 3))
for i in range(3):
    # Row i holds the similarity of painting i to every painting.
    similarity_matrix[i] = np.dot(cnn_fc7_feat, cnn_fc7_feat[i])
plt.imshow(similarity_matrix, interpolation='nearest', cmap=plt.get_cmap('OrRd'))
# Pass a real boolean: the string 'off' is truthy and switches the grid ON in
# matplotlib releases that no longer parse 'on'/'off' strings.
plt.grid(False)
plt.xticks([0, 1, 2], ['impressionism', 'impressionism', 'cubism'], fontsize=20, rotation='vertical')
plt.yticks([0, 1, 2], ['impressionism', 'impressionism', 'cubism'], fontsize=20)
plt.colorbar()

Want to know more? See:
http://caffe.berkeleyvision.org/gathered/examples/finetune_flickr_style.html

# Training / testing SVM
Examples are borrowed from http://scikit-learn.org/stable/auto_examples/svm/plot_iris.html

In [None]:
# Load data
iris = datasets.load_iris()
data = iris.data[:, :2]  # we only take the first two features.
labels = iris.target
print('Load data with %d examples.' % len(labels))

In [None]:
# Shuffle the examples with a fixed seed so the split is repeatable.
np.random.seed(1234)
random_order = np.random.permutation(len(labels))
data, labels = data[random_order, :], labels[random_order]

# The first 100 shuffled examples form the training set, the rest the test set.
train_data, test_data = data[:100], data[100:]
train_labels, test_labels = labels[:100], labels[100:]

In [None]:
# Plot iris dataset
# Styling: one marker area per training example, plus a three-colour map built
# from 0-255 RGB triples scaled into the 0-1 range.
area = np.full(len(train_labels), np.pi * 8 ** 2)
color_map = clr.ListedColormap(
    [tuple(rgb) for rgb in np.asarray([[240, 39, 40], [197, 176, 213], [152, 223, 138]]) / 255.])

# Scatter the training examples, coloured by label.
plt.figure(figsize=(8, 5))
plt.scatter(train_data[:, 0], train_data[:, 1],
            s=area, c=train_labels, cmap=color_map, alpha=1)
plt.axis('equal')
plt.xlabel('Sepal length', fontsize=20)
plt.ylabel('Sepal width', fontsize=20)

In [None]:
def plot_decision_boundary(svm, data, labels, cmap, marker_area=None):
    """Draw a classifier's decision regions with the given points on top.

    The classifier is evaluated on a dense grid that extends one unit beyond
    the range of `data` along both axes; the predictions are drawn as filled
    contours in a new figure and the points are scattered over them.

    Args:
        svm: fitted classifier whose `predict` accepts an (n, 2) array. The
            name shadows the `sklearn.svm` module inside this function; it is
            kept so existing keyword callers continue to work.
        data: array indexed as `data[:, 0]` / `data[:, 1]` (two feature
            columns are used).
        labels: one label per row of `data`, used to colour the points.
        cmap: colormap shared by the regions and the points.
        marker_area: scatter marker size passed as `s` (a scalar or one value
            per point). Defaults to `np.pi * 8 ** 2`.

    Previously the marker sizes were read from the notebook-level `area`
    array, which has one entry per *training* example. Plotting any other
    number of points (e.g. the test set) then paired sizes with the wrong
    points, and recent matplotlib releases reject a size array whose length
    differs from the number of points.
    """
    if marker_area is None:
        marker_area = np.pi * 8 ** 2

    # create a mesh to plot in
    h = .01  # step size in the mesh
    x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
    y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Predict a class for every grid node (one (x, y) pair per row).
    pred = svm.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    plt.figure(figsize=(8, 5))
    pred = pred.reshape(xx.shape)
    plt.contourf(xx, yy, pred, cmap=cmap, alpha=0.5)
    plt.scatter(data[:, 0], data[:, 1], s=marker_area, c=labels, cmap=cmap, alpha=0.6)
    plt.axis('equal')
    plt.axis('off')

In [None]:
# Fit a linear SVM on the training split and draw its decision regions.
C = 1.0  # SVM regularization parameter
lin_svc = svm.LinearSVC(C=C)
lin_svc.fit(train_data, train_labels)
plot_decision_boundary(lin_svc, train_data, train_labels, color_map)
plt.title('SVM decision boundary for C=1.0', fontsize=20)

In [None]:
# Same model with a smaller value of the regularization parameter C.
C = 0.1
lin_svc_small_c = svm.LinearSVC(C=C)
lin_svc_small_c.fit(train_data, train_labels)
plot_decision_boundary(lin_svc_small_c, train_data, train_labels, color_map)
plt.title('SVM decision boundary for C=0.1', fontsize=20)

In [None]:
# Same model with a larger value of the regularization parameter C.
C = 100.
lin_svc_large_c = svm.LinearSVC(C=C)
lin_svc_large_c.fit(train_data, train_labels)
plot_decision_boundary(lin_svc_large_c, train_data, train_labels, color_map)
plt.title('SVM decision boundary for C=100.0', fontsize=20)

In [None]:
# SVM prediction
# Score the C=1.0 model on the test split (percentage of matching labels) and
# draw its decision regions over the test points.
svm_pred = lin_svc.predict(test_data)
accuracy = 100.0 * (svm_pred == test_labels).sum() / len(test_labels)
plot_decision_boundary(lin_svc, test_data, test_labels, color_map)
plt.title('SVM prediction for C=1.0, accuracy=%.2f' % accuracy, fontsize=20)

In [None]:
# Score the C=0.1 model on the test split (percentage of matching labels) and
# draw its decision regions over the test points.
svm_pred_small_c = lin_svc_small_c.predict(test_data)
accuracy = 100.0 * (svm_pred_small_c == test_labels).sum() / len(test_labels)
plot_decision_boundary(lin_svc_small_c, test_data, test_labels, color_map)
plt.title('SVM prediction for C=0.1, accuracy=%.2f' % accuracy, fontsize=20)

In [None]:
# Score the C=100.0 model on the test split (percentage of matching labels) and
# draw its decision regions over the test points.
svm_pred_large_c = lin_svc_large_c.predict(test_data)
accuracy = 100.0 * (svm_pred_large_c == test_labels).sum() / len(test_labels)
plot_decision_boundary(lin_svc_large_c, test_data, test_labels, color_map)
plt.title('SVM prediction for C=100.0, accuracy=%.2f' % accuracy, fontsize=20)