# imports and basic notebook setup
from cStringIO import StringIO
import numpy as np
import scipy.ndimage as nd
import PIL.Image
from IPython.display import clear_output, Image, display
from google.protobuf import text_format
import pylab as pl
from scipy.misc import imread, imresize
import tensorflow as tf
from caffe_classes import class_names
def showarray(a, fmt='jpeg'):
    """Display an array inline as an encoded image.

    Values are clipped to the 0..255 range and converted to uint8, the
    result is encoded with PIL in the format named by `fmt`, and the encoded
    bytes are handed to IPython's `display`.
    """
    clipped = np.clip(a, 0, 255)
    pixels = np.uint8(clipped)
    # Encode into an in-memory buffer so nothing is written to disk.
    buf = StringIO()
    PIL.Image.fromarray(pixels).save(buf, fmt)
    encoded = buf.getvalue()
    display(Image(data=encoded))
# Single-sample placeholder arrays; only their shapes are read below
# (a 227x227 three-channel input and a 1000-wide label row).
train_x = np.zeros((1, 227, 227, 3), dtype=np.float32)
train_y = np.zeros((1, 1000))
xdim = train_x.shape[1:]
ydim = train_y.shape[1]

#####################################################################
# Read Image
# Allocate a float32 batch holding one image, then overwrite its only entry
# with the first three channels of poodle.png.
x_dummy = (np.random.random((1,) + xdim) / 255.).astype(np.float32)
i = x_dummy.copy()
i[0, :, :, :] = imread("poodle.png")[:, :, :3].astype(np.float32)

# Centre the pixel values on the image's own scalar mean.
mean_image = np.mean(i)
image_input = i - mean_image

# Pretrained parameters; `.item()` extracts the stored object, which the
# layer definitions further down index by layer name (e.g. "conv1").
net_data = pl.load("bvlc_alexnet.npy").item()
def conv(input, kernel, biases, k_h, k_w, c_o, s_h, s_w,
         padding="VALID",
         group=1):
    """Apply a (optionally grouped) 2-D convolution and add biases.

    Adapted from https://github.com/ethereon/caffe-tensorflow

    With `group == 1` the input is convolved with `kernel` directly.
    Otherwise both `input` and `kernel` are split into `group` pieces along
    axis 3, each pair is convolved separately, and the results are
    concatenated along axis 3 again.

    `s_h` and `s_w` are the strides along the two middle axes. `k_h` and
    `k_w` are accepted for signature parity with the layer spec but are not
    read here. `c_o` is only used for the divisibility check.

    Returns the biased convolution reshaped to its own static shape.
    """
    in_channels = input.get_shape()[-1]
    assert in_channels % group == 0
    assert c_o % group == 0

    strides = [1, s_h, s_w, 1]

    if group == 1:
        result = tf.nn.conv2d(input, kernel, strides, padding=padding)
    else:
        input_slices = tf.split(3, group, input)
        kernel_slices = tf.split(3, group, kernel)
        partial_outputs = []
        for input_slice, kernel_slice in zip(input_slices, kernel_slices):
            partial_outputs.append(
                tf.nn.conv2d(input_slice, kernel_slice, strides,
                             padding=padding))
        result = tf.concat(3, partial_outputs)

    biased = tf.nn.bias_add(result, biases)
    return tf.reshape(biased, result.get_shape().as_list())
# Network input: a float32 placeholder with the same shape as the
# mean-subtracted image batch `image_input`.
x_placeholder = tf.placeholder(tf.float32, shape=image_input.shape,
name='input')
# conv1: 11x11 kernels, 96 output channels, stride 4, single group, then ReLU.
# Kernel and biases are taken from net_data["conv1"][0] and [1].
# Layer spec: conv(11, 11, 96, 4, 4, padding='VALID', name='conv1')
# NOTE(review): the spec line says padding='VALID' but the call below passes
# padding="SAME" -- confirm which one is intended.
k_h = 11
k_w = 11
c_o = 96
s_h = 4
s_w = 4
conv1W = tf.Variable(net_data["conv1"][0])
conv1b = tf.Variable(net_data["conv1"][1])
conv1_in = conv(x_placeholder, conv1W, conv1b, k_h, k_w, c_o,
s_h, s_w, padding="SAME", group=1)
conv1 = tf.nn.relu(conv1_in)
# lrn1: local response normalization applied to conv1.
# Layer spec: lrn(2, 2e-05, 0.75, name='norm1')
# NOTE: `bias` here is the LRN bias term; the same module-level name is
# rebound later in the file to the per-channel image offset.
radius = 2
alpha = 2e-05
beta = 0.75
bias = 1.0
lrn1 = tf.nn.local_response_normalization(conv1,
depth_radius=radius,
alpha=alpha,
beta=beta,
bias=bias)
# maxpool1: 3x3 max pooling with stride 2 and VALID padding over lrn1.
# Layer spec: max_pool(3, 3, 2, 2, padding='VALID', name='pool1')
k_h = 3
k_w = 3
s_h = 2
s_w = 2
padding = 'VALID'
maxpool1 = tf.nn.max_pool(lrn1, ksize=[1, k_h, k_w, 1], strides=[
1, s_h, s_w, 1], padding=padding)
# conv2: 5x5 kernels, 256 output channels, stride 1, two groups, SAME
# padding, then ReLU. Kernel and biases come from net_data["conv2"][0] and [1].
# Layer spec: conv(5, 5, 256, 1, 1, group=2, name='conv2')
k_h = 5
k_w = 5
c_o = 256
s_h = 1
s_w = 1
group = 2
conv2W = tf.Variable(net_data["conv2"][0])
conv2b = tf.Variable(net_data["conv2"][1])
conv2_in = conv(maxpool1, conv2W, conv2b, k_h, k_w, c_o,
s_h, s_w, padding="SAME", group=group)
conv2 = tf.nn.relu(conv2_in)
# lrn2: local response normalization applied to conv2, with the same
# hyper-parameters as lrn1.
# Layer spec: lrn(2, 2e-05, 0.75, name='norm2')
radius = 2
alpha = 2e-05
beta = 0.75
bias = 1.0
lrn2 = tf.nn.local_response_normalization(conv2,
depth_radius=radius,
alpha=alpha,
beta=beta,
bias=bias)
# maxpool2: 3x3 max pooling with stride 2 and VALID padding over lrn2.
# Layer spec: max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
k_h = 3
k_w = 3
s_h = 2
s_w = 2
padding = 'VALID'
maxpool2 = tf.nn.max_pool(lrn2, ksize=[1, k_h, k_w, 1], strides=[
1, s_h, s_w, 1], padding=padding)
# conv3: 3x3 kernels, 384 output channels, stride 1, single group, SAME
# padding, then ReLU. `conv3` is also one of the layers whose activations
# are maximized by the dream code later in the file.
# Layer spec: conv(3, 3, 384, 1, 1, name='conv3')
k_h = 3
k_w = 3
c_o = 384
s_h = 1
s_w = 1
group = 1
conv3W = tf.Variable(net_data["conv3"][0])
conv3b = tf.Variable(net_data["conv3"][1])
conv3_in = conv(maxpool2, conv3W, conv3b, k_h, k_w, c_o,
s_h, s_w, padding="SAME", group=group)
conv3 = tf.nn.relu(conv3_in)
# conv4: 3x3 kernels, 384 output channels, stride 1, two groups, SAME
# padding, then ReLU.
# Layer spec: conv(3, 3, 384, 1, 1, group=2, name='conv4')
k_h = 3
k_w = 3
c_o = 384
s_h = 1
s_w = 1
group = 2
conv4W = tf.Variable(net_data["conv4"][0])
conv4b = tf.Variable(net_data["conv4"][1])
conv4_in = conv(conv3, conv4W, conv4b, k_h, k_w, c_o,
s_h, s_w, padding="SAME", group=group)
conv4 = tf.nn.relu(conv4_in)
# conv5: 3x3 kernels, 256 output channels, stride 1, two groups, SAME
# padding, then ReLU.
# Layer spec: conv(3, 3, 256, 1, 1, group=2, name='conv5')
k_h = 3
k_w = 3
c_o = 256
s_h = 1
s_w = 1
group = 2
conv5W = tf.Variable(net_data["conv5"][0])
conv5b = tf.Variable(net_data["conv5"][1])
conv5_in = conv(conv4, conv5W, conv5b, k_h, k_w, c_o,
s_h, s_w, padding="SAME", group=group)
conv5 = tf.nn.relu(conv5_in)
# maxpool5: 3x3 max pooling with stride 2 and VALID padding over conv5
# (no normalization layer in between, unlike the first two stages).
# Layer spec: max_pool(3, 3, 2, 2, padding='VALID', name='pool5')
k_h = 3
k_w = 3
s_h = 2
s_w = 2
padding = 'VALID'
maxpool5 = tf.nn.max_pool(conv5, ksize=[1, k_h, k_w, 1], strides=[
1, s_h, s_w, 1], padding=padding)
# fc6: fully connected layer with ReLU.
# maxpool5 is flattened to a single row whose width is the product of all
# of its non-batch dimensions; the batch size is hard-coded to 1 here.
# Layer spec: fc(4096, name='fc6')
fc6W = tf.Variable(net_data["fc6"][0])
fc6b = tf.Variable(net_data["fc6"][1])
fc6 = tf.nn.relu_layer(tf.reshape(
maxpool5, [1, int(np.prod(maxpool5.get_shape()[1:]))]), fc6W, fc6b)
# fc7: fully connected layer with ReLU on top of fc6.
# Layer spec: fc(4096, name='fc7')
fc7W = tf.Variable(net_data["fc7"][0])
fc7b = tf.Variable(net_data["fc7"][1])
fc7 = tf.nn.relu_layer(fc6, fc7W, fc7b, name='fc7')
# fc8: final affine layer (x*W + b) with no ReLU; its output feeds the softmax.
# Layer spec: fc(1000, relu=False, name='fc8')
fc8W = tf.Variable(net_data["fc8"][0])
fc8b = tf.Variable(net_data["fc8"][1])
fc8 = tf.nn.xw_plus_b(fc7, fc8W, fc8b)
# prob: softmax over the fc8 outputs.
# Layer spec: softmax(name='prob')
prob = tf.nn.softmax(fc8)
# Gradient tensors used by the dream loop. Passing the layer itself as
# `grad_ys` seeds back-propagation with the layer's own activations, so each
# tensor is the gradient of 0.5 * sum(layer ** 2) with respect to the input
# placeholder, i.e. the ascent direction for the layer's L2 norm.
# `[0]` selects the gradient for the single `xs` entry.
grad_p5 = tf.gradients(maxpool5, x_placeholder,grad_ys=maxpool5)[0]
grad_c3 = tf.gradients(conv3, x_placeholder,grad_ys=conv3)[0]
# Create the session and initialise every variable defined above.
init = tf.initialize_all_variables()
sess = tf.InteractiveSession()
sess.run(init)

# Classify the preprocessed image: `output` holds one row of class
# probabilities produced by the softmax.
feed_dict = {x_placeholder: image_input}
output = sess.run(prob, feed_dict=feed_dict)

# Class indices of the single batch row, ordered by ascending probability.
inds = np.argsort(output)[0, :]

# Print the five most probable classes, best first. A dedicated loop name is
# used so the module-level image array `i` is no longer overwritten, and the
# formatted print gives the same text on Python 2 and Python 3.
for top_idx in inds[::-1][:5]:
    print("%s %s" % (class_names[top_idx], output[0, top_idx]))
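# Making the "dream" images is very simple. Essentially it is just a gradient
# ascent process that tries to maximize the L2 norm of the activations of a
# particular DNN layer. Here are a few simple tricks that we found useful for
# getting good images:
#   * offset the image by a random jitter before each step;
#   * normalize the magnitude of the gradient ascent steps;
#   * apply the ascent across multiple scales (octaves).
# First we implement a basic gradient ascent step function, applying the first
# two tricks: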
def make_step(x_placeholder, image, bias, grad, layer, sess,
              step_size=1.5,
              jitter=32, clip=True):
    '''Take one jittered, normalized gradient-ascent step on `image`.

    The image is shifted by a random offset along its height and width
    axes, `grad` is evaluated with the shifted image fed to
    `x_placeholder`, the image is moved along that gradient (scaled so the
    mean absolute gradient maps to `step_size`), and the shift is undone.

    Args:
        x_placeholder: feed-dict key for the network input.
        image: NumPy array with channels on the last axis, i.e. shaped
            (..., height, width, channels). It is not modified in place
            because np.roll returns a new array.
        bias: offset that was subtracted from the raw pixels; used to clip
            the result to [-bias, 255 - bias].
        grad: object whose `eval(feed_dict=..., session=...)` returns the
            gradient with respect to the input.
        layer: unused. The gradient evaluation already runs the forward
            pass, so the layer is no longer evaluated separately; the
            parameter is kept so existing callers keep working.
        sess: session passed through to `grad.eval`.
        step_size: target mean absolute change per step.
        jitter: largest shift in pixels; each offset is drawn from
            [-jitter, jitter].
        clip: when true, clip the result to the valid pixel range.

    Returns:
        The updated image as a new array of the same shape.
    '''
    ox, oy = np.random.randint(-jitter, jitter + 1, 2)
    # Channels are last, so width is axis -2 and height is axis -3. Rolling
    # axis -1 would permute the colour channels instead of shifting pixels.
    image = np.roll(np.roll(image, ox, -2), oy, -3)  # apply jitter shift

    g = grad.eval(feed_dict={x_placeholder: image}, session=sess)

    # Apply the normalized ascent step. A zero gradient is skipped so the
    # division cannot turn the whole image into NaN.
    scale = np.abs(g).mean()
    if scale > 0:
        image += step_size / scale * g

    image = np.roll(np.roll(image, -ox, -2), -oy, -3)  # unshift image

    if clip:
        image[:] = np.clip(image, -bias, 255 - bias)
    return image
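# Next we implement an ascent through different scales. We call these scales
# "octaves". Note that the deepdream() function below accepts octave_n and
# octave_scale but does not use them: every iteration runs at the single
# resolution of the input image.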
def deepdream(sess, x_placeholder, base_img, bias, grad, layer,
              iter_n=10, octave_n=4, octave_scale=1.4,
              clip=True, **step_params):
    '''Run `iter_n` gradient-ascent steps on `base_img`, showing each result.

    Args:
        sess, x_placeholder, bias, grad, layer: forwarded to `make_step`.
        base_img: starting image batch; the last three axes are used as the
            image shown after every step.
        iter_n: number of ascent steps.
        octave_n, octave_scale: accepted for signature compatibility but
            not used; the ascent runs at a single scale.
        clip: forwarded to `make_step`.
        **step_params: extra keyword arguments for `make_step`
            (for example `step_size` or `jitter`).

    Returns:
        The image batch after the last step.
    '''
    for _ in range(iter_n):
        base_img = make_step(x_placeholder, base_img, bias, grad, layer, sess,
                             clip=clip, **step_params)
        # Add the offset back for display and drop the leading batch axis.
        # The shape comes from the array itself rather than a fixed
        # 227x227x3, so other input sizes display as well.
        preview = base_img + bias
        showarray(np.reshape(preview, preview.shape[-3:]))
        clear_output(wait=True)
    return base_img
# Now we are ready to let the neural network reveal its dreams! Let's take a
# cloud image as a starting point:
# Preview the starting image, resized to the 227x227 size used by the
# network input.
# NOTE(review): scipy.misc.imresize is documented to rescale non-uint8 input
# to the full 0..255 range; confirm that this contrast stretch of the float32
# array is intended.
img = imresize(np.float32(PIL.Image.open('deepdream/sky1024px.jpg')),
               (227, 227))
showarray(img)

# Dream from the same starting image twice: first maximizing conv3, then
# maxpool5. Each pass reloads and shows the image, subtracts the per-channel
# offset (added back for display inside deepdream), adds a batch axis and
# runs 500 ascent steps.
for _grad, _layer in ((grad_c3, conv3), (grad_p5, maxpool5)):
    img = imresize(np.float32(PIL.Image.open('deepdream/sky1024px.jpg')),
                   (227, 227))
    showarray(img)
    bias = np.array([104.0, 116.0, 122.0])
    img = img - bias
    img_input = np.reshape(img, (1, 227, 227, 3))
    _ = deepdream(sess, x_placeholder, img_input, bias, _grad, _layer, 500)