Deep Dream (with TensorFlow)

# imports and basic notebook setup
from cStringIO import StringIO
import numpy as np
import scipy.ndimage as nd
import PIL.Image
from IPython.display import clear_output, Image, display
from google.protobuf import text_format
import pylab as pl
from scipy.misc import imread, imresize
import tensorflow as tf

from caffe_classes import class_names

def showarray(a, fmt='jpeg'):
    """Display array `a` inline in the notebook as an encoded image.

    The values are clipped to [0, 255] and cast to uint8, encoded by PIL
    in the format named by `fmt`, and the encoded bytes are passed to
    IPython's `display`.
    """
    pixels = np.clip(a, 0, 255)
    pixels = np.uint8(pixels)
    buf = StringIO()
    picture = PIL.Image.fromarray(pixels)
    picture.save(buf, fmt)
    encoded = buf.getvalue()
    display(Image(data=encoded))

# Zero-filled stand-ins; they are only used to derive the input and output
# dimensions (xdim, ydim) below.
train_x = np.zeros((1, 227, 227, 3), dtype=np.float32)
train_y = np.zeros((1, 1000))
xdim = train_x.shape[1:]
ydim = train_y.shape[1]


#####################################################################
# Read Image

# Random-valued float32 buffer with a leading batch axis of one.
x_dummy = (np.random.random((1,) + xdim) / 255.).astype(np.float32)

# Copy the buffer and overwrite it with the first three channels of the
# test image.
i = x_dummy.copy()
i[0, :, :, :] = imread("poodle.png")[:, :, :3].astype(np.float32)

# Centre the image by subtracting its overall (scalar) mean.
mean_image = np.mean(i)
image_input = i - mean_image

Loading DNN model

# Load the pre-trained network parameters.  `.item()` unwraps the stored
# object so that layers can be looked up by name, e.g.
# net_data["conv1"][0] (weights) and net_data["conv1"][1] (biases).
# NOTE(review): presumably the .npy file holds a pickled dict; unpickling
# can execute arbitrary code, so only load a weights file you trust.
net_data = pl.load("bvlc_alexnet.npy").item()


def conv(input, kernel, biases, k_h, k_w, c_o, s_h, s_w,
         padding="VALID",
         group=1):
    '''Grouped 2-D convolution followed by a bias add.

    Adapted from https://github.com/ethereon/caffe-tensorflow

    input   -- tensor to convolve; its last dimension must be divisible
               by `group`.
    kernel  -- convolution filter tensor.
    biases  -- bias vector added to the convolution result.
    k_h, k_w -- kernel height / width (accepted but not read here).
    c_o     -- number of output channels; only checked for divisibility
               by `group`.
    s_h, s_w -- vertical / horizontal stride.
    padding -- padding mode passed through to tf.nn.conv2d.
    group   -- number of groups.  With group > 1 both `input` and
               `kernel` are split into `group` pieces along dimension 3,
               convolved pairwise, and the results concatenated again
               along dimension 3.

    Returns the biased convolution, reshaped to the static shape of the
    un-biased convolution result.
    '''
    in_channels = input.get_shape()[-1]
    assert in_channels % group == 0
    assert c_o % group == 0

    strides = [1, s_h, s_w, 1]

    if group == 1:
        result = tf.nn.conv2d(input, kernel, strides, padding=padding)
    else:
        # NOTE(review): tf.split / tf.concat are called with the dimension
        # as first argument, i.e. the pre-1.0 TensorFlow argument order.
        input_slices = tf.split(3, group, input)
        kernel_slices = tf.split(3, group, kernel)
        partial_results = []
        for in_slice, kernel_slice in zip(input_slices, kernel_slices):
            partial_results.append(
                tf.nn.conv2d(in_slice, kernel_slice, strides,
                             padding=padding))
        result = tf.concat(3, partial_results)

    biased = tf.nn.bias_add(result, biases)
    return tf.reshape(biased, result.get_shape().as_list())
            

# Network input: one image with the same shape as `image_input`, supplied
# through feed_dict at run time.
x_placeholder = tf.placeholder(
    tf.float32, shape=image_input.shape, name='input')

# conv1 -- caffe spec: conv(11, 11, 96, 4, 4, padding='VALID', name='conv1')
# NOTE(review): the spec above says 'VALID' but the call passes "SAME";
# confirm which one is intended.
k_h, k_w, c_o, s_h, s_w = 11, 11, 96, 4, 4
conv1W = tf.Variable(net_data["conv1"][0])
conv1b = tf.Variable(net_data["conv1"][1])
conv1_in = conv(
    x_placeholder, conv1W, conv1b,
    k_h, k_w, c_o, s_h, s_w,
    padding="SAME", group=1)
conv1 = tf.nn.relu(conv1_in)

# lrn1 -- caffe spec: lrn(2, 2e-05, 0.75, name='norm1')
radius, alpha, beta, bias = 2, 2e-05, 0.75, 1.0
lrn1 = tf.nn.local_response_normalization(
    conv1, depth_radius=radius, alpha=alpha, beta=beta, bias=bias)

# maxpool1 -- caffe spec: max_pool(3, 3, 2, 2, padding='VALID', name='pool1')
k_h, k_w, s_h, s_w = 3, 3, 2, 2
padding = 'VALID'
maxpool1 = tf.nn.max_pool(
    lrn1,
    ksize=[1, k_h, k_w, 1],
    strides=[1, s_h, s_w, 1],
    padding=padding)

# conv2 -- caffe spec: conv(5, 5, 256, 1, 1, group=2, name='conv2')
k_h, k_w, c_o, s_h, s_w = 5, 5, 256, 1, 1
group = 2
conv2W = tf.Variable(net_data["conv2"][0])
conv2b = tf.Variable(net_data["conv2"][1])
conv2_in = conv(
    maxpool1, conv2W, conv2b,
    k_h, k_w, c_o, s_h, s_w,
    padding="SAME", group=group)
conv2 = tf.nn.relu(conv2_in)

# lrn2 -- caffe spec: lrn(2, 2e-05, 0.75, name='norm2')
radius, alpha, beta, bias = 2, 2e-05, 0.75, 1.0
lrn2 = tf.nn.local_response_normalization(
    conv2, depth_radius=radius, alpha=alpha, beta=beta, bias=bias)

# maxpool2 -- caffe spec: max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
k_h, k_w, s_h, s_w = 3, 3, 2, 2
padding = 'VALID'
maxpool2 = tf.nn.max_pool(
    lrn2,
    ksize=[1, k_h, k_w, 1],
    strides=[1, s_h, s_w, 1],
    padding=padding)
# conv3 -- caffe spec: conv(3, 3, 384, 1, 1, name='conv3')
k_h, k_w, c_o, s_h, s_w = 3, 3, 384, 1, 1
group = 1
conv3W = tf.Variable(net_data["conv3"][0])
conv3b = tf.Variable(net_data["conv3"][1])
conv3_in = conv(
    maxpool2, conv3W, conv3b,
    k_h, k_w, c_o, s_h, s_w,
    padding="SAME", group=group)
conv3 = tf.nn.relu(conv3_in)

# conv4 -- caffe spec: conv(3, 3, 384, 1, 1, group=2, name='conv4')
k_h, k_w, c_o, s_h, s_w = 3, 3, 384, 1, 1
group = 2
conv4W = tf.Variable(net_data["conv4"][0])
conv4b = tf.Variable(net_data["conv4"][1])
conv4_in = conv(
    conv3, conv4W, conv4b,
    k_h, k_w, c_o, s_h, s_w,
    padding="SAME", group=group)
conv4 = tf.nn.relu(conv4_in)

# conv5 -- caffe spec: conv(3, 3, 256, 1, 1, group=2, name='conv5')
k_h, k_w, c_o, s_h, s_w = 3, 3, 256, 1, 1
group = 2
conv5W = tf.Variable(net_data["conv5"][0])
conv5b = tf.Variable(net_data["conv5"][1])
conv5_in = conv(
    conv4, conv5W, conv5b,
    k_h, k_w, c_o, s_h, s_w,
    padding="SAME", group=group)
conv5 = tf.nn.relu(conv5_in)

# maxpool5 -- caffe spec: max_pool(3, 3, 2, 2, padding='VALID', name='pool5')
k_h, k_w, s_h, s_w = 3, 3, 2, 2
padding = 'VALID'
maxpool5 = tf.nn.max_pool(
    conv5,
    ksize=[1, k_h, k_w, 1],
    strides=[1, s_h, s_w, 1],
    padding=padding)
                                                                                 
# fc6 -- caffe spec: fc(4096, name='fc6')
# The pool5 output is flattened into a single row (batch of one) before it
# enters the fully connected layer.
fc6W = tf.Variable(net_data["fc6"][0])
fc6b = tf.Variable(net_data["fc6"][1])
fc6 = tf.nn.relu_layer(
    x=tf.reshape(maxpool5, [1, int(np.prod(maxpool5.get_shape()[1:]))]),
    weights=fc6W,
    biases=fc6b)

# fc7 -- caffe spec: fc(4096, name='fc7')
fc7W = tf.Variable(net_data["fc7"][0])
fc7b = tf.Variable(net_data["fc7"][1])
fc7 = tf.nn.relu_layer(x=fc6, weights=fc7W, biases=fc7b, name='fc7')

# fc8 -- caffe spec: fc(1000, relu=False, name='fc8')
# No ReLU here: just the affine map x * W + b.
fc8W = tf.Variable(net_data["fc8"][0])
fc8b = tf.Variable(net_data["fc8"][1])
fc8 = tf.nn.xw_plus_b(x=fc7, weights=fc8W, biases=fc8b)

# prob -- caffe spec: softmax(name='prob')
prob = tf.nn.softmax(fc8)
# Dream objectives: gradients of a layer with respect to the input image.
# Passing the layer itself as grad_ys seeds the backward pass with the
# layer's own activations, which corresponds to ascending
# 0.5 * sum(activation ** 2) for that layer.
grad_p5 = tf.gradients(
    ys=maxpool5, xs=x_placeholder, grad_ys=maxpool5)[0]

grad_c3 = tf.gradients(
    ys=conv3, xs=x_placeholder, grad_ys=conv3)[0]

# Create the variable initialiser, open a session and load the weights.
init = tf.initialize_all_variables()
sess = tf.InteractiveSession()
sess.run(init)

# Classify the test image: `output` holds the softmax probabilities.
feed_dict = {x_placeholder: image_input}
output = sess.run(prob, feed_dict=feed_dict)

# Report the five most probable classes, most likely first.
# argsort is ascending, so the best classes sit at the end of `inds`.
inds = np.argsort(output)[0, :]
# A dedicated loop variable is used so that the module-level image array
# `i` is not overwritten by the loop counter.
for rank in range(5):
    top = inds[-1 - rank]
    # Single pre-formatted string: prints "<name> <probability>" exactly
    # like the old `print a, b` statement, on both Python 2 and Python 3.
    print("%s %s" % (class_names[top], output[0, top]))

miniature poodle 0.389457
toy poodle 0.223092
Bedlington terrier 0.173047
standard poodle 0.149645
komondor 0.0258335

Producing dreams

Making the "dream" images is very simple. Essentially it is just a gradient ascent process that tries to maximize the L2 norm of activations of a particular DNN layer. Here are a few simple tricks that we found useful for getting good images:

- offset the image by a random jitter
- normalize the magnitude of the gradient ascent steps
- apply the ascent across multiple scales (octaves) (not implemented yet in this port)

First we implement a basic gradient ascent step function, applying the first two tricks:

def make_step(x_placeholder, image, bias,grad, layer, sess, 
              step_size=1.5,  
              jitter=32, clip=True):
    '''Basic gradient ascent step.

    x_placeholder -- feed_dict key under which `image` is fed to the graph.
    image     -- mean-subtracted input with channels on the last axis, e.g.
                 (1, height, width, channels) as built by the callers in
                 this file.  The caller's array is not modified.
    bias      -- per-channel offset that was subtracted from the image;
                 used only to compute the clipping range.
    grad      -- object whose .eval(feed_dict=..., session=...) returns the
                 gradient of the objective with respect to the input.
    layer     -- kept for interface compatibility; no longer evaluated,
                 because evaluating `grad` already runs the forward pass.
    sess      -- session handed to grad.eval.
    step_size -- ascent step, relative to the mean absolute gradient.
    jitter    -- maximum random spatial shift (in pixels) per axis.
    clip      -- when true, clamp the result so that image + bias stays
                 within [0, 255].

    Returns the updated image as a new array.
    '''
    ox, oy = np.random.randint(-jitter, jitter+1, 2)

    # Apply the jitter shift along the spatial axes only.  With channels on
    # the last axis, -2 is width and -3 is height; rolling axis -1 would
    # permute the colour channels instead of moving the picture.
    image = np.roll(np.roll(image, ox, -2), oy, -3)

    feed_dict = {x_placeholder: image}
    g = grad.eval(feed_dict=feed_dict, session=sess)

    # Apply a normalized ascent step to the input image.  A zero (or NaN)
    # mean gradient magnitude would turn the whole image into NaN, so the
    # update is skipped in that case.
    g_scale = np.abs(g).mean()
    if g_scale > 0:
        image += step_size / g_scale * g

    # Undo the jitter shift.
    image = np.roll(np.roll(image, -ox, -2), -oy, -3)

    if clip:
        # `bias` broadcasts over the trailing channel axis.
        image[:] = np.clip(image, -bias, 255 - bias)
    return image

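Next we implement the outer ascent loop, which repeats the step function and shows the intermediate images. The ascent through different scales (we call these scales "octaves") is not implemented yet: the `octave_n` and `octave_scale` parameters are accepted but currently ignored.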

def deepdream(sess, x_placeholder, base_img, bias, grad, layer,
              iter_n=10, octave_n=4, octave_scale=1.4, 
              clip=True, **step_params):
    '''Run `iter_n` gradient ascent steps on `base_img`, previewing each one.

    sess, x_placeholder, bias, grad, layer -- passed through to make_step.
    base_img     -- mean-subtracted start image with a leading batch axis
                    of one, e.g. (1, height, width, channels).
    iter_n       -- number of ascent steps.
    octave_n, octave_scale -- accepted for interface compatibility but not
                    used: multi-scale ascent is not implemented here.
    clip         -- forwarded to make_step.
    step_params  -- extra keyword arguments forwarded to make_step
                    (e.g. step_size, jitter).

    Returns the image after the last step (still mean-subtracted).
    '''
    for _ in range(iter_n):
        base_img = make_step(x_placeholder, base_img, bias, grad, layer, sess,
                             clip=clip, **step_params)

        # Add the bias back for display and drop the leading batch axis.
        # The preview shape is taken from the image itself, so inputs other
        # than 227x227 work as well.
        preview = base_img + bias
        showarray(np.reshape(preview, preview.shape[-3:]))

        # wait=True defers clearing until the next output arrives.
        clear_output(wait=True)
    return base_img

Now we are ready to let the neural network reveal its dreams! Let's take a cloud image as a starting point:

# Load the starting image, scale it to 227x227 and preview it.
img = imresize(np.float32(PIL.Image.open('deepdream/sky1024px.jpg')),
               (227, 227))
showarray(img)

# Dream on the conv3 layer, starting from the resized sky image.
img = imresize(np.float32(PIL.Image.open('deepdream/sky1024px.jpg')),
               (227, 227))
showarray(img)

# Per-channel offset subtracted from the image before it is fed to the
# network.
bias = np.array([104.0, 116.0, 122.0])
img = img - bias

# Add the leading batch axis and run 500 ascent steps.
img_input = img.reshape((1, 227, 227, 3))
_ = deepdream(sess, x_placeholder, img_input, bias, grad_c3, conv3,
              iter_n=500)

# Dream on the maxpool5 layer, starting again from the resized sky image.
img = imresize(np.float32(PIL.Image.open('deepdream/sky1024px.jpg')),
               (227, 227))
showarray(img)

# Per-channel offset subtracted from the image before it is fed to the
# network.
bias = np.array([104.0, 116.0, 122.0])
img = img - bias

# Add the leading batch axis and run 500 ascent steps.
img_input = img.reshape((1, 227, 227, 3))
_ = deepdream(sess, x_placeholder, img_input, bias, grad_p5, maxpool5,
              iter_n=500)