"""STA314 Homework 4.

Copyright and Usage Information
===============================

This file is provided solely for the personal and private use of students
taking STA314 at the University of Toronto St. George campus. All forms of
distribution of this code, whether as given or with any changes, are
expressly prohibited.
"""


from utils import *

import matplotlib.pyplot as plt
import scipy.linalg as lin
import numpy as np


def pca(x, k):
    """ PCA algorithm. Given the data matrix x and k,
    return the eigenvectors, mean of x, and the projected data (code vectors).

    The data is centred on its per-feature mean, the D x D covariance matrix
    of the centred data is eigendecomposed, and the k eigenvectors with the
    largest eigenvalues are kept as the projection basis.

    Note: eigenvectors are only defined up to sign, so individual columns of
    v (and the matching columns of proj_x) may be negated between
    LAPACK builds.

    Arguments
        x: A matrix with dimension N x D, where each row corresponds to one data point.
        k: int representing the number of dimension to reduce to.

    Returns
        v: A matrix of dimension D x k that stores top k eigenvectors
        mean: A vector of dimension D that represents the mean of x.
        proj_x: A matrix of dimension N x k where x is projected down to k dimension.

    Raises
        ValueError: if x has no rows, or if k is not in the range 1..D.
    """
    n, d = x.shape
    if n == 0:
        raise ValueError("pca requires at least one data point")
    if not 1 <= k <= d:
        raise ValueError(f"k must satisfy 1 <= k <= {d}, got {k}")

    # Centre the data so the covariance is taken about the mean.
    mean = np.mean(x, axis=0)
    centered = x - mean

    # The 1/n scaling only rescales the eigenvalues; the eigenvectors (and
    # their ordering) are the same as with the unbiased 1/(n-1) estimate.
    covariance = centered.T @ centered / n

    # eigh is the right solver for a symmetric matrix. It returns the
    # eigenvalues in ascending order, so reverse the columns to put the
    # directions of largest variance first before keeping the top k.
    _, eigenvectors = lin.eigh(covariance)
    v = eigenvectors[:, ::-1][:, :k]

    # Code vectors: coordinates of the centred data in the eigenbasis.
    proj_x = centered @ v
    return v, mean, proj_x


def show_eigenvectors(v):
    """ Plot every eigenvector as a grayscale image, side by side.

    Each column of v is reshaped to 16 x 16 and transposed before being
    drawn, so every column must hold exactly 256 values. All images share
    a single row of subplots in figure 1, which is cleared first.

    Arguments
        v: A matrix of dimension D x k that stores top k eigenvectors
    """
    num_vectors = v.shape[1]

    plt.figure(1)
    plt.clf()
    # Walking the rows of v.T visits the columns of v in order.
    for panel, eigenvector in enumerate(v.T, start=1):
        image = eigenvector.reshape(16, 16).T
        plt.subplot(1, num_vectors, panel)
        plt.imshow(image, cmap=plt.cm.gray)
    plt.show()


def pca_classify():
    """ Evaluate a 1-NN classifier in PCA code space and plot its accuracy.

    Loads the train/validation/test splits, displays the top 5 eigenvectors
    of the training data as a sanity check, then for each k in
    [2, 5, 10, 20, 30] projects the training and validation inputs onto the
    top k principal components (fitted on the training data only) and
    classifies every validation sample with the label of its nearest
    training code vector under Euclidean distance. The validation accuracy
    for each k is plotted against k.
    """
    # Load all necessary datasets:
    x_train, y_train = load_train()
    x_valid, y_valid = load_valid()
    # The test split is loaded but not used by the validation sweep below.
    x_test, y_test = load_test()

    # Make sure the PCA algorithm is correctly implemented.
    v, mean, proj_x = pca(x_train, 5)
    # The code below visualizes the eigenvectors.
    show_eigenvectors(v)

    k_lst = [2, 5, 10, 20, 30]
    val_accuracy = np.zeros(len(k_lst))
    num_valid = x_valid.shape[0]
    for j, k in enumerate(k_lst):
        # Fit PCA on the training data only; the validation data must be
        # centred with the *training* mean and projected on the same basis.
        v, mean, code_train = pca(x_train, k)
        code_valid = (x_valid - mean) @ v

        num_correct = 0
        for i in range(num_valid):
            # For each validation sample, perform 1-NN classification on
            # the training code vectors. Squared Euclidean distance has the
            # same minimiser as Euclidean distance, so the sqrt is skipped.
            sq_dist = np.sum((code_train - code_valid[i]) ** 2, axis=1)
            nearest = np.argmin(sq_dist)
            # array_equal handles scalar labels as well as per-row label
            # arrays (e.g. labels stored as an N x 1 column).
            if np.array_equal(y_train[nearest], y_valid[i]):
                num_correct += 1

        # Update the val_accuracy accumulator
        val_accuracy[j] = num_correct / num_valid

    plt.plot(k_lst, val_accuracy)
    plt.title("Validation Accuracy of 1-NN on top PCs")
    plt.xlabel("Number of principal components")
    plt.ylabel("Validation accuracy")
    plt.show()


# Run the PCA + 1-NN validation experiment only when this file is executed
# as a script, not when it is imported as a module.
if __name__ == "__main__":
    pca_classify()
