/* lin-1.c: Robust linear method for regression.  
 *
 * Reads training examples from "train.n", test inputs from "test.n" and
 * targets from "targets.n".  Produces point predictions in "cguess.n" and
 * densities of targets under a predictive distribution in "clptarg.L.n". Here
 * "n" is the instance number, supplied as a command argument.  Handles badly
 * conditioned cases where inputs are (close to) linearly dependent.
 *
 * (c) Copyright 1996 by Carl Edward Rasmussen. */

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include "util.h"

#define MAX_SV_RATIO 1.0e6       /* Maximum allowed ratio of singular values */
#define two_pi 6.28318530717959

extern void svd(real **A, real *S2, int n);  /* singular value decomposition */
static real f(real *x, real *w, int length);    /* linear function with bias */

/* Write "prefix.instance" into buf (capacity "size" bytes, including the
 * terminating NUL); print an error and exit(-1) if the name does not fit. */

static void build_file_name(char *buf, size_t size, const char *prefix,
                            const char *instance)
{
  int len = snprintf(buf, size, "%s.%s", prefix, instance);

  if (len < 0 || (size_t) len >= size) {
    fprintf(stderr, "Error: file name \"%s.%s\" is too long\n",
                    prefix, instance);
    exit(-1);
  }
}

/* main: fit a least squares linear model (with bias) to the training cases of
 * the instance named by argv[1]. Point predictions for the test cases are
 * written to "cguess.n"; if there are more than no_inp+1 training cases, the
 * log densities of the test targets under a Gaussian predictive distribution
 * are written to "clptarg.L.n". Exits with status -1 on a usage error, an
 * over-long instance name or memory exhaustion; returns 0 otherwise. */

int main(int argc, char **argv)
{
  int  i, j, k, no_inp, no_tar, num_fit = 0;
  char df[20], df2[20];                        /* strings to hold file names */
  real **A, **w, **b, *S2, *c, tmp, *sigma2, sig2;
  FILE *fp;
  struct exampleset train, test; 

  if (argc != 2) {
    fprintf(stderr, "Usage: %s instance-number\n", argv[0]); exit(-1);
  }

/* The test set is loaded first, with the sizes marked as "unknown" (-1); the
 * same no_inp and no_tar variables are then passed on when the training set is
 * loaded. File names are built with bounds checking, since the instance name
 * comes straight from the command line. */

  train.num = test.num = no_inp = no_tar = -1;      /* default for "unknown" */
  build_file_name(df, sizeof df, "test", argv[1]);    /* name of test inputs */
  build_file_name(df2, sizeof df2, "targets", argv[1]);      /* test targets */
  loadExamples(&test, &no_inp, &no_tar, df, df2);  
  build_file_name(df, sizeof df, "train", argv[1]); /* name of training file */
  loadExamples(&train, &no_inp, &no_tar, df, NULL);

  A  = createMatrix(2*(no_inp+1), no_inp+1);   /* double size for svd() call */
  b  = createMatrix(no_tar, no_inp+1);
  w  = createMatrix(no_tar, no_inp+1);
  S2 = (real *) malloc((size_t) (no_inp+1)*sizeof(real));
  c  = (real *) malloc((size_t) (no_inp+1)*sizeof(real));
  sigma2 = (real *) malloc((size_t) no_tar*sizeof(real));
  if (S2 == NULL || c == NULL || (sigma2 == NULL && no_tar > 0)) {
    fprintf(stderr, "Error: out of memory\n"); exit(-1);
  }
  
/* Construct the A matrix; since A is known to be symmetric, we only compute
 * the upper triangular matrix elements and place them symmetrically. The bias
 * input (constant 1) occupies row and column number no_inp; these are indexed
 * explicitly so that the code is also correct when no_inp is zero. */

  for (i=0; i<no_inp; i++) { 
    for (j=i; j<no_inp; j++) {
      for (tmp=0.0, k=0; k<train.num; k++)
        tmp += train.inp[k][i]*train.inp[k][j];
      A[i][j] = A[j][i] = tmp;
    }
    for (tmp=0.0, k=0; k<train.num; k++)    /* contribution from bias inputs */
      tmp += train.inp[k][i];
    A[i][no_inp] = A[no_inp][i] = tmp;
  }
  A[no_inp][no_inp] = train.num;                           /* corner element */

/* Construct b matrix. If there is only a single target, then b is a vector,
 * but is implemented as a matrix with a single row. */

  for (j=0; j<no_tar; j++) {
    for (i=0; i<no_inp; i++) {
      for (tmp=0.0, k=0; k<train.num; k++)
        tmp += train.inp[k][i]*train.tar[k][j]; 
      b[j][i] = tmp;
    } 
    for (tmp=0.0, k=0; k<train.num; k++)    /* contribution from bias inputs */
      tmp += train.tar[k][j];
    b[j][no_inp] = tmp;
  }

/* Do singular value decomposition of A = USV'; on return, the first no_inp+1
 * rows of A contain the product US and the remaining rows contain V (not V');
 * S2 contains the square of the singular values ordered with the largest
 * first. We "invert" S2, zeroing every entry for which S2[i]*MAX_SV_RATIO^2
 * does not exceed S2[0]; the loop runs from the last entry down to the first
 * so that S2[0] is still unmodified whenever it is used in the comparison.
 * Then we compute invA = V*invS2*(US)' one row at a time and store the rows in
 * the lower half of A. Lastly, compute w = invA*b. */

  svd(A, S2, no_inp+1);
  for (i=no_inp; i>=0; i--)                                   /* "invert" S2 */
    if (S2[i]*sq(MAX_SV_RATIO) > S2[0]) {                /* SV large enough? */
      num_fit++;                          /* count the directions being kept */
      S2[i] = 1.0/S2[i];
    }
    else
      S2[i] = 0.0;                                       /* delete direction */
  
  for (i=0; i<=no_inp; i++) {                /* compute invA = V*invS2*(US)' */
    for (j=0; j<=no_inp; j++) {
      for (tmp=0.0, k=0; k<=no_inp; k++)
        tmp += A[i+no_inp+1][k]*A[j][k]*S2[k];
      c[j] = tmp;
    }
    for (j=0; j<=no_inp; j++) A[i+no_inp+1][j] = c[j];  /* copy "c" into "A" */
  }

  for (k=0; k<no_tar; k++)                             /* compute w = invA*b */
    for (i=0; i<=no_inp; i++) {
      for (tmp=0.0, j=0; j<=no_inp; j++)
        tmp += A[i+no_inp+1][j]*b[k][j];
      w[k][i] = tmp;
    }

/* Produce point predictions for the test cases and write them to the "cguess"
 * file; one line per example and no_tar predictions per line. */

  fp = openPredFile("cguess.%s", argv[1]);     /* file for point predictions */
  for (k=0; k<test.num; k++) {        /* make predictions for all test cases */
    for (j=0; j<no_tar; j++)                              /* for each output */
      fprintf(fp, "%f ", f(test.inp[k], w[j], no_inp));
    fprintf(fp,"\n");
  }
  fclose(fp);

/* If train.num <= no_inp+1 we cannot produce a reasonable predictive
 * distribution; otherwise, the log densities of the targets under the Gaussian
 * predictive distribution are written to the "clptarg" file. */

  if (train.num <= no_inp+1)            /* are there too few training cases? */
    fprintf(stderr, "Warning: No \"clptarg.L\" files produced"
                    " - too few training examples...\n"); 
  else {
    fp = openPredFile("clptarg.L.%s", argv[1]);

    /* Residual sum of squares on the training set divided by the number of
     * cases minus the number of directions kept by the truncated inverse;
     * the divisor is positive here since num_fit <= no_inp+1 < train.num. */

    for (j=0; j<no_tar; j++) {       /* estimate noise level for each target */
      for (tmp=0.0, k=0; k<train.num; k++) 
        tmp += sq(f(train.inp[k], w[j], no_inp)-train.tar[k][j]);
      sigma2[j] = tmp/(train.num-num_fit);
    }

    /* For each test case, c = invA*x and sig2 = x'*invA*x, where x is the
     * input vector extended with the bias input. One line is written per test
     * case, holding the log density summed over all targets.
     * NOTE(review): sig2 is added to sigma2[i] without being scaled by the
     * noise variance; the usual least squares expression for the parameter
     * uncertainty term is sigma2[i]*x'*invA*x - confirm this is intended. */

    for (k=0; k<test.num; k++) {      /* make predictions for all test cases */
      for (i=0; i<=no_inp; i++) 
        c[i] = f(test.inp[k], A[i+no_inp+1], no_inp);
      sig2 = f(test.inp[k], c, no_inp);       /* noise from uncertainty in w */
      for (tmp=0.0, i=0; i<no_tar; i++) 
        tmp -= log(two_pi*(sig2+sigma2[i]))+sq(f(test.inp[k], w[i], no_inp)-
                                              test.tar[k][i])/(sig2+sigma2[i]);
      fprintf(fp, "%f\n", 0.5*tmp);
    }
    fclose(fp);
  }
  free(A[0]); free(A); free(w[0]); free(w); free(b[0]); free(b);
  free(S2); free(c); free(sigma2);
  return 0;
}

/* Evaluate the linear function with bias: the inner product of the first
 * "length" elements of x and w, plus the bias weight stored in w right after
 * those elements. */

static real f(real *x, real *w, int length)
{
  real acc = 0.0;
  int  idx = 0;

  while (idx < length) {                 /* accumulate in index order 0,1,.. */
    acc += x[idx]*w[idx];
    idx++;
  }
  return acc+w[idx];                       /* idx now indexes the bias weight */
}
