Main Page   Namespace List   Class Hierarchy   Compound List   File List   Namespace Members   Compound Members   File Members   Related Pages  

CompareHistogram.cpp

00001 
00002 #include <LEDA/d_array.h>
00003 #include <LEDA/graph_alg.h>
00004 #include "SmartArray.h"
00005 #include "Emd.h"
00006 #include "CompareHistograms.h"
00007 
00017 double CompareHistograms(const SmartArray<double> hist_values1, const SmartArray<double> weights1,
00018                                 const SmartArray<double> hist_values2, const SmartArray<double> weights2){
00019 
00020  // variables to fill later.
00021  graph G[2];
00022  node root;
00023  node_array <double> node_weight[2];
00024  edge_array <double> edge_weight[2];
00025  double emd_return;
00026  Matrix embedded[2];
00027  int ncols[2];
00028  int cdim[2];
00029  node_array <int> column_map[2]; // a mapping of nodes into matrix columns.
00030  ColumnVector **gvec[2]; // column vectors corresponding to matrix columns.
00031  double *n_weight[2]; // normalized node weights (to sum up to 1).
00032  ColumnVector mu[2];
00033  double sigma[2];
00034 
00035          
00036  // read in two histograms.
00037  for( i = 0; i < 2; i++ ) {
00038          if (i==0)
00039                  embedded[i] = Get_Graph(G[i], node_weight[i], column_map[i], hist_values1, weights1);
00040          if (i==1)
00041                  embedded[i] = Get_Graph(G[i], node_weight[i], column_map[i], hist_values2, weights2);
00042          
00043          ncols[i] = embedded[i].Ncols();
00044          cdim[i] = embedded[i].Nrows();
00045             
00046          // copy weights into a column-weight array.
00047          double orig_weight[ncols[i]];
00048          node v;
00049          forall_nodes( v, G[i] ) {
00050                  column_map[i][v] -= 1;
00051                  orig_weight[column_map[i][v]] = node_weight[i][v];
00052          }
00053 
00054          // normalize column-weights.
00055          n_weight[i] = (double *) calloc( ncols[i], sizeof(double) );
00056          normalize_weights( orig_weight, ncols[i], n_weight[i] );
00057 
00058          // copy the matrix into an array of column vectors -- for "emd.c".
00059          gvec[i] = (ColumnVector **) calloc( ncols[i], sizeof(ColumnVector *) );
00060          for( int j=0; j<ncols[i]; j++ )
00061                  gvec[i][j] = new ColumnVector( embedded[i].Column(j+1) );
00062 
00063          // normalize column-vectors.
00064          normalize_column_vectors(
00065                          gvec[i], ncols[i], n_weight[i], mu[i], sigma[i] );
00066  }
00067 
00068         // int min_dim = DMIN( cdim[0], cdim[1] );
00069         int min_dim = 3;
00070 
00071         // set up structures for emd.
00072         signature_t s[2] = { {ncols[0], gvec[0], n_weight[0]},
00073                 {ncols[1], gvec[1], n_weight[1]}};
00074         int flow_size;  
00075         flow_t flow[ncols[0]+ncols[1]-1];
00076 
00077         // set up structures for least squares.
00078         d_array <int, ColumnVector *> X;
00079         d_array <int, ColumnVector *> Y;
00080         d_array <int, double> pair_weight;
00081 
00082         // set up emd/lset iteration.
00083         double old_emd_value = 999999.9;
00084         double curr_emd_value = old_emd_value - 1;
00085         while( curr_emd_value < old_emd_value ) {
00086                 // call emd.
00087                 old_emd_value = curr_emd_value;
00088                 curr_emd_value = emd( s, s+1, vector_dist, flow, &flow_size );
00089                 emd_return = curr_emd_value;
00090 
00091                 // set up LSET.
00092                 X.clear(); Y.clear(); pair_weight.clear();
00093                 for( int k=0; k<flow_size; k++ ) {
00094                         X[k] = gvec[0][flow[k].from];
00095                         Y[k] = gvec[1][flow[k].to];
00096                         pair_weight[k] = flow[k].amount;
00097                 }
00098 
00099                 // call least squares.
00100                 double error = least_squares_estimation( X, Y, pair_weight )/flow_size;
00101         }
00102                 
00103  return emd_return;     
00104 }
00105 
00106 
00107 /**************************************************************************************/
00108 
00109 Matrix GetGraph(graph &G,node_array <double> &node_weight, node_array <int> &column_map,
00110                 const SmartArray<double> hist_values, const SmartArray<double> weights){
00111 
00112  int number, i, node1, node2, root_number, mass_no=0, segment;
00113  node v, a, b; edge e;
00114  double mass, weight, x, y, z;
00115  char node_type;
00116 
00117  number = hist_values.GetSize();
00118 
00119  for (i=0; i<number; i++)      //Generating a graph with n nodes and 0 edges.
00120        G.new_node();
00121 
00122 
00123   double *Node_Mass = (double*)malloc(number * sizeof(double));  //Stores the Weight for each node
00124   if (Node_Mass == NULL){
00125           cerr << "Not enough memory !" << endl;
00126           exit(-1);
00127   }
00128 
00129   Matrix Vector_l2(3, G.number_of_nodes()); //There are only x,y,z coordinates
00130 
00131   for (i=0; i<number; i++){
00132           Node_Mass[i] = weights[i];
00133           Vector_l2(1,i+1) = hist_values[i];
00134           Vector_l2(2,i+1) = 0;
00135           Vector_l2(3,i+1) = 0;
00136   }
00137 
00138   node_array<double> node_mass;
00139   node_array<int> node_label;
00140   node_label.init(G);
00141   node_mass.init(G);
00142   node_weight.init(G);
00143   column_map.init(G);
00144   int in = 0;
00145   forall_nodes(v,G) {
00146     node_label[v] = in;
00147     node_mass[v] = Node_Mass[in];
00148     node_weight[v] = Node_Mass[in];
00149     column_map[v] = in + 1;
00150     in++;
00151   }
00152 
00153  return Vector_l2;
00154 }
00155 
00156 // distance defined on pairs of column vectors.
00157 float vector_dist( ColumnVector **v1, ColumnVector **v2 ) {
00158         return (float) sqrt( (*(*v1) - *(*v2)).SumSquare() );
00159 }
00160 
00161 // normalizing weights to sum up to 1.
00162 void normalize_weights( double weight[], int nv, double normed_weight[] ) {
00163         // compute sum of weights and normalize them.
00164         double weight_sum = 0;
00165         for( int i=0; i<nv; i++ )
00166                 weight_sum += weight[i];
00167         for( int i=0; i<nv; i++ )
00168                 normed_weight[i] = weight[i]/weight_sum;
00169 }
00170 
00171 //  "normalizing" a set of column vectors by finding their weighted average,
00172 //  shifting column-vectors to make average 0, and dividing by the deviation.
00176 void normalize_column_vectors( ColumnVector *mcol[], int nc,
00177                 double normed_weight[], ColumnVector &mu, double &sigma ){
00178         int nr = (*mcol[0]).Nrows();
00179 
00180         // compute mu.
00181         mu = ColumnVector(nr);
00182         mu *= 0;
00183         for( int i=0; i<nc; i++ )
00184                 mu += normed_weight[i]*(*mcol[i]);
00185 
00186         // subtract mu from everything.
00187         for( int i=0; i<nc; i++ )
00188                 *mcol[i] -= mu;
00189 
00190         // compute sigma.
00191         sigma = 0;
00192         for( int i=0; i<nc; i++ )
00193                 sigma += (double) normed_weight[i]*((*mcol[i]).SumSquare());
00194         sigma = sqrt(sigma);
00195 
00196         // normalize by sigma.
00197         for( int i=0; i<nc; i++ )
00198                 (*mcol[i]) /= sigma;
00199 
00200 #ifdef _DEBUG_
00201         cerr << "computed sigma: " << sigma << endl;
00202         cerr << "computed mu: " << mu.t();
00203 #endif
00204 }
00205 
00206 //  Projecting X's onto Y's using linear least squares estimate.
00211 double least_squares_estimation(
00212                 d_array <int, ColumnVector *> &X, d_array <int, ColumnVector *> &Y,
00213                 d_array <int, double> &normed_weight ) {
00214 
00215         int ncols = X.size();
00216         int nrows = X[0]->Nrows();
00217 
00218 #ifdef _DEBUG_
00219         // compute initial error (debug).
00220         double init_error = 0;
00221         for( int i=0; i<ncols; i++ ) {
00222                 init_error += (double) normed_weight[i]*sqrt( ((*X[i])-(*Y[i])).SumSquare() );
00223         }
00224         cerr << "initial error in LSET: " << init_error << endl;
00225 #endif
00226 
00227         // compute Sigma_XY.
00228         Matrix Sigma_XY( nrows, nrows );
00229         Sigma_XY *= 0;
00230         for( int i=0; i<ncols; i++ ) {
00231                 Sigma_XY += (normed_weight[i]*(*Y[i]))*(*X[i]).t();
00232         }
00233 
00234         // compute SVD.
00235         Matrix U(nrows, nrows), V(nrows, nrows);
00236         DiagonalMatrix D(nrows);
00237         SVD( Sigma_XY, D, U, V );
00238 
00239         // compute R.
00240         Matrix R = U * V.t();
00241 
00242         // update X's. Since they are represented by pointers,
00243         // need to update each pointer's value once - use a d_array to check it.
00244         d_array <long, ColumnVector *> ptr_map;
00245         for( int i=0; i<ncols; i++ ) {
00246                 if( !ptr_map.defined( (long) X[i] ) ) {
00247                         *X[i] = R * (*X[i]);
00248                         ptr_map[(long) X[i]] = X[i];
00249                 }
00250         }
00251 
00252         // return the error of the estimate.
00253         double error = 0;
00254         for( int i=0; i<ncols; i++ ) {
00255                 error += (double) normed_weight[i]*sqrt( ((*X[i])-(*Y[i])).SumSquare() );
00256         }
00257 
00258 #ifdef _DEBUG_
00259         cerr << "final error in LSET: " << error << endl;
00260 #endif
00261 
00262         return error;
00263 }
00264 

Generated on Sat Nov 13 11:21:21 2004 for Noisy DAG Matcher by doxygen1.2.18