00001
00002 #include <LEDA/d_array.h>
00003 #include <LEDA/graph_alg.h>
00004 #include "SmartArray.h"
00005 #include "Emd.h"
00006 #include "CompareHistograms.h"
00007
00017 double CompareHistograms(const SmartArray<double> hist_values1, const SmartArray<double> weights1,
00018 const SmartArray<double> hist_values2, const SmartArray<double> weights2){
00019
00020
00021 graph G[2];
00022 node root;
00023 node_array <double> node_weight[2];
00024 edge_array <double> edge_weight[2];
00025 double emd_return;
00026 Matrix embedded[2];
00027 int ncols[2];
00028 int cdim[2];
00029 node_array <int> column_map[2];
00030 ColumnVector **gvec[2];
00031 double *n_weight[2];
00032 ColumnVector mu[2];
00033 double sigma[2];
00034
00035
00036
00037 for( i = 0; i < 2; i++ ) {
00038 if (i==0)
00039 embedded[i] = Get_Graph(G[i], node_weight[i], column_map[i], hist_values1, weights1);
00040 if (i==1)
00041 embedded[i] = Get_Graph(G[i], node_weight[i], column_map[i], hist_values2, weights2);
00042
00043 ncols[i] = embedded[i].Ncols();
00044 cdim[i] = embedded[i].Nrows();
00045
00046
00047 double orig_weight[ncols[i]];
00048 node v;
00049 forall_nodes( v, G[i] ) {
00050 column_map[i][v] -= 1;
00051 orig_weight[column_map[i][v]] = node_weight[i][v];
00052 }
00053
00054
00055 n_weight[i] = (double *) calloc( ncols[i], sizeof(double) );
00056 normalize_weights( orig_weight, ncols[i], n_weight[i] );
00057
00058
00059 gvec[i] = (ColumnVector **) calloc( ncols[i], sizeof(ColumnVector *) );
00060 for( int j=0; j<ncols[i]; j++ )
00061 gvec[i][j] = new ColumnVector( embedded[i].Column(j+1) );
00062
00063
00064 normalize_column_vectors(
00065 gvec[i], ncols[i], n_weight[i], mu[i], sigma[i] );
00066 }
00067
00068
00069 int min_dim = 3;
00070
00071
00072 signature_t s[2] = { {ncols[0], gvec[0], n_weight[0]},
00073 {ncols[1], gvec[1], n_weight[1]}};
00074 int flow_size;
00075 flow_t flow[ncols[0]+ncols[1]-1];
00076
00077
00078 d_array <int, ColumnVector *> X;
00079 d_array <int, ColumnVector *> Y;
00080 d_array <int, double> pair_weight;
00081
00082
00083 double old_emd_value = 999999.9;
00084 double curr_emd_value = old_emd_value - 1;
00085 while( curr_emd_value < old_emd_value ) {
00086
00087 old_emd_value = curr_emd_value;
00088 curr_emd_value = emd( s, s+1, vector_dist, flow, &flow_size );
00089 emd_return = curr_emd_value;
00090
00091
00092 X.clear(); Y.clear(); pair_weight.clear();
00093 for( int k=0; k<flow_size; k++ ) {
00094 X[k] = gvec[0][flow[k].from];
00095 Y[k] = gvec[1][flow[k].to];
00096 pair_weight[k] = flow[k].amount;
00097 }
00098
00099
00100 double error = least_squares_estimation( X, Y, pair_weight )/flow_size;
00101 }
00102
00103 return emd_return;
00104 }
00105
00106
00107
00108
00109 Matrix GetGraph(graph &G,node_array <double> &node_weight, node_array <int> &column_map,
00110 const SmartArray<double> hist_values, const SmartArray<double> weights){
00111
00112 int number, i, node1, node2, root_number, mass_no=0, segment;
00113 node v, a, b; edge e;
00114 double mass, weight, x, y, z;
00115 char node_type;
00116
00117 number = hist_values.GetSize();
00118
00119 for (i=0; i<number; i++)
00120 G.new_node();
00121
00122
00123 double *Node_Mass = (double*)malloc(number * sizeof(double));
00124 if (Node_Mass == NULL){
00125 cerr << "Not enough memory !" << endl;
00126 exit(-1);
00127 }
00128
00129 Matrix Vector_l2(3, G.number_of_nodes());
00130
00131 for (i=0; i<number; i++){
00132 Node_Mass[i] = weights[i];
00133 Vector_l2(1,i+1) = hist_values[i];
00134 Vector_l2(2,i+1) = 0;
00135 Vector_l2(3,i+1) = 0;
00136 }
00137
00138 node_array<double> node_mass;
00139 node_array<int> node_label;
00140 node_label.init(G);
00141 node_mass.init(G);
00142 node_weight.init(G);
00143 column_map.init(G);
00144 int in = 0;
00145 forall_nodes(v,G) {
00146 node_label[v] = in;
00147 node_mass[v] = Node_Mass[in];
00148 node_weight[v] = Node_Mass[in];
00149 column_map[v] = in + 1;
00150 in++;
00151 }
00152
00153 return Vector_l2;
00154 }
00155
00156
00157 float vector_dist( ColumnVector **v1, ColumnVector **v2 ) {
00158 return (float) sqrt( (*(*v1) - *(*v2)).SumSquare() );
00159 }
00160
00161
00162 void normalize_weights( double weight[], int nv, double normed_weight[] ) {
00163
00164 double weight_sum = 0;
00165 for( int i=0; i<nv; i++ )
00166 weight_sum += weight[i];
00167 for( int i=0; i<nv; i++ )
00168 normed_weight[i] = weight[i]/weight_sum;
00169 }
00170
00171
00172
00176 void normalize_column_vectors( ColumnVector *mcol[], int nc,
00177 double normed_weight[], ColumnVector &mu, double &sigma ){
00178 int nr = (*mcol[0]).Nrows();
00179
00180
00181 mu = ColumnVector(nr);
00182 mu *= 0;
00183 for( int i=0; i<nc; i++ )
00184 mu += normed_weight[i]*(*mcol[i]);
00185
00186
00187 for( int i=0; i<nc; i++ )
00188 *mcol[i] -= mu;
00189
00190
00191 sigma = 0;
00192 for( int i=0; i<nc; i++ )
00193 sigma += (double) normed_weight[i]*((*mcol[i]).SumSquare());
00194 sigma = sqrt(sigma);
00195
00196
00197 for( int i=0; i<nc; i++ )
00198 (*mcol[i]) /= sigma;
00199
00200 #ifdef _DEBUG_
00201 cerr << "computed sigma: " << sigma << endl;
00202 cerr << "computed mu: " << mu.t();
00203 #endif
00204 }
00205
00206
00211 double least_squares_estimation(
00212 d_array <int, ColumnVector *> &X, d_array <int, ColumnVector *> &Y,
00213 d_array <int, double> &normed_weight ) {
00214
00215 int ncols = X.size();
00216 int nrows = X[0]->Nrows();
00217
00218 #ifdef _DEBUG_
00219
00220 double init_error = 0;
00221 for( int i=0; i<ncols; i++ ) {
00222 init_error += (double) normed_weight[i]*sqrt( ((*X[i])-(*Y[i])).SumSquare() );
00223 }
00224 cerr << "initial error in LSET: " << init_error << endl;
00225 #endif
00226
00227
00228 Matrix Sigma_XY( nrows, nrows );
00229 Sigma_XY *= 0;
00230 for( int i=0; i<ncols; i++ ) {
00231 Sigma_XY += (normed_weight[i]*(*Y[i]))*(*X[i]).t();
00232 }
00233
00234
00235 Matrix U(nrows, nrows), V(nrows, nrows);
00236 DiagonalMatrix D(nrows);
00237 SVD( Sigma_XY, D, U, V );
00238
00239
00240 Matrix R = U * V.t();
00241
00242
00243
00244 d_array <long, ColumnVector *> ptr_map;
00245 for( int i=0; i<ncols; i++ ) {
00246 if( !ptr_map.defined( (long) X[i] ) ) {
00247 *X[i] = R * (*X[i]);
00248 ptr_map[(long) X[i]] = X[i];
00249 }
00250 }
00251
00252
00253 double error = 0;
00254 for( int i=0; i<ncols; i++ ) {
00255 error += (double) normed_weight[i]*sqrt( ((*X[i])-(*Y[i])).SumSquare() );
00256 }
00257
00258 #ifdef _DEBUG_
00259 cerr << "final error in LSET: " << error << endl;
00260 #endif
00261
00262 return error;
00263 }
00264