#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>

// compressor (arithmetic) for Neil's file.
// expects the bias of the coin to be from a beta distribn with params 
// defined in ACdefns.h

// usage: ACencode < filep.01.10000 > file.ACZ

// [Unlike ACencode0.c, this encoder uses integer arithmetic.
// The two encoders, which look like they should produce identical outputs,
// start producing different outputs after 30 bits.]

// (c) David J.C. MacKay
// License: GPL                                  http://www.gnu.org/copyleft/gpl.html

// Originates from:  http://www.inference.phy.cam.ac.uk/mackay/itprnn/code/c/compress/

#include "ACdefns.h"

int main( int argc , char *argv[] ){
  FILE *fp;
  int r;
  double c0=BETA0 , c1=BETA1 , cT , p0  ; // c0 and c1 are the effective counts of 0s and 1s
  long a=0, b=ONE , w , boundary ; // a and b are the bottom and top of the 
                                   //  arithmetic coding interval.
                                   //  boundary is the divider between them.

  int c , tot0 = 0 , tot1 = 0  ; 

  fp=stdin;
  r = 0 ; 
  while( (c=getc(fp)) != EOF ) {
    w = b-a ;
    cT = c0 + c1 ; 
    //    p1 = c1 / cT ;
    p0 = c0 / cT ;
    boundary = a + (long) (p0 * (double)w) ;

    // Debugging messages, modify boundary in special cases // // // // // // // 
    // check for boundary too close to a or b
    if ( b < a + 2*BMIN ) {
      boundary = a + (b-a)/2 ;
      fprintf(stderr,"warning, strange biases in input file? events are occurring that have very small probability. ");
      if ( (b-a )<4 ) {
	fprintf(stderr,"warning, total failure of algorithm likely %d %d\n" , tot0 , tot1 );
	//	exit(0);
      }
    } else {
      if ( boundary < a + BMIN ) {
	boundary = a + BMIN ;
	fprintf(stderr,"warning, strange biases in input file? events have very small probability!\n");
      }
      if ( boundary > b - BMIN ) {
	boundary = b - BMIN ;
	fprintf(stderr,"warning, strange biases in input file? events have very small probability!\n");
      }
    }
    // End Debugging messages // // // // // // // // // // // // // 

    if( c == '1' ) {
      a = boundary ; // b unchanged
      tot1++; c1 += 1.0 ;
    } else if ( c=='0' ) {
      b = boundary ; // a unchanged
      tot0++; c0 += 1.0 ;
    } else {
      // we ignore carriage returns, etc
    } 
    // output bits if possible
    while ( (a>=HALF) || (b<=HALF) ) {
      if (a>=HALF) {
	printf("1");
	a = a-HALF ;
	b = b-HALF ;
      } else {
	printf("0");
      }
      a *= 2 ;      b *= 2 ;     
    }
    if ( ( a < 0 ) || ( b> ONE ) ) {
      fprintf(stderr,"ERROR in a,b range\n") ; 
    }
  }
  // time to terminate!
  if ( (HALF-a) > (b-HALF) ) {
    w = (HALF-a) ;
    printf("0");
    while ( w < HALF ) {
      printf("1");
      w *=2 ;
    }
  } else {
    w = (b-HALF) ;
    printf("1");
    while ( w < HALF ) {
      printf("0");
      w *=2 ;
    }
  }

  fprintf(stderr,"Shannon information content of the file is %d log(f) + %d log(1-f) = %8.3f\n",tot0,tot1, (tot0*log(0.99)+tot1*log(0.01))/log(0.5) ) ; 
  fprintf(stderr,"Decompress with ACdecode %d < filename\n",tot0+tot1 ) ; 

  return(0);
}

