/* HTML-CONTEXTS.C - PROCEDURES FOR MAINTAINING HTML CONTEXTS. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "code.h"
#include "freq.h"
#include "html.h"
/* PROCESS COMMAND LINE OPTIONS.  Consumes the leading flag arguments
   (-h, -H and -p) and, if one more argument follows them, records it as
   the name of the statistics file.  The caller's "argv" pointer is passed
   by reference and is left pointing just past the last argument consumed;
   when an error is detected it is left pointing at the offending argument.
   The option values are stored in the html_options structure passed.

   The value returned is 0 on success, 1 if -h/-H or -p was given more than
   once (-h and -H count as the same option), and 2 if an argument starting
   with '-' is not a recognized flag. */

int html_arguments
( char ***pargv,          /* Pointer to the "argv" parameter of "main" */
  html_options *options   /* Structure to store options in */
)
{
  char **arg;             /* Argument currently being examined */
  int status;             /* Value to return: 0 = ok, non-zero = error */

  /* Start from the defaults: no -h/-H, no -p, no statistics file. */

  options->h_option = 0;
  options->p_option = 0;
  options->stats_file = 0;

  /* Walk over the flags, skipping the first entry of the argument vector. */

  status = 0;

  for (arg = *pargv+1; *arg!=0 && **arg=='-'; arg++)
  {
    int h_value;          /* 1 for -h, 2 for -H, 0 for anything else */

    h_value = strcmp(*arg,"-h")==0 ? 1
            : strcmp(*arg,"-H")==0 ? 2
            : 0;

    if (h_value!=0)
    { if (options->h_option!=0)
      { status = 1;       /* duplicate -h or -H options */
        break;
      }
      options->h_option = h_value;
    }
    else if (strcmp(*arg,"-p")==0)
    { if (options->p_option!=0)
      { status = 1;       /* duplicate -p options */
        break;
      }
      options->p_option = 1;
    }
    else
    { status = 2;         /* invalid flag */
      break;
    }
  }

  /* If the flags were all fine and an argument remains, it names the
     statistics file. */

  if (status==0 && *arg!=0)
  { options->stats_file = *arg;
    arg++;
  }

  /* Tell the caller how far we got. */

  *pargv = arg;

  return status;
}
/* SET UP INITIAL CONTEXT.  Initializes the "context" structure given
   according to the "options" passed.  This includes allocating space
   for the frequency tables, initializing these tables (by calling
   initialize_frequencies, or by reading them from the statistics file),
   and setting the initial values of the context variables.

   The number of tables allocated is the product of No_of_chars (if -p
   was given) and 3 (if -h was given) or 4 (if -H was given).  When a
   statistics file is used, it must contain at least that many "frequencies"
   structures, stored consecutively in binary form.

   The program exits with status 1 if the statistics file cannot be opened
   or read, or if the tables cannot be allocated. */

void html_initial_context
( html_options *options,  /* Command line options */
  html_context *context   /* Place to store context information */
)
{
  FILE *sf = NULL;        /* Statistics file, NULL if none was specified */
  int i;

  /* Open the statistics file, if there is one.  Exit if it can't be read. */

  if (options->stats_file)
  { sf = fopen(options->stats_file,"rb");
    if (sf==NULL)
    { fprintf(stderr,"Can't open statistics file (%s)\n",options->stats_file);
      exit(1);
    }
  }

  /* Find out how many frequency structures we need. */

  context->n_freq = 1;
  if (options->p_option==1) context->n_freq *= No_of_chars;
  if (options->h_option==1) context->n_freq *= 3;
  else if (options->h_option==2) context->n_freq *= 4;

  /* Allocate frequency structures. */

  context->freq = calloc (context->n_freq, sizeof (frequencies));
  if (context->freq==0)
  { fprintf(stderr,"Not enough memory to compress with these options\n");
    exit(1);
  }

  /* Initialize the frequency structures, from stats file, or by calling
     initialize_frequencies on each one. */

  for (i = 0; i<context->n_freq; i++)
  { if (sf!=NULL)
    { if (fread(&context->freq[i],sizeof(frequencies),1,sf)!=1)
      { fprintf(stderr,"Error reading statistics file (%s)\n",
                options->stats_file);
        exit(1);
      }
    }
    else
    { initialize_frequencies(&context->freq[i]);
    }
  }

  /* The statistics file is not needed once the tables are loaded, so
     release it rather than leaving it open. */

  if (sf!=NULL)
  { fclose(sf);
  }

  /* Set initial context variables.  Note:  We need to set h_context to zero
     even if no -h or -H option is specified, since html_find_table uses it
     in the table index regardless. */

  context->h_context = 0;
  context->p_context = ' ';
}
/* GO ON TO THE NEXT CONTEXT.  Updates the context variables in the
   "context" structure passed, based on the character that was just
   encoded or decoded.  These variables determine which frequency table
   html_find_table will return.

   With -h or -H in effect, h_context moves between states as follows:

       0 --'<'--> 1        1 --'>'--> 0
       1 --'"'--> 2        2 or more --'"'--> 1
       2 --':'--> 3        (only with -H)

   Any other character leaves h_context unchanged.  (Presumably 0 is text
   outside a tag, 1 is inside a tag, and 2 and 3 are inside a quoted
   attribute value - this is inferred from the characters involved.)

   The p_context variable is always set to the character seen, whether or
   not -p was specified. */

void html_next_context
( int ch,                 /* Character seen */
  html_options *options,  /* Command line options */
  html_context *context   /* Context information to update */
)
{
  /* The h_context state only ever changes when -h or -H was given. */

  if (options->h_option>=1)
  {
    switch (context->h_context)
    {
      case 0:
      { if (ch=='<')
        { context->h_context = 1;
        }
        break;
      }

      case 1:
      { if (ch=='"')
        { context->h_context = 2;
        }
        else if (ch=='>')
        { context->h_context = 0;
        }
        break;
      }

      default:
      { if (context->h_context>=2)
        { if (ch=='"')
          { context->h_context = 1;
          }
          else if (ch==':' && context->h_context==2 && options->h_option==2)
          { context->h_context = 3;
          }
        }
        break;
      }
    }
  }

  /* Remember the character just seen as the preceding-character context. */

  context->p_context = ch;
}
/* FIND FREQUENCY TABLE FOR CURRENT CONTEXT.  Returns a pointer to the
   frequency table selected by the current context variables.

   Without -p, the tables are indexed by h_context alone.  With -p, each
   h_context value has a group of No_of_chars tables, and p_context selects
   the table within that group. */

frequencies *html_find_table
( html_options *options,  /* Command line options */
  html_context *context   /* Context information */
)
{
  frequencies *tables = context->freq;   /* Start of the array of tables */

  if (!options->p_option)
  { return tables + context->h_context;
  }

  return tables + (context->p_context + No_of_chars*context->h_context);
}