#include <stdlib.h>
#include <ctype.h>
#include <math.h>
#include <assert.h>
#include <vlcutils/error.h>
#include "definitions.h"
#include "freqdist.h"
#include "index.h"

int file_length(FILE *input)
{
     int length;

     assert(input);
     fseek(input, 0, SEEK_END);
     length = ftell(input);
     fseek(input, 0, SEEK_SET);
     return length;
}

/*
 * Displays the number of occuerences of each letter in the input
 * file, then return the number of letters in the input file.  (This
 * can be different from the number of bytes in the file, because
 * newlines are ignored.)
 */
int statistics(const char *inputfname)
{
     FILE *ifp;
     int numA, numC, numT, numG, numN, dbsize;
     char c;

     ifp = fopen(inputfname, "r");
     fseek(ifp, 0, SEEK_END);
     dbsize = ftell(ifp);
     fclose(ifp);
     return dbsize;

     numA = numC = numT = numG = numN = 0;
     dbsize = 0;
     while ((c = fgetc(ifp)) > 0) {
	  dbsize++;
	  c = toupper(c);
	  if      (c == 'A') { numA++; }
	  else if (c == 'C') { numC++; }
	  else if (c == 'G') { numG++; }
	  else if (c == 'T') { numT++; }
	  else if (c == 'N') { numN++; }
	  else if (c != '\n')
	       fatal_error("Input file contains incorrect letter '%c' "
			   "at position %d\n", c, dbsize);
     }
     fclose(ifp);
     dbsize = numA + numC + numT + numG + numN;
  
     fprintf(stderr, "file name = %s\n", inputfname);
     fprintf(stderr, "numA = %d\n", numA);
     fprintf(stderr, "numC = %d\n", numC);
     fprintf(stderr, "numG = %d\n", numG);
     fprintf(stderr, "numT = %d\n", numT);
     fprintf(stderr, "numN = %d\n", numN);
     fprintf(stderr, "sum = %d\n", dbsize);

     return(dbsize);
}


void store_index(const struct index *index, FILE *output)
{
     int i, j;

     assert(index && output);
     fprintf(output, "%d %d %d\n", index->window_size, index->dimensions,
	     index->num_boxes);
     for (i = 0; i < index->num_boxes; i++) {
	  fprintf(output, "%d\n", index->elements[i].capacity);
	  for(j = 0; j < ALPH_LEN; j++)
	       fprintf(output, "%d\n", index->elements[i].lower[j]);
	  for(j = 0; j < ALPH_LEN; j++)
	       fprintf(output, "%d\n", index->elements[i].higher[j]);
     }
}

/*
 * Creates the index structure for fixed box capacity.
 * The number of occurences of each letter is used as the feature vector.
 */
void create_index_file(FILE *input, FILE *output, int window_size, int box_capacity)
{
     char c, cold, window[MAX_WINDOW];
     int i, j, k, start, numBox, numD, numchar, 
	  intdata[ALPH_LEN], min[ALPH_LEN], max[ALPH_LEN];
     double averagevolume,volume;

     numchar = file_length(input);
     numBox = ceil((double)(numchar - window_size + 1.0) / box_capacity);

     fprintf(stderr, "Creating index structure. Window size = %d, "
	     "box capacity = %d\n", window_size, box_capacity);
     fprintf(output, "%d %d %d\n", window_size, ALPH_LEN, numBox);

     numBox = 0;
     for(i = 0; i < ALPH_LEN; i++)
	  intdata[i] = 0;

     k = j = 0;
     while (j < window_size) {
	  c = fgetc(input);
	  if (c != '\n') {
	       increment(c, intdata);
	       window[k] = c;
	       k++;
	       j++;
	  }
     }

     for (i = 0; i < ALPH_LEN; i++) {
	  min[i] = max[i] = intdata[i]; 
     }

     start = 0;
     numD = 1; 
     j = 0; 
     k = 1;
     averagevolume = 0.0;
     while ((c = fgetc(input)) > 0) {
	  if (c != '\n'){
	       k++; 
	       numD++;
      
	       cold = window[start];
	       window[start] = c;
	       decrement(cold, intdata);
	       increment(c, intdata);
      
	       start = (start + 1) % window_size;
      
	       for (i = 0; i < ALPH_LEN; i++) {
		    if(intdata[i] < min[i]) min[i] = intdata[i];
		    if(intdata[i] > max[i]) max[i] = intdata[i];
	       }
      
	       if (k == box_capacity) {
		    fprintf(output, "%d\n", numD);
		    for(i = 0; i < ALPH_LEN; i++)
			 fprintf(output, "%d\n", min[i]);
		    for(i = 0; i < ALPH_LEN; i++)
			 fprintf(output, "%d\n", max[i]);
		    k = numD = 0; 
		    numBox++;
                    volume = 1.0;
		    for (i = 0; i < ALPH_LEN; i++){
                         volume *= (max[i]-min[i]+1);
			 min[i] = max[i] = intdata[i];
                    }
                    averagevolume += volume/1000.0;
	       }
	  }
     }

     if (numD > 0) {
	  fprintf(output, "%d\n", numD);
	  for (i = 0; i < ALPH_LEN; i++) fprintf(output, "%d\n", min[i]);
	  for (i = 0; i < ALPH_LEN; i++) fprintf(output, "%d\n", max[i]);
	  numBox++;
     }
     fprintf(stderr, "Index creation completed\n");
     fprintf(stderr, "Number of boxes = %d\n",numBox);
     fprintf(stderr, "Average volume (x 1000)= %f\n",averagevolume/numBox);
}


/*
 * Creates the index structure for fixed box volume.
 * The number of occurences of each letter is used as the feature vector.
 */
void create_index_volume(FILE *input, FILE *output, int window_size, 
				  double max_volume)
{
     char c, cold, window[MAX_WINDOW];
     int i, j, start, numBox, numD, numchar, intdata[ALPH_LEN], min[ALPH_LEN], 
	  max[ALPH_LEN];
     double volume;

     numchar = file_length(input);

     fprintf(stderr, "Creating index structure. Window size = %d, "
	     "max volume = %f\n", window_size, max_volume);
     fprintf(output, "%d %d %d\n", window_size, ALPH_LEN, -1);

     numBox = 0;
     for(i = 0; i < ALPH_LEN; i++)
	  intdata[i] = 0;

     /* Go through the first window of data, add the letters to window[],
	and find the point (intdata[]) corresponding to this window. */
     j = 0;
     while (j < window_size) {
	  c = fgetc(input);
	  if (c != '\n') {
	       increment(c, intdata);
	       window[j] = c;
	       j++;
	  }
     }
     /* Initially, the box only contains this one point. */
     for (i = 0; i < ALPH_LEN; i++)
	  min[i] = max[i] = intdata[i]; 

     start = 0;
     numD = 1; 
     j = 0; 
     while ((c = fgetc(input)) > 0) {
	  if (c != '\n'){
	       numD++;
      
	       cold = window[start];
	       window[start] = c;
	       decrement(cold, intdata);
	       increment(c, intdata);
      
	       start = (start + 1) % window_size;
      
	       volume = 1.0;
	       for (i = 0; i < ALPH_LEN; i++) {
		    if (intdata[i] < min[i]) min[i] = intdata[i];
		    if (intdata[i] > max[i]) max[i] = intdata[i];
		    volume *= max[i] - min[i] + 1;
	       }
      
	       if (volume >= max_volume) {
		    fprintf(output, "%d\n", numD);
		    for(i = 0; i < ALPH_LEN; i++)
			 fprintf(output, "%d\n", min[i]);
		    for(i = 0; i < ALPH_LEN; i++)
			 fprintf(output, "%d\n", max[i]);
		    numD = 0; 
		    numBox++;
		    for (i = 0; i < ALPH_LEN; i++)
			 min[i] = max[i] = intdata[i];
	       }
	  }
     }

     if (numD > 0) {
	  fprintf(output, "%d\n", numD);
	  for (i = 0; i < ALPH_LEN; i++) fprintf(output, "%d\n", min[i]);
	  for (i = 0; i < ALPH_LEN; i++) fprintf(output, "%d\n", max[i]);
	  numBox++;
     }
     fprintf(stderr, "Index creation complete.\n");
     fprintf(stderr, "Number of boxes = %d\n", numBox);
}



/*
 * Creates the index structure for fixed box density. 
 * The number of occurences of each letter is used as the feature vector.
 */
void create_index_density_nolookahead(FILE *input, FILE *output,
				      int window_size, double min_density)
{
     char c, cold, window[MAX_WINDOW];
     int i, j, start, numBox, numD, numchar, volume, 
	  intdata[ALPH_LEN], min[ALPH_LEN], max[ALPH_LEN];
     double density, averagevolume;

     numchar = file_length(input);

     fprintf(stderr, "Creating index structure. Window size = %d, "
	     "min density = %f\n", window_size, min_density);
     fprintf(output, "%d %d %d\n", window_size, ALPH_LEN, -1);

     numBox = 0;
     for(i = 0; i < ALPH_LEN; i++)
	  intdata[i] = 0;

     /* Go through the first window of data, add the letters to window[],
	and find the point (intdata[]) corresponding to this window. */
     j = 0;
     while (j < window_size) {
	  c = fgetc(input);
	  if (c != '\n') {
	       increment(c, intdata);
	       window[j] = c;
	       j++;
	  }
     }
     /* Initially, the box only contains this one point. */
     for (i = 0; i < ALPH_LEN; i++)
	  min[i] = max[i] = intdata[i]; 

     start = 0;
     numD = 1; 
     j = 0; 
     averagevolume = 0.0;
     while ((c = fgetc(input)) > 0) {
	  if (c != '\n'){
	       numD++;
      
	       cold = window[start];
	       window[start] = c;
	       decrement(cold, intdata);
	       increment(c, intdata);
      
	       start = (start + 1) % window_size;
      
	       volume = 1;
	       for (i = 0; i < ALPH_LEN; i++) {
		    if (intdata[i] < min[i]) min[i] = intdata[i];
		    if (intdata[i] > max[i]) max[i] = intdata[i];
		    volume *= max[i] - min[i] + 1;
	       }
               density = (double)numD/volume;
      
	       if (density < min_density) {
		    fprintf(stderr, "capacity=%d, volume=%d, density=%f\n", 
                            numD,volume,density);
		    fprintf(output, "%d\n", numD);
		    for(i = 0; i < ALPH_LEN; i++)
			 fprintf(output, "%d\n", min[i]);
		    for(i = 0; i < ALPH_LEN; i++)
			 fprintf(output, "%d\n", max[i]);
		    numD = 0; 
		    numBox++;
		    for (i = 0; i < ALPH_LEN; i++)
			 min[i] = max[i] = intdata[i];
                    averagevolume += volume/1000.0;
	       }
	  }
     }

     if (numD > 0) {
	  fprintf(output, "%d\n", numD);
	  for (i = 0; i < ALPH_LEN; i++) fprintf(output, "%d\n", min[i]);
	  for (i = 0; i < ALPH_LEN; i++) fprintf(output, "%d\n", max[i]);
	  numBox++;
     }
     fprintf(stderr, "Index creation complete.\n");
     fprintf(stderr, "Number of boxes = %d\n", numBox);
     fprintf(stderr, "Average volume (x 1000)= %f\n",averagevolume/numBox);

} /* create_index_density_nolookahead */



/*
 * Creates the index structure for fixed box density. 
 * The number of occurences of each letter is used as the feature vector.
 */
void create_index_density_lookahead(FILE *input, FILE *output, int window_size, 
				    int max_density)
{
     char c, cold, window[MAX_WINDOW];
     int i, j, start, numBox, numD, numchar, volume, density, 
	  intdata[ALPH_LEN], min[ALPH_LEN], max[ALPH_LEN];

     numchar = file_length(input);

     fprintf(stderr, "Creating index structure. Window size = %d, "
	     "max density = %d\n", window_size, max_density);
     fprintf(output, "%d %d %d\n", window_size, ALPH_LEN, -1);

     numBox = 0;
     for(i = 0; i < ALPH_LEN; i++)
	  intdata[i] = 0;

     /* Go through the first window of data, add the letters to window[],
	and find the point (intdata[]) corresponding to this window. */
     j = 0;
     while (j < window_size) {
	  c = fgetc(input);
	  if (c != '\n') {
	       increment(c, intdata);
	       window[j] = c;
	       j++;
	  }
     }
     /* Initially, the box only contains this one point. */
     for (i = 0; i < ALPH_LEN; i++)
	  min[i] = max[i] = intdata[i]; 

     start = 0;
     numD = 1; 
     j = 0; 
     while ((c = fgetc(input)) > 0) {
	  if (c != '\n'){
	       numD++;
      
	       cold = window[start];
	       window[start] = c;
	       decrement(cold, intdata);
	       increment(c, intdata);
      
	       start = (start + 1) % window_size;
      
	       volume = 1;
	       for (i = 0; i < ALPH_LEN; i++) {
		    if (intdata[i] < min[i]) min[i] = intdata[i];
		    if (intdata[i] > max[i]) max[i] = intdata[i];
		    volume *= max[i] - min[i] + 1;
	       }
               density = volume/numD;
      
	       if (density >= max_density) {
		    fprintf(output, "%d\n", numD);
		    for(i = 0; i < ALPH_LEN; i++)
			 fprintf(output, "%d\n", min[i]);
		    for(i = 0; i < ALPH_LEN; i++)
			 fprintf(output, "%d\n", max[i]);
		    numD = 0; 
		    numBox++;
		    for (i = 0; i < ALPH_LEN; i++)
			 min[i] = max[i] = intdata[i];
	       }
	  }
     }

     if (numD > 0) {
	  fprintf(output, "%d\n", numD);
	  for (i = 0; i < ALPH_LEN; i++) fprintf(output, "%d\n", min[i]);
	  for (i = 0; i < ALPH_LEN; i++) fprintf(output, "%d\n", max[i]);
	  numBox++;
     }
     fprintf(stderr, "Index creation completed\n");
     fprintf(stderr, "Number of boxes = %d\n", numBox);

} /* create_index_density_lookahead */


/* Reads an index from a file.  Always returns an index; prints an
 * error message and exits if an error occurs.  Consequences are
 * undefined if the file is not of the proper format.
 */
struct index *read_index(const char *filename)
{
     FILE *file;
     struct index *index;
     int i; /* Iterates over the number of boxes */
     int j; /* Iterates over the dimensions */
     int capacity;
     long start;
     
     assert(filename);
     file = fopen(filename, "r");
     if (!file)
	  fatal_perror("%s: fopen", filename);

     index = (struct index *)malloc(sizeof(struct index));
     if (!index)
	  fatal_perror("malloc");
     fscanf(file, "%d %d %d", &index->window_size, &index->dimensions, 
	    &index->num_boxes);
     if (index->num_boxes <= 0) {
	  /* Initially, allocate one page. */
	  index->num_boxes = floor(4096 / sizeof(MBR));
	  /* Set the box capacity to -1 */
	  index->box_capacity = -1;
     } else
	  index->box_capacity = 0;
     index->elements = (MBR*)malloc(sizeof(MBR) * index->num_boxes);
     if (!index->elements)
	  fatal_perror("malloc");
     
     i = 0;
     start = 0;
     while (fscanf(file, "%d", &capacity) > 0) {
	  if (i == index->num_boxes) {
	    /* More boxes than expected.  Allocate more memory. */
	    index->num_boxes *= 2;
	    index->elements = (MBR*)realloc(index->elements, 
					    sizeof(MBR) * index->num_boxes);
	    if (!index->elements)
		 fatal_perror("realloc");
	  }
	  index->elements[i].box_number = i;
	  index->elements[i].capacity = capacity;
	  index->elements[i].start = start;
	  start += capacity;
	  /* Set the box capacity for the whole index if it is fixed. */
	  if (i == 0 && index->box_capacity == 0)
	       index->box_capacity = capacity;
	  for (j = 0; j < ALPH_LEN; j++)
	       fscanf(file, "%d", &index->elements[i].lower[j]);
	  for (j = 0; j < ALPH_LEN; j++)
	       fscanf(file, "%d", &index->elements[i].higher[j]);
	  i++;
     }
     index->num_boxes = i;
     index->memory_usage = sizeof(struct index) + index->num_boxes * sizeof(MBR);
     return index;
}

struct index *index_dup(const struct index *oldindex)
{
     struct index *newindex;

     assert(oldindex);
     newindex = malloc(sizeof(struct index));
     if (!newindex) fatal_perror("malloc");
     memcpy(newindex, oldindex, sizeof(struct index));
     newindex->elements = malloc(oldindex->num_boxes * sizeof(MBR));
     if (!newindex->elements) fatal_perror("malloc");
     memcpy(newindex->elements, oldindex->elements, 
            oldindex->num_boxes * sizeof(MBR));
     return newindex;
}

void index_grow(struct index *index, int growth)
{
     int i, d;
     MBR *box;

     assert(index);
     for (i = 0; i < index->num_boxes; i++) {
          box = &index->elements[i];
          for (d = 0; d < 4; d++) {
               box->lower[d] -= growth;
               if (box->lower[d] < 0) box->lower[d] = 0;
               box->higher[d] += growth;
               if (box->higher[d] >= index->window_size)
                    box->higher[d] = index->window_size - 1;
          }
     }
}

void free_index(struct index *index)
{
     assert(index);
     assert(index->num_boxes == 0 || index->elements);
     free(index->elements);
     index->elements = 0;
     free(index);
}
