/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001             *
 * by the Xiph.Org Foundation http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

 function: utility main for training codebooks
 last mod: $Id: train.c,v 1.4 2008-02-02 15:54:09 richardash1981 Exp $

 ********************************************************************/

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include "vqgen.h"
#include "vqext.h"
#include "bookutil.h"

static char *rline(FILE *in,FILE *out,int pass){
  while(1){
    char *line=get_line(in);
    if(line && line[0]=='#'){
      if(pass)fprintf(out,"%s\n",line);
    }else{
      return(line);
    }
  }
}

/* command line:
   trainvq  vqfile [options] trainfile [trainfile]

   options: -params     entries,dim,quant
            -subvector  start[,num]
	    -error      desired_error
	    -iterations iterations
*/

static void usage(void){
  fprintf(stderr, "\nOggVorbis %s VQ codebook trainer\n\n"
	  "<foo>vqtrain vqfile [options] [datasetfile] [datasetfile]\n"
	  "options: -p[arams]     <entries,dim,quant>\n"
	  "         -s[ubvector]  <start[,num]>\n"
	  "         -e[rror]      <desired_error>\n"
	  "         -i[terations] <maxiterations>\n"
	  "         -d[istance]   quantization mesh spacing for density limitation\n"
	  "         -b <dummy>    eliminate cell size biasing; use normal LBG\n\n"
	  "         -c <dummy>    Use centroid (not median) midpoints\n"

	  "examples:\n"
	  "   train a new codebook to 1%% tolerance on datafile 'foo':\n"
	  "      xxxvqtrain book -p 256,6,8 -e .01 foo\n"
	  "      (produces a trained set in book-0.vqi)\n\n"
	  "   continue training 'book-0.vqi' (produces book-1.vqi):\n"
	  "      xxxvqtrain book-0.vqi\n\n"
	  "   add subvector from element 1 to <dimension> from files\n"
	  "      data*.m to the training in progress, prodicing book-1.vqi:\n"
	  "      xxxvqtrain book-0.vqi -s 1,1 data*.m\n\n",vqext_booktype);
}

int exiting=0;
void setexit(int dummy){
  fprintf(stderr,"\nexiting... please wait to finish this iteration\n");
  exiting=1;
}

int main(int argc,char *argv[]){
  vqgen v;

  int entries=-1,dim=-1;
  int start=0,num=-1;
  float desired=.05f,mindist=0.f;
  int iter=1000;
  int biasp=1;
  int centroid=0;

  FILE *out=NULL;
  char *line;
  long i,j,k;
  int init=0;
  q.quant=-1;

  argv++;
  if(!*argv){
    usage();
    exit(0);
  }

  /* get the book name, a preexisting book to continue training */
  {
    FILE *in=NULL;
    char *filename=alloca(strlen(*argv)+30),*ptr;

    strcpy(filename,*argv);
    in=fopen(filename,"r");
    ptr=strrchr(filename,'-');
    if(ptr){
      int num;
      ptr++;
      num=atoi(ptr);
      sprintf(ptr,"%d.vqi",num+1);
    }else
      strcat(filename,"-0.vqi");
    
    out=fopen(filename,"w");
    if(out==NULL){
      fprintf(stderr,"Unable to open %s for writing\n",filename);
      exit(1);
    }
    
    if(in){
      /* we wish to suck in a preexisting book and continue to train it */
      float a;
      
      line=rline(in,out,1);
      if(strcmp(line,vqext_booktype)){
	fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype);
	exit(1);
      } 
      
      line=rline(in,out,1);
      if(sscanf(line,"%d %d %d",&entries,&dim,&vqext_aux)!=3){
	fprintf(stderr,"Syntax error reading book file\n");
	exit(1);
      }
      
      vqgen_init(&v,dim,vqext_aux,entries,mindist,
		 vqext_metric,vqext_weight,centroid);
      init=1;
      
      /* quant setup */
      line=rline(in,out,1);
      if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
		&q.quant,&q.sequencep)!=4){
	fprintf(stderr,"Syntax error reading book file\n");
	exit(1);
      }
      
      /* quantized entries */
      i=0;
      for(j=0;j<entries;j++){
	for(k=0;k<dim;k++){
	  line=rline(in,out,0);
	  sscanf(line,"%f",&a);
	  v.entrylist[i++]=a;
	}
      }      
      vqgen_unquantize(&v,&q);

      /* bias */
      i=0;
      for(j=0;j<entries;j++){
	line=rline(in,out,0);
	sscanf(line,"%f",&a);
	v.bias[i++]=a;
      }
      
      v.seeded=1;
      {
	float *b=alloca((dim+vqext_aux)*sizeof(float));
	i=0;
	while(1){
	  for(k=0;k<dim+vqext_aux;k++){
	    line=rline(in,out,0);
	    if(!line)break;
	    sscanf(line,"%f",b+k);
	  }
	  if(feof(in))break;
	  vqgen_addpoint(&v,b,b+dim);
	}
      }
      
      fclose(in);
    }
  }
  
  /* get the rest... */
  argv=argv++;
  while(*argv){
    if(argv[0][0]=='-'){
      /* it's an option */
      if(!argv[1]){
	fprintf(stderr,"Option %s missing argument.\n",argv[0]);
	exit(1);
      }
      switch(argv[0][1]){
      case 'p':
	if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3)
	  goto syner;
	break;
      case 's':
	if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
	  num= -1;
	  if(sscanf(argv[1],"%d",&start)!=1)
	    goto syner;
	}
	break;
      case 'e':
	if(sscanf(argv[1],"%f",&desired)!=1)
	  goto syner;
	break;
      case 'd':
	if(sscanf(argv[1],"%f",&mindist)!=1)
	  goto syner;
	if(init)v.mindist=mindist;
	break;
      case 'i':
	if(sscanf(argv[1],"%d",&iter)!=1)
	  goto syner;
	break;
      case 'b':
	biasp=0;
	break;
      case 'c':
	centroid=1;
	break;
      default:
	fprintf(stderr,"Unknown option %s\n",argv[0]);
	exit(1);
      }
      argv+=2;
    }else{
      /* it's an input file */
      char *file=strdup(*argv++);
      FILE *in;
      int cols=-1;

      if(!init){
	if(dim==-1 || entries==-1 || q.quant==-1){
	  fprintf(stderr,"-p required when training a new set\n");
	  exit(1);
	}
	vqgen_init(&v,dim,vqext_aux,entries,mindist,
		   vqext_metric,vqext_weight,centroid);
	init=1;
      }

      in=fopen(file,"r");
      if(in==NULL){
	fprintf(stderr,"Could not open input file %s\n",file);
	exit(1);
      }
      fprintf(out,"# training file entry: %s\n",file);

      while((line=rline(in,out,0))){
	if(cols==-1){
	  char *temp=line;
	  while(*temp==' ')temp++;
	  for(cols=0;*temp;cols++){
	    while(*temp>32)temp++;
	    while(*temp==' ')temp++;
	  }

	  fprintf(stderr,"%d colums per line in file %s\n",cols,file);

	}
	{
	  int i;
	  float b[cols];
	  if(start+num*dim>cols){
	    fprintf(stderr,"ran out of columns reading %s\n",file);
	    exit(1);
	  }
	  while(*line==' ')line++;
	  for(i=0;i<cols;i++){

	    /* static length buffer bug workaround */
	    char *temp=line;
	    char old;
	    while(*temp>32)temp++;

	    old=temp[0];
	    temp[0]='\0';
	    b[i]=atof(line);
	    temp[0]=old;
	    
	    while(*line>32)line++;
	    while(*line==' ')line++;
	  }
	  if(num<=0)num=(cols-start)/dim;
	  for(i=0;i<num;i++)
	    vqext_addpoint_adj(&v,b,start+i*dim,dim,cols,num);

	}
      }
      fclose(in);
    }
  }

  if(!init){
    fprintf(stderr,"No input files!\n");
    exit(1);
  }

  vqext_preprocess(&v);

  /* train the book */
  signal(SIGTERM,setexit);
  signal(SIGINT,setexit);

  for(i=0;i<iter && !exiting;i++){
    float result;
    if(i!=0){
      vqgen_unquantize(&v,&q);
      vqgen_cellmetric(&v);
    }
    result=vqgen_iterate(&v,biasp);
    vqext_quantize(&v,&q);
    if(result<desired)break;
  }

  /* save the book */

  fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
  fprintf(out,"%s\n",vqext_booktype);
  fprintf(out,"%d %d %d\n",entries,dim,vqext_aux);
  fprintf(out,"%ld %ld %d %d\n",
	  q.min,q.delta,q.quant,q.sequencep);

  /* quantized entries */
  fprintf(out,"# quantized entries---\n");
  i=0;
  for(j=0;j<entries;j++)
    for(k=0;k<dim;k++)
      fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
  
  fprintf(out,"# biases---\n");
  i=0;
  for(j=0;j<entries;j++)
    fprintf(out,"%f\n",v.bias[i++]);

  /* we may have done the density limiting mesh trick; refetch the
     training points from the temp file */

  rewind(v.asciipoints);
  fprintf(out,"# points---\n");
  {
    /* sloppy, no error handling */
    long bytes;
    char buff[4096];
    while((bytes=fread(buff,1,4096,v.asciipoints)))
      while(bytes)bytes-=fwrite(buff,1,bytes,out);
  }

  fclose(out);
  fclose(v.asciipoints);

  vqgen_unquantize(&v,&q);
  vqgen_cellmetric(&v);
  exit(0);

  syner:
    fprintf(stderr,"Syntax error in argument '%s'\n",*argv);
    exit(1);
}