1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-04-30 15:49:41 +02:00
2010-01-24 09:19:39 +00:00

365 lines
8.5 KiB
C

/********************************************************************
* *
* THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
* *
* THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
* by the Xiph.Org Foundation http://www.xiph.org/ *
* *
********************************************************************
function: utility main for training codebooks
last mod: $Id: train.c,v 1.4 2008-02-02 15:54:09 richardash1981 Exp $
********************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <errno.h>
#include <signal.h>
#include "vqgen.h"
#include "vqext.h"
#include "bookutil.h"
static char *rline(FILE *in,FILE *out,int pass){
while(1){
char *line=get_line(in);
if(line && line[0]=='#'){
if(pass)fprintf(out,"%s\n",line);
}else{
return(line);
}
}
}
/* command line:
trainvq vqfile [options] trainfile [trainfile]
options: -params entries,dim,quant
-subvector start[,num]
-error desired_error
-iterations iterations
*/
static void usage(void){
fprintf(stderr, "\nOggVorbis %s VQ codebook trainer\n\n"
"<foo>vqtrain vqfile [options] [datasetfile] [datasetfile]\n"
"options: -p[arams] <entries,dim,quant>\n"
" -s[ubvector] <start[,num]>\n"
" -e[rror] <desired_error>\n"
" -i[terations] <maxiterations>\n"
" -d[istance] quantization mesh spacing for density limitation\n"
" -b <dummy> eliminate cell size biasing; use normal LBG\n\n"
" -c <dummy> Use centroid (not median) midpoints\n"
"examples:\n"
" train a new codebook to 1%% tolerance on datafile 'foo':\n"
" xxxvqtrain book -p 256,6,8 -e .01 foo\n"
" (produces a trained set in book-0.vqi)\n\n"
" continue training 'book-0.vqi' (produces book-1.vqi):\n"
" xxxvqtrain book-0.vqi\n\n"
" add subvector from element 1 to <dimension> from files\n"
" data*.m to the training in progress, prodicing book-1.vqi:\n"
" xxxvqtrain book-0.vqi -s 1,1 data*.m\n\n",vqext_booktype);
}
int exiting=0;
void setexit(int dummy){
fprintf(stderr,"\nexiting... please wait to finish this iteration\n");
exiting=1;
}
int main(int argc,char *argv[]){
vqgen v;
int entries=-1,dim=-1;
int start=0,num=-1;
float desired=.05f,mindist=0.f;
int iter=1000;
int biasp=1;
int centroid=0;
FILE *out=NULL;
char *line;
long i,j,k;
int init=0;
q.quant=-1;
argv++;
if(!*argv){
usage();
exit(0);
}
/* get the book name, a preexisting book to continue training */
{
FILE *in=NULL;
char *filename=alloca(strlen(*argv)+30),*ptr;
strcpy(filename,*argv);
in=fopen(filename,"r");
ptr=strrchr(filename,'-');
if(ptr){
int num;
ptr++;
num=atoi(ptr);
sprintf(ptr,"%d.vqi",num+1);
}else
strcat(filename,"-0.vqi");
out=fopen(filename,"w");
if(out==NULL){
fprintf(stderr,"Unable to open %s for writing\n",filename);
exit(1);
}
if(in){
/* we wish to suck in a preexisting book and continue to train it */
float a;
line=rline(in,out,1);
if(strcmp(line,vqext_booktype)){
fprintf(stderr,"wrong book type; %s!=%s\n",line,vqext_booktype);
exit(1);
}
line=rline(in,out,1);
if(sscanf(line,"%d %d %d",&entries,&dim,&vqext_aux)!=3){
fprintf(stderr,"Syntax error reading book file\n");
exit(1);
}
vqgen_init(&v,dim,vqext_aux,entries,mindist,
vqext_metric,vqext_weight,centroid);
init=1;
/* quant setup */
line=rline(in,out,1);
if(sscanf(line,"%ld %ld %d %d",&q.min,&q.delta,
&q.quant,&q.sequencep)!=4){
fprintf(stderr,"Syntax error reading book file\n");
exit(1);
}
/* quantized entries */
i=0;
for(j=0;j<entries;j++){
for(k=0;k<dim;k++){
line=rline(in,out,0);
sscanf(line,"%f",&a);
v.entrylist[i++]=a;
}
}
vqgen_unquantize(&v,&q);
/* bias */
i=0;
for(j=0;j<entries;j++){
line=rline(in,out,0);
sscanf(line,"%f",&a);
v.bias[i++]=a;
}
v.seeded=1;
{
float *b=alloca((dim+vqext_aux)*sizeof(float));
i=0;
while(1){
for(k=0;k<dim+vqext_aux;k++){
line=rline(in,out,0);
if(!line)break;
sscanf(line,"%f",b+k);
}
if(feof(in))break;
vqgen_addpoint(&v,b,b+dim);
}
}
fclose(in);
}
}
/* get the rest... */
argv=argv++;
while(*argv){
if(argv[0][0]=='-'){
/* it's an option */
if(!argv[1]){
fprintf(stderr,"Option %s missing argument.\n",argv[0]);
exit(1);
}
switch(argv[0][1]){
case 'p':
if(sscanf(argv[1],"%d,%d,%d",&entries,&dim,&q.quant)!=3)
goto syner;
break;
case 's':
if(sscanf(argv[1],"%d,%d",&start,&num)!=2){
num= -1;
if(sscanf(argv[1],"%d",&start)!=1)
goto syner;
}
break;
case 'e':
if(sscanf(argv[1],"%f",&desired)!=1)
goto syner;
break;
case 'd':
if(sscanf(argv[1],"%f",&mindist)!=1)
goto syner;
if(init)v.mindist=mindist;
break;
case 'i':
if(sscanf(argv[1],"%d",&iter)!=1)
goto syner;
break;
case 'b':
biasp=0;
break;
case 'c':
centroid=1;
break;
default:
fprintf(stderr,"Unknown option %s\n",argv[0]);
exit(1);
}
argv+=2;
}else{
/* it's an input file */
char *file=strdup(*argv++);
FILE *in;
int cols=-1;
if(!init){
if(dim==-1 || entries==-1 || q.quant==-1){
fprintf(stderr,"-p required when training a new set\n");
exit(1);
}
vqgen_init(&v,dim,vqext_aux,entries,mindist,
vqext_metric,vqext_weight,centroid);
init=1;
}
in=fopen(file,"r");
if(in==NULL){
fprintf(stderr,"Could not open input file %s\n",file);
exit(1);
}
fprintf(out,"# training file entry: %s\n",file);
while((line=rline(in,out,0))){
if(cols==-1){
char *temp=line;
while(*temp==' ')temp++;
for(cols=0;*temp;cols++){
while(*temp>32)temp++;
while(*temp==' ')temp++;
}
fprintf(stderr,"%d colums per line in file %s\n",cols,file);
}
{
int i;
float b[cols];
if(start+num*dim>cols){
fprintf(stderr,"ran out of columns reading %s\n",file);
exit(1);
}
while(*line==' ')line++;
for(i=0;i<cols;i++){
/* static length buffer bug workaround */
char *temp=line;
char old;
while(*temp>32)temp++;
old=temp[0];
temp[0]='\0';
b[i]=atof(line);
temp[0]=old;
while(*line>32)line++;
while(*line==' ')line++;
}
if(num<=0)num=(cols-start)/dim;
for(i=0;i<num;i++)
vqext_addpoint_adj(&v,b,start+i*dim,dim,cols,num);
}
}
fclose(in);
}
}
if(!init){
fprintf(stderr,"No input files!\n");
exit(1);
}
vqext_preprocess(&v);
/* train the book */
signal(SIGTERM,setexit);
signal(SIGINT,setexit);
for(i=0;i<iter && !exiting;i++){
float result;
if(i!=0){
vqgen_unquantize(&v,&q);
vqgen_cellmetric(&v);
}
result=vqgen_iterate(&v,biasp);
vqext_quantize(&v,&q);
if(result<desired)break;
}
/* save the book */
fprintf(out,"# OggVorbis VQ codebook trainer, intermediate file\n");
fprintf(out,"%s\n",vqext_booktype);
fprintf(out,"%d %d %d\n",entries,dim,vqext_aux);
fprintf(out,"%ld %ld %d %d\n",
q.min,q.delta,q.quant,q.sequencep);
/* quantized entries */
fprintf(out,"# quantized entries---\n");
i=0;
for(j=0;j<entries;j++)
for(k=0;k<dim;k++)
fprintf(out,"%d\n",(int)(rint(v.entrylist[i++])));
fprintf(out,"# biases---\n");
i=0;
for(j=0;j<entries;j++)
fprintf(out,"%f\n",v.bias[i++]);
/* we may have done the density limiting mesh trick; refetch the
training points from the temp file */
rewind(v.asciipoints);
fprintf(out,"# points---\n");
{
/* sloppy, no error handling */
long bytes;
char buff[4096];
while((bytes=fread(buff,1,4096,v.asciipoints)))
while(bytes)bytes-=fwrite(buff,1,bytes,out);
}
fclose(out);
fclose(v.asciipoints);
vqgen_unquantize(&v,&q);
vqgen_cellmetric(&v);
exit(0);
syner:
fprintf(stderr,"Syntax error in argument '%s'\n",*argv);
exit(1);
}