audacity/lib-src/libvorbis/vq/latticepare.c

/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001             *
 * by the Xiph.Org Foundation http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

 function: utility for paring low hit count cells from lattice codebook
 last mod: $Id: latticepare.c,v 1.7 2008-02-02 15:54:08 richardash1981 Exp $

 ********************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include <errno.h>
#include "../lib/scales.h"
#include "bookutil.h"
#include "vqgen.h"
#include "vqsplit.h"
#include "../lib/os.h"
/* Lattice codebooks have two strengths: 1) important features that are
poorly modelled by global error minimization training (eg, strong
peaks) are not neglected; 2) a compact quantized representation.

A fully populated lattice codebook, however, swings point 1 too far
in the opposite direction; rare features need not be modelled quite
so religiously, so we waste bits unless we eliminate the least common
cells. The codebook representation supports unused cells, so we need
to tag such cells and build an auxiliary (non-threshold) search
mechanism to find the proper match quickly */
/* Two basic steps: first, pare the cell for which dispersal creates
the least additional error. This will naturally choose low-population
cells and cells that have not taken on points from neighboring paring
(but does not result in the lattice collapsing inward and leaving
low-population areas totally unmodelled). After paring has removed
the desired number of cells, we need to build an auxiliary search for
each culled point */
/* Although lattice books (due to threshold-based matching) do not
actually use error to make cell selections (in fact, it need not bear
any relation), the 'secondbest' entry finder here is error/distance
based, so latticepare is only useful on such books */
/* command line:
latticepare latticebook.vqh input_data.vqd <target_cells> <protected_cells> <base>
writes a new output book to <base>.vqh
*/
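/* Illustrative invocation (file names are examples only):
latticepare floor_44.vqh floor_44.vqd 1024 10 floor_44_pared
loads the codebook in floor_44.vqh and the training vectors in
floor_44.vqd, pares the book down to 1024 used cells while protecting
the 10 most heavily hit cells from dispersal, and writes the result to
floor_44_pared.vqh */
/* squared Euclidean distance between two el-element vectors */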
static float _dist(int el,float *a, float *b){
int i;
float acc=0.f;
for(i=0;i<el;i++){
float val=(a[i]-b[i]);
acc+=val*val;
}
return(acc);
}
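/* flat array of training vectors, deinterleaved into dim-contiguous
order by add_vector(); 'points' counts individual floats while loading
and is converted to a vector count in main() once the guard points
have been appended */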
static float *pointlist;
static long points=0;
void add_vector(codebook *b,float *vec,long n){
int dim=b->dim,i,j;
int step=n/dim;
for(i=0;i<step;i++){
for(j=i;j<n;j+=step){
pointlist[points++]=vec[j];
}
}
}
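/* index of the cell the threshold quantizer would select if every cell
in the lattice were populated, built one scalar at a time from the
quantthresh/quantmap tables */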
static int bestm(codebook *b,float *vec){
encode_aux_threshmatch *tt=b->c->thresh_tree;
int dim=b->dim;
int i,k,o;
int best=0;
/* what would be the closest match if the codebook was fully
populated? */
for(k=0,o=dim-1;k<dim;k++,o--){
int i;
for(i=0;i<tt->threshvals-1;i++)
if(vec[o]<tt->quantthresh[i])break;
best=(best*tt->quantvals)+tt->quantmap[i];
}
return(best);
}
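/* nearest used entry to vec by squared distance: returns the threshold
match outright when it is populated and no exclusion is requested,
otherwise scans every used entry other than the threshold match and
'current' */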
static int closest(codebook *b,float *vec,int current){
encode_aux_threshmatch *tt=b->c->thresh_tree;
int dim=b->dim;
int i,k,o;
float bestmetric=0;
int bestentry=-1;
int best=bestm(b,vec);
if(current<0 && b->c->lengthlist[best]>0)return best;
for(i=0;i<b->entries;i++){
if(b->c->lengthlist[i]>0 && i!=best && i!=current){
float thismetric=_dist(dim, vec, b->valuelist+i*dim);
if(bestentry==-1 || thismetric<bestmetric){
bestentry=i;
bestmetric=thismetric;
}
}
}
return(bestentry);
}
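/* dispersal cost estimate: squared distance between the threshold-match
cell and 'secondbest', scaled by the reciprocal of the first cell's
distance from the origin */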
static float _heuristic(codebook *b,float *ppt,int secondbest){
float *secondcell=b->valuelist+secondbest*b->dim;
int best=bestm(b,ppt);
float *firstcell=b->valuelist+best*b->dim;
float error=_dist(b->dim,firstcell,secondcell);
float *zero=alloca(b->dim*sizeof(float));
float fromzero;
memset(zero,0,b->dim*sizeof(float));
fromzero=sqrt(_dist(b->dim,firstcell,zero));
return(error/fromzero);
}
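/* qsort comparator: sorts an array of long* in descending order of the
values they point at (used to rank cells by hit count) */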
static int longsort(const void *a, const void *b){
return **(long **)b-**(long **)a;
}
void usage(void){
fprintf(stderr,"Ogg/Vorbis lattice codebook paring utility\n\n"
"usage: latticepare book.vqh data.vqd <target_cells> <protected_cells> base\n"
"where <target_cells> is the desired number of final cells (or -1\n"
" for no change)\n"
" <protected_cells> is the number of highest-hit count cells\n"
" to protect from dispersal\n"
" base is the base name (not including .vqh) of the new\n"
" book\n\n");
exit(1);
}
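/* overall flow: parse arguments and load the codebook plus training
data, assign every point to its first- and second-choice cells,
repeatedly disperse the cell whose removal costs the least until only
<target_cells> remain, build an auxiliary nearest-match tree for points
that no longer threshold-match, then recount hits, rebuild the code
lengths and write the pared book to <base>.vqh */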
int main(int argc,char *argv[]){
char *basename;
codebook *b=NULL;
int entries=0;
int dim=0;
long i,j,target=-1,protect=-1;
FILE *out=NULL;
int argnum=0;
argv++;
if(*argv==NULL){
usage();
exit(1);
}
while(*argv){
if(*argv[0]=='-'){
argv++;
}else{
switch (argnum++){
case 0:case 1:
{
/* yes, this is evil. However, it's very convenient to parse file
extensions */
/* input file. What kind? */
char *dot;
char *ext=NULL;
char *name=strdup(*argv++);
dot=strrchr(name,'.');
if(dot)
ext=dot+1;
else{
ext="";
}
/* codebook */
if(!strcmp(ext,"vqh")){
basename=strrchr(name,'/');
if(basename)
basename=strdup(basename)+1;
else
basename=strdup(name);
dot=strrchr(basename,'.');
if(dot)*dot='\0';
b=codebook_load(name);
dim=b->dim;
entries=b->entries;
}
/* data file; we do actually need to suck it into memory */
/* we're dealing with just one book, so we can de-interleave */
if(!strcmp(ext,"vqd") && !points){
int cols;
long lines=0;
char *line;
float *vec;
FILE *in=fopen(name,"r");
if(!in){
fprintf(stderr,"Could not open input file %s\n",name);
exit(1);
}
reset_next_value();
line=setup_line(in);
/* count cols before we start reading */
{
char *temp=line;
while(*temp==' ')temp++;
for(cols=0;*temp;cols++){
while(*temp>32)temp++;
while(*temp==' ')temp++;
}
}
vec=alloca(cols*sizeof(float));
/* count, then load, to avoid fragmenting the hell out of
memory */
while(line){
lines++;
for(j=0;j<cols;j++)
if(get_line_value(in,vec+j)){
fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
exit(1);
}
if((lines&0xff)==0)spinnit("counting samples...",lines*cols);
line=setup_line(in);
}
pointlist=_ogg_malloc((cols*lines+entries*dim)*sizeof(float));
rewind(in);
line=setup_line(in);
while(line){
lines--;
for(j=0;j<cols;j++)
if(get_line_value(in,vec+j)){
fprintf(stderr,"Too few columns on line %ld in data file\n",lines);
exit(1);
}
/* deinterleave, add to heap */
add_vector(b,vec,cols);
if((lines&0xff)==0)spinnit("loading samples...",lines*cols);
line=setup_line(in);
}
fclose(in);
}
}
break;
case 2:
target=atol(*argv++);
if(target==0)target=entries;
break;
case 3:
protect=atol(*argv++);
break;
case 4:
{
char *buff=alloca(strlen(*argv)+5);
sprintf(buff,"%s.vqh",*argv);
basename=*argv++;
out=fopen(buff,"w");
if(!out){
fprintf(stderr,"unable ot open %s for output",buff);
exit(1);
}
}
break;
default:
usage();
}
}
}
if(!entries || !points || !out)usage();
if(target==-1)usage();
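/* Note: 'points' still counts individual floats at this stage; after
the guard points below (one copy of every codebook entry, excluded from
the hit/error statistics by the i<points-entries tests) are appended,
it is divided by dim to become a vector count. */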
/* add guard points */
for(i=0;i<entries;i++)
for(j=0;j<dim;j++)
pointlist[points++]=b->valuelist[i*dim+j];
points/=dim;
/* set up auxiliary vectors for error tracking */
{
encode_aux_nearestmatch *nt=NULL;
long pointssofar=0;
long *pointindex;
long indexedpoints=0;
long *entryindex;
long *reventry;
long *membership=_ogg_malloc(points*sizeof(long));
long *firsthead=_ogg_malloc(entries*sizeof(long));
long *secondary=_ogg_malloc(points*sizeof(long));
long *secondhead=_ogg_malloc(entries*sizeof(long));
long *cellcount=_ogg_calloc(entries,sizeof(long));
long *cellcount2=_ogg_calloc(entries,sizeof(long));
float *cellerror=_ogg_calloc(entries,sizeof(float));
float *cellerrormax=_ogg_calloc(entries,sizeof(float));
long cellsleft=entries;
for(i=0;i<points;i++)membership[i]=-1;
for(i=0;i<entries;i++)firsthead[i]=-1;
for(i=0;i<points;i++)secondary[i]=-1;
for(i=0;i<entries;i++)secondhead[i]=-1;
for(i=0;i<points;i++){
/* assign vectors to the nearest cell. Also keep track of second
nearest for error statistics */
float *ppt=pointlist+i*dim;
int firstentry=closest(b,ppt,-1);
int secondentry=closest(b,ppt,firstentry);
float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt);
float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
if(!(i&0xff))spinnit("initializing... ",points-i);
membership[i]=firsthead[firstentry];
firsthead[firstentry]=i;
secondary[i]=secondhead[secondentry];
secondhead[secondentry]=i;
if(i<points-entries){
cellerror[firstentry]+=secondmetric-firstmetric;
cellerrormax[firstentry]=max(cellerrormax[firstentry],
_heuristic(b,ppt,secondentry));
cellcount[firstentry]++;
cellcount2[secondentry]++;
}
}
/* which cells are most heavily populated? Protect as many from
dispersal as the user has requested */
{
long **countindex=_ogg_calloc(entries,sizeof(long *));
for(i=0;i<entries;i++)countindex[i]=cellcount+i;
qsort(countindex,entries,sizeof(long *),longsort);
for(i=0;i<protect;i++){
int ptr=countindex[i]-cellcount;
cellerrormax[ptr]=9e30f; /* far above any real heuristic value, so protected cells are never selected for dispersal */
}
}
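/* dump the per-cell first/second-choice hit counts, decomposing each
entry index into its per-dimension quantizer digits for readability */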
{
fprintf(stderr,"\r");
for(i=0;i<entries;i++){
/* decompose index */
int entry=i;
for(j=0;j<dim;j++){
fprintf(stderr,"%d:",entry%b->c->thresh_tree->quantvals);
entry/=b->c->thresh_tree->quantvals;
}
fprintf(stderr,":%ld/%ld, ",cellcount[i],cellcount2[i]);
}
fprintf(stderr,"\n");
}
/* do the automatic cull request */
while(cellsleft>target){
int bestcell=-1;
float besterror=0;
float besterror2=0;
long head=-1;
char spinbuf[80];
sprintf(spinbuf,"cells left to eliminate: %ld : ",cellsleft-target);
/* find the cell with lowest removal impact */
for(i=0;i<entries;i++){
if(b->c->lengthlist[i]>0){
if(bestcell==-1 || cellerrormax[i]<=besterror2){
if(bestcell==-1 || cellerrormax[i]<besterror2 ||
besterror>cellerror[i]){
besterror=cellerror[i];
besterror2=cellerrormax[i];
bestcell=i;
}
}
}
}
fprintf(stderr,"\reliminating cell %d \n"
" dispersal error of %g max/%g total (%ld hits)\n",
bestcell,besterror2,besterror,cellcount[bestcell]);
/* disperse it. move each point out, adding it (properly) to
the second best */
b->c->lengthlist[bestcell]=0;
head=firsthead[bestcell];
firsthead[bestcell]=-1;
while(head!=-1){
/* head is a point number */
float *ppt=pointlist+head*dim;
int firstentry=closest(b,ppt,-1);
int secondentry=closest(b,ppt,firstentry);
float firstmetric=_dist(dim,b->valuelist+dim*firstentry,ppt);
float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
long next=membership[head];
if(head<points-entries){
cellcount[firstentry]++;
cellcount[bestcell]--;
cellerror[firstentry]+=secondmetric-firstmetric;
cellerrormax[firstentry]=max(cellerrormax[firstentry],
_heuristic(b,ppt,secondentry));
}
membership[head]=firsthead[firstentry];
firsthead[firstentry]=head;
head=next;
if(cellcount[bestcell]%128==0)
spinnit(spinbuf,cellcount[bestcell]+cellcount2[bestcell]);
}
/* now see that all points that had the dispersed cell as second
choice have second choice reassigned */
head=secondhead[bestcell];
secondhead[bestcell]=-1;
while(head!=-1){
float *ppt=pointlist+head*dim;
/* who are we assigned to now? */
int firstentry=closest(b,ppt,-1);
/* what is the new second closest match? */
int secondentry=closest(b,ppt,firstentry);
/* old second closest is the cell being disbanded */
float oldsecondmetric=_dist(dim,b->valuelist+dim*bestcell,ppt);
/* new second closest error */
float secondmetric=_dist(dim,b->valuelist+dim*secondentry,ppt);
long next=secondary[head];
if(head<points-entries){
cellcount2[secondentry]++;
cellcount2[bestcell]--;
cellerror[firstentry]+=secondmetric-oldsecondmetric;
cellerrormax[firstentry]=max(cellerrormax[firstentry],
_heuristic(b,ppt,secondentry));
}
secondary[head]=secondhead[secondentry];
secondhead[secondentry]=head;
head=next;
if(cellcount2[bestcell]%128==0)
spinnit(spinbuf,cellcount2[bestcell]);
}
cellsleft--;
}
/* paring is over. Build decision trees using points that now fall
through the thresh matcher. */
/* we don't free membership; we flatten it in order to use in lp_split */
for(i=0;i<entries;i++){
long head=firsthead[i];
spinnit("rearranging membership cache... ",entries-i);
while(head!=-1){
long next=membership[head];
membership[head]=i;
head=next;
}
}
free(secondhead);
free(firsthead);
free(cellerror);
free(cellerrormax);
free(secondary);
pointindex=_ogg_malloc(points*sizeof(long));
/* make a point index of fall-through points */
for(i=0;i<points;i++){
int best=_best(b,pointlist+i*dim,1);
if(best==-1)
pointindex[indexedpoints++]=i;
spinnit("finding orphaned points... ",points-i);
}
/* make an entry index */
entryindex=_ogg_malloc(entries*sizeof(long));
target=0;
for(i=0;i<entries;i++){
if(b->c->lengthlist[i]>0)
entryindex[target++]=i;
}
/* make working space for a reverse entry index */
reventry=_ogg_malloc(entries*sizeof(long));
/* do the split */
nt=b->c->nearest_tree=
_ogg_calloc(1,sizeof(encode_aux_nearestmatch));
nt->alloc=4096;
nt->ptr0=_ogg_malloc(sizeof(long)*nt->alloc);
nt->ptr1=_ogg_malloc(sizeof(long)*nt->alloc);
nt->p=_ogg_malloc(sizeof(long)*nt->alloc);
nt->q=_ogg_malloc(sizeof(long)*nt->alloc);
nt->aux=0;
fprintf(stderr,"Leaves added: %d \n",
lp_split(pointlist,points,
b,entryindex,target,
pointindex,indexedpoints,
membership,reventry,
0,&pointssofar));
free(membership);
free(reventry);
free(pointindex);
/* hack alert. I should just change the damned splitter and
codebook writer */
for(i=0;i<nt->aux;i++)nt->p[i]*=dim;
for(i=0;i<nt->aux;i++)nt->q[i]*=dim;
/* recount hits. Build new lengthlist. reuse entryindex storage */
for(i=0;i<entries;i++)entryindex[i]=1;
for(i=0;i<points-entries;i++){
int best=_best(b,pointlist+i*dim,1);
float *a=pointlist+i*dim;
if(!(i&0xff))spinnit("counting hits...",i);
if(best==-1){
fprintf(stderr,"\nINTERNAL ERROR; a point count not be matched to a\n"
"codebook entry. The new decision tree is broken.\n");
exit(1);
}
entryindex[best]++;
}
for(i=0;i<nt->aux;i++)nt->p[i]/=dim;
for(i=0;i<nt->aux;i++)nt->q[i]/=dim;
/* the lengthlist builder doesn't actually deal with 0 hit entries.
So, we pack the 'sparse' hit list into a dense list, then unpack
the lengths after the build */
{
int upper=0;
long *lengthlist=_ogg_calloc(entries,sizeof(long));
for(i=0;i<entries;i++){
if(b->c->lengthlist[i]>0)
entryindex[upper++]=entryindex[i];
else{
if(entryindex[i]>1){
fprintf(stderr,"\nINTERNAL ERROR; _best matched to unused entry\n");
exit(1);
}
}
}
/* sanity check */
if(upper != target){
fprintf(stderr,"\nINTERNAL ERROR; packed the wrong number of entries\n");
exit(1);
}
build_tree_from_lengths(upper,entryindex,lengthlist);
upper=0;
for(i=0;i<entries;i++){
if(b->c->lengthlist[i]>0)
b->c->lengthlist[i]=lengthlist[upper++];
}
}
}
/* we're done. write it out. */
write_codebook(out,basename,b->c);
fprintf(stderr,"\r \nDone.\n");
return(0);
}