1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-04-30 07:39:42 +02:00
2013-10-24 04:32:13 +00:00

536 lines
19 KiB
C

/*
* TwoLAME: an optimized MPEG Audio Layer Two encoder
*
* Copyright (C) 2001-2004 Michael Cheng
* Copyright (C) 2004-2006 The TwoLAME Project
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* $Id$
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include "twolame.h"
#include "common.h"
#include "mem.h"
#include "fft.h"
#include "psycho_2.h"
/* The static variables "r", "phi_sav", "new", "old" and "oldest" have */
/* to be remembered for the unpredictability measure. For "r" and */
/* "phi_sav", the first index from the left is the channel select and */
/* the second index is the "age" of the data. */
/* The following static variables are constants. */
static const FLOAT nmt = 5.5;
static const FLOAT crit_band[27] = { 0, 100, 200, 300, 400, 510, 630, 770,
920, 1080, 1270, 1480, 1720, 2000, 2320, 2700,
3150, 3700, 4400, 5300, 6400, 7700, 9500, 12000,
15500, 25000, 30000
};
static const FLOAT bmax[27] = { 20.0, 20.0, 20.0, 20.0, 20.0, 17.0, 15.0,
10.0, 7.0, 4.4, 4.5, 4.5, 4.5, 4.5,
4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5,
4.5, 4.5, 4.5, 3.5, 3.5, 3.5
};
static void psycho_2_read_absthr(absthr, table)
FLOAT *absthr;
int table;
{
int j;
#include "psycho_2_absthr.h"
if ((table < 0) || (table > 3)) {
fprintf(stderr, "internal error: wrong table number");
return;
}
for (j = 0; j < HBLKSIZE; j++) {
absthr[j] = absthr_table[table][j];
}
return;
}
/********************************
* init psycho model 2
********************************/
psycho_2_mem *psycho_2_init(twolame_options * glopts, int sfreq)
{
psycho_2_mem *mem;
FLOAT *cbval, *rnorm;
FLOAT *window;
FLOAT *ath;
int *numlines;
int *partition;
FCB *s;
FLOAT *tmn;
int i, j, itemp2;
FLOAT freq_mult;
FLOAT temp1, ftemp2, temp3;
FLOAT bval_lo, *fthr;
int sfreq_idx;
{
mem = (psycho_2_mem *) TWOLAME_MALLOC(sizeof(psycho_2_mem));
if (!mem)
return NULL;
mem->tmn = (FLOAT *) TWOLAME_MALLOC(sizeof(DCB));
mem->s = (FCB *) TWOLAME_MALLOC(sizeof(FCBCB));
mem->lthr = (FHBLK *) TWOLAME_MALLOC(sizeof(F2HBLK));
mem->r = (F2HBLK *) TWOLAME_MALLOC(sizeof(F22HBLK));
mem->phi_sav = (F2HBLK *) TWOLAME_MALLOC(sizeof(F22HBLK));
// static int new = 0, old = 1, oldest = 0;
mem->new = 0;
mem->old = 1;
mem->oldest = 0;
mem->flush = (int) (384 * 3.0 / 2.0);
mem->syncsize = 1056;
mem->sync_flush = mem->syncsize - mem->flush;
}
{
cbval = mem->cbval;
rnorm = mem->rnorm;
window = mem->window;
ath = mem->ath;
numlines = mem->numlines;
partition = mem->partition;
s = mem->s;
tmn = mem->tmn;
fthr = mem->fthr;
}
switch (sfreq) {
case 32000:
case 16000:
sfreq_idx = 0;
break;
case 44100:
case 22050:
sfreq_idx = 1;
break;
case 48000:
case 24000:
sfreq_idx = 2;
break;
default:
fprintf(stderr, "error, invalid sampling frequency: %d Hz\n", sfreq);
return NULL;
}
fprintf(stderr, "absthr[][] sampling frequency index: %d\n", sfreq_idx);
psycho_2_read_absthr(mem->absthr, sfreq_idx);
/* calculate HANN window coefficients */
/* for(i=0;i<BLKSIZE;i++)window[i]=0.5*(1-cos(2.0*PI*i/(BLKSIZE-1.0))); */
for (i = 0; i < BLKSIZE; i++)
window[i] = 0.5 * (1 - cos(2.0 * PI * (i - 0.5) / BLKSIZE));
/* reset states used in unpredictability measure */
for (i = 0; i < HBLKSIZE; i++) {
mem->r[0][0][i] = mem->r[1][0][i] = mem->r[0][1][i] = mem->r[1][1][i] = 0;
mem->phi_sav[0][0][i] = mem->phi_sav[1][0][i] = 0;
mem->phi_sav[0][1][i] = mem->phi_sav[1][1][i] = 0;
mem->lthr[0][i] = 60802371420160.0;
mem->lthr[1][i] = 60802371420160.0;
}
/*****************************************************************************
* Initialization: Compute the following constants for use later *
* partition[HBLKSIZE] = the partition number associated with each *
* frequency line *
* cbval[CBANDS] = the center (average) bark value of each *
* partition *
* numlines[CBANDS] = the number of frequency lines in each partition *
* tmn[CBANDS] = tone masking noise *
*****************************************************************************/
/* compute fft frequency multiplicand */
freq_mult = (FLOAT) sfreq / (FLOAT) BLKSIZE;
/* calculate fft frequency, then bval of each line (use fthr[] as tmp storage) */
for (i = 0; i < HBLKSIZE; i++) {
temp1 = i * freq_mult;
j = 1;
while (temp1 > crit_band[j])
j++;
fthr[i] = j - 1 + (temp1 - crit_band[j - 1]) / (crit_band[j] - crit_band[j - 1]);
}
partition[0] = 0;
/* temp2 is the counter of the number of frequency lines in each partition */
itemp2 = 1;
cbval[0] = fthr[0];
bval_lo = fthr[0];
for (i = 1; i < HBLKSIZE; i++) {
if ((fthr[i] - bval_lo) > 0.33) {
partition[i] = partition[i - 1] + 1;
cbval[partition[i - 1]] = cbval[partition[i - 1]] / itemp2;
cbval[partition[i]] = fthr[i];
bval_lo = fthr[i];
numlines[partition[i - 1]] = itemp2;
itemp2 = 1;
} else {
partition[i] = partition[i - 1];
cbval[partition[i]] += fthr[i];
itemp2++;
}
}
numlines[partition[i - 1]] = itemp2;
cbval[partition[i - 1]] = cbval[partition[i - 1]] / itemp2;
/************************************************************************
* Now compute the spreading function, s[j][i], the value of the spread-*
* ing function, centered at band j, for band i, store for later use *
************************************************************************/
for (j = 0; j < CBANDS; j++) {
for (i = 0; i < CBANDS; i++) {
temp1 = (cbval[i] - cbval[j]) * 1.05;
if (temp1 >= 0.5 && temp1 <= 2.5) {
ftemp2 = temp1 - 0.5;
ftemp2 = 8.0 * (ftemp2 * ftemp2 - 2.0 * ftemp2);
} else
ftemp2 = 0.0;
temp1 += 0.474;
temp3 = 15.811389 + 7.5 * temp1 - 17.5 * sqrt((FLOAT) (1.0 + temp1 * temp1));
if (temp3 <= -100)
s[i][j] = 0;
else {
temp3 = (ftemp2 + temp3) * LN_TO_LOG10;
s[i][j] = exp(temp3);
}
}
}
/* Calculate Tone Masking Noise values */
for (j = 0; j < CBANDS; j++) {
temp1 = 15.5 + cbval[j];
tmn[j] = (temp1 > 24.5) ? temp1 : 24.5;
/* Calculate normalization factors for the net spreading functions */
rnorm[j] = 0;
for (i = 0; i < CBANDS; i++) {
rnorm[j] += s[j][i];
}
}
if (glopts->verbosity > 5) {
/* Dump All the Values to stderr and exit */
int wlow, whigh = 0;
fprintf(stderr, "psy model 2 init\n");
fprintf(stderr, "index \tnlines \twlow \twhigh \tbval \tminval \ttmn\n");
for (i = 0; i < CBANDS; i++) {
wlow = whigh + 1;
whigh = wlow + numlines[i] - 1;
fprintf(stderr, "%i \t%i \t%i \t%i \t%5.2f \t%4.2f \t%4.2f\n", i + 1, numlines[i], wlow,
whigh, cbval[i], bmax[(int) (cbval[i] + 0.5)], tmn[i]);
}
}
return (mem);
}
void psycho_2(twolame_options * glopts, short int buffer[2][1152],
short int savebuf[2][1056], FLOAT smr[2][32])
{
psycho_2_mem *mem;
unsigned int i, j, k, ch;
int new, old, oldest;
FLOAT r_prime, phi_prime;
FLOAT minthres, sum_energy;
FLOAT tb, temp1, temp2, temp3;
FLOAT *grouped_c, *grouped_e;
FLOAT *nb, *cb, *ecb, *bc;
FLOAT *cbval, *rnorm;
FLOAT *wsamp_r, *phi, *energy, *window;
FLOAT *ath, *thr, *c;
FLOAT *fthr;
FLOAT *snrtmp[2];
int *numlines;
int *partition;
FLOAT *tmn;
FCB *s;
FHBLK *lthr;
F2HBLK *r, *phi_sav;
FLOAT *absthr;
int nch = glopts->num_channels_out;
int sfreq = glopts->samplerate_out;
if (!glopts->p2mem) {
glopts->p2mem = psycho_2_init(glopts, sfreq);
}
mem = glopts->p2mem;
{
grouped_c = mem->grouped_c;
grouped_e = mem->grouped_e;
nb = mem->nb;
cb = mem->cb;
ecb = mem->ecb;
bc = mem->bc;
rnorm = mem->rnorm;
cbval = mem->cbval;
wsamp_r = mem->wsamp_r;
phi = mem->phi;
energy = mem->energy;
window = mem->window;
ath = mem->ath;
thr = mem->thr;
c = mem->c;
snrtmp[0] = mem->snrtmp[0];
snrtmp[1] = mem->snrtmp[1];
numlines = mem->numlines;
partition = mem->partition;
tmn = mem->tmn;
s = mem->s;
lthr = mem->lthr;
r = mem->r;
phi_sav = mem->phi_sav;
fthr = mem->fthr;
absthr = mem->absthr;
}
for (ch = 0; ch < nch; ch++) {
for (i = 0; i < 2; i++) {
/*****************************************************************************
* Net offset is 480 samples (1056-576) for layer 2; this is because one must*
* stagger input data by 256 samples to synchronize psychoacoustic model with*
* filter bank outputs, then stagger so that center of 1024 FFT window lines *
* up with center of 576 "new" audio samples. *
flush = 384*3.0/2.0; = 576
syncsize = 1056;
sync_flush = syncsize - flush; 480
BLKSIZE = 1024
*****************************************************************************/
{
short int *bufferp = buffer[ch];
for (j = 0; j < 480; j++) {
savebuf[ch][j] = savebuf[ch][j + mem->flush];
wsamp_r[j] = window[j] * ((FLOAT) savebuf[ch][j]);
}
for (; j < 1024; j++) {
savebuf[ch][j] = *bufferp++;
wsamp_r[j] = window[j] * ((FLOAT) savebuf[ch][j]);
}
for (; j < 1056; j++)
savebuf[ch][j] = *bufferp++;
}
/**Compute FFT****************************************************************/
psycho_2_fft(wsamp_r, energy, phi);
/*****************************************************************************
* calculate the unpredictability measure, given energy[f] and phi[f] *
*****************************************************************************/
/* only update data "age" pointers after you are done with both channels */
/* for layer 1 computations, for the layer 2 FLOAT computations, the pointers */
/* are reset automatically on the second pass */
{
if (mem->new == 0) {
mem->new = 1;
mem->oldest = 1;
} else {
mem->new = 0;
mem->oldest = 0;
}
if (mem->old == 0)
mem->old = 1;
else
mem->old = 0;
new = mem->new;
old = mem->old;
oldest = mem->oldest;
}
for (j = 0; j < HBLKSIZE; j++) {
r_prime = 2.0 * r[ch][old][j] - r[ch][oldest][j];
phi_prime = 2.0 * phi_sav[ch][old][j] - phi_sav[ch][oldest][j];
r[ch][new][j] = sqrt((FLOAT) energy[j]);
phi_sav[ch][new][j] = phi[j];
#ifdef SINCOS
{
// 12% faster
// #warning "Use __sincos"
FLOAT sphi, cphi, sprime, cprime;
__sincos((FLOAT) phi[j], &sphi, &cphi);
__sincos((FLOAT) phi_prime, &sprime, &cprime);
temp1 = r[chn][new][j] * cphi - r_prime * cprime;
temp2 = r[chn][new][j] * sphi - r_prime * sprime;
}
#else
temp1 = r[ch][new][j] * cos((FLOAT) phi[j]) - r_prime * cos((FLOAT) phi_prime);
temp2 = r[ch][new][j] * sin((FLOAT) phi[j]) - r_prime * sin((FLOAT) phi_prime);
#endif
temp3 = r[ch][new][j] + fabs((FLOAT) r_prime);
if (temp3 != 0)
c[j] = sqrt(temp1 * temp1 + temp2 * temp2) / temp3;
else
c[j] = 0;
}
/*****************************************************************************
* Calculate the grouped, energy-weighted, unpredictability measure, *
* grouped_c[], and the grouped energy. grouped_e[] *
*****************************************************************************/
for (j = 1; j < CBANDS; j++) {
grouped_e[j] = 0;
grouped_c[j] = 0;
}
grouped_e[0] = energy[0];
grouped_c[0] = energy[0] * c[0];
for (j = 1; j < HBLKSIZE; j++) {
grouped_e[partition[j]] += energy[j];
grouped_c[partition[j]] += energy[j] * c[j];
}
/*****************************************************************************
* convolve the grouped energy-weighted unpredictability measure *
* and the grouped energy with the spreading function, s[j][k] *
*****************************************************************************/
for (j = 0; j < CBANDS; j++) {
ecb[j] = 0;
cb[j] = 0;
for (k = 0; k < CBANDS; k++) {
if (s[j][k] != 0.0) {
ecb[j] += s[j][k] * grouped_e[k];
cb[j] += s[j][k] * grouped_c[k];
}
}
if (ecb[j] != 0)
cb[j] = cb[j] / ecb[j];
else
cb[j] = 0;
}
/*****************************************************************************
* Calculate the required SNR for each of the frequency partitions *
* this whole section can be accomplished by a table lookup *
*****************************************************************************/
for (j = 0; j < CBANDS; j++) {
if (cb[j] < .05)
cb[j] = 0.05;
else if (cb[j] > .5)
cb[j] = 0.5;
tb = -0.434294482 * log((FLOAT) cb[j]) - 0.301029996;
cb[j] = tb;
bc[j] = tmn[j] * tb + nmt * (1.0 - tb);
k = (int) (cbval[j] + 0.5);
bc[j] = (bc[j] > bmax[k]) ? bc[j] : bmax[k];
bc[j] = exp((FLOAT) - bc[j] * LN_TO_LOG10);
}
/*****************************************************************************
* Calculate the permissible noise energy level in each of the frequency *
* partitions. Include absolute threshold and pre-echo controls *
* this whole section can be accomplished by a table lookup *
*****************************************************************************/
for (j = 0; j < CBANDS; j++)
if (rnorm[j] && numlines[j])
nb[j] = ecb[j] * bc[j] / (rnorm[j] * numlines[j]);
else
nb[j] = 0;
for (j = 0; j < HBLKSIZE; j++) {
/* temp1 is the preliminary threshold */
temp1 = nb[partition[j]];
temp1 = (temp1 > absthr[j]) ? temp1 : absthr[j];
#ifdef LAYERI
/* do not use pre-echo control for layer 2 because it may do bad things to the */
/* MUSICAM bit allocation algorithm */
if (lay == 1) {
fthr[j] = (temp1 < lthr[ch][j]) ? temp1 : lthr[ch][j];
temp2 = temp1 * 0.00316;
fthr[j] = (temp2 > fthr[j]) ? temp2 : fthr[j];
} else
fthr[j] = temp1;
lthr[ch][j] = LXMIN * temp1;
#else
fthr[j] = temp1;
lthr[ch][j] = LXMIN * temp1;
#endif
}
/*****************************************************************************
* Translate the 512 threshold values to the 32 filter bands of the coder *
*****************************************************************************/
for (j = 0; j < 193; j += 16) {
minthres = 60802371420160.0;
sum_energy = 0.0;
for (k = 0; k < 17; k++) {
if (minthres > fthr[j + k])
minthres = fthr[j + k];
sum_energy += energy[j + k];
}
snrtmp[i][j / 16] = sum_energy / (minthres * 17.0);
snrtmp[i][j / 16] = 4.342944819 * log((FLOAT) snrtmp[i][j / 16]);
}
for (j = 208; j < (HBLKSIZE - 1); j += 16) {
minthres = 0.0;
sum_energy = 0.0;
for (k = 0; k < 17; k++) {
minthres += fthr[j + k];
sum_energy += energy[j + k];
}
snrtmp[i][j / 16] = sum_energy / minthres;
snrtmp[i][j / 16] = 4.342944819 * log((FLOAT) snrtmp[i][j / 16]);
}
/*****************************************************************************
* End of Psychoacuostic calculation loop *
*****************************************************************************/
}
for (i = 0; i < 32; i++) {
smr[ch][i] = (snrtmp[0][i] > snrtmp[1][i]) ? snrtmp[0][i] : snrtmp[1][i];
}
} // next channel
}
void psycho_2_deinit(psycho_2_mem ** mem)
{
if (mem == NULL || *mem == NULL)
return;
TWOLAME_FREE((*mem)->tmn);
TWOLAME_FREE((*mem)->s);
TWOLAME_FREE((*mem)->lthr);
TWOLAME_FREE((*mem)->r);
TWOLAME_FREE((*mem)->phi_sav);
TWOLAME_FREE((*mem));
}
// vim:ts=4:sw=4:nowrap: