mirror of
				https://github.com/cookiengineer/audacity
				synced 2025-11-03 23:53:55 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			583 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			583 lines
		
	
	
		
			17 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 *	TwoLAME: an optimized MPEG Audio Layer Two encoder
 | 
						|
 *
 | 
						|
 *	Copyright (C) 2001-2004 Michael Cheng
 | 
						|
 *	Copyright (C) 2004-2006 The TwoLAME Project
 | 
						|
 *
 | 
						|
 *	This library is free software; you can redistribute it and/or
 | 
						|
 *	modify it under the terms of the GNU Lesser General Public
 | 
						|
 *	License as published by the Free Software Foundation; either
 | 
						|
 *	version 2.1 of the License, or (at your option) any later version.
 | 
						|
 *
 | 
						|
 *	This library is distributed in the hope that it will be useful,
 | 
						|
 *	but WITHOUT ANY WARRANTY; without even the impelied warranty of
 | 
						|
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
 *	Lesser General Public License for more details.
 | 
						|
 *
 | 
						|
 *	You should have received a copy of the GNU Lesser General Public
 | 
						|
 *	License along with this library; if not, write to the Free Software
 | 
						|
 *	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 | 
						|
 *
 | 
						|
 *  $Id: psycho_3.c,v 1.3 2008-02-01 19:44:35 richardash1981 Exp $
 | 
						|
 *
 | 
						|
 */
 | 
						|
 | 
						|
#include <stdio.h>
 | 
						|
#include <stdlib.h>
 | 
						|
#include <math.h>
 | 
						|
#include <string.h>
 | 
						|
 | 
						|
#include "twolame.h"
 | 
						|
#include "common.h"
 | 
						|
#include "mem.h"
 | 
						|
#include "fft.h"
 | 
						|
#include "ath.h"
 | 
						|
#include "psycho_3.h"
 | 
						|
 | 
						|
/* This is a reimplementation of psy model 1 using the ISO11172 standard.
 | 
						|
   I found the original dist10 code (which is full of pointers) to be 
 | 
						|
   a horrible thing to try and understand and debug.
 | 
						|
   This implementation is not built for speed, but is rather meant to 
 | 
						|
   clearly outline the steps specified by the standard (still, it's only
 | 
						|
   a tiny fraction slower than the dist10 code, and nothing has been optimized)
 | 
						|
   MFC Feb 2003 */
 | 
						|
 | 
						|
static inline FLOAT psycho_3_add_db (psycho_3_mem *mem, FLOAT a, FLOAT b)
 | 
						|
{
 | 
						|
  /* MFC - if the difference between a and b is large (>99), then just return the
 | 
						|
	 largest one. (about 10% of the time)
 | 
						|
	 - For differences between 0 and 99, return the largest value, but add
 | 
						|
	 in a pre-calculated difference value.	
 | 
						|
	 - the value 99 was chosen arbitarily.
 | 
						|
	 - maximum (a-b) i've seen is 572 */
 | 
						|
  FLOAT fdiff;
 | 
						|
  int idiff;
 | 
						|
  fdiff = (10.0 * (a - b));
 | 
						|
 | 
						|
  if (fdiff > 990.0) {
 | 
						|
	return a;
 | 
						|
  }
 | 
						|
  if (fdiff < -990.0) {
 | 
						|
	return (b);
 | 
						|
  }
 | 
						|
 | 
						|
  idiff = (int) fdiff;
 | 
						|
  if (idiff >= 0) {
 | 
						|
	return (a + mem->dbtable[idiff]);
 | 
						|
  }
 | 
						|
 | 
						|
  return (b + mem->dbtable[-idiff]);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
/* ISO11172 Sec D.1 Step 1 - Window with HANN and then perform the FFT */
 | 
						|
static void psycho_3_fft(FLOAT sample[BLKSIZE], FLOAT energy[BLKSIZE])
 | 
						|
{
 | 
						|
  FLOAT x_real[BLKSIZE];
 | 
						|
  int i;
 | 
						|
  static int init = 0;
 | 
						|
  static FLOAT window[FFT_SIZE];
 | 
						|
 | 
						|
  if (!init) { /* calculate window function for the Fourier transform */
 | 
						|
	FLOAT sqrt_8_over_3 = pow (8.0 / 3.0, 0.5);
 | 
						|
	for (i = 0; i < BLKSIZE; i++) {
 | 
						|
	  window[i] = sqrt_8_over_3 * 0.5 * (1 - cos (2.0 * PI * i / (BLKSIZE))) / BLKSIZE;
 | 
						|
	}
 | 
						|
	init++;
 | 
						|
  }
 | 
						|
 | 
						|
  /* convolve the samples with the hann window */
 | 
						|
  for (i = 0; i < BLKSIZE; i++)
 | 
						|
	x_real[i] = (FLOAT) (sample[i] * window[i]);
 | 
						|
  /* do the FFT */
 | 
						|
  psycho_1_fft (x_real, energy, BLKSIZE);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* Sect D.1 Step 1 - convert the energies into dB */
 | 
						|
static void psycho_3_powerdensityspectrum(FLOAT energy[BLKSIZE], FLOAT power[HBLKSIZE]) {
 | 
						|
  int i;
 | 
						|
  for (i=1;i<HBLKSIZE;i++) {	
 | 
						|
	if (energy[i] < 1E-20)
 | 
						|
	  power[i] = -200.0 + POWERNORM;
 | 
						|
	else
 | 
						|
	  power[i] = 10 * log10 (energy[i]) + POWERNORM;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* Sect D.1 Step 2 - Determine the sound pressure level in each subband */
 | 
						|
static void psycho_3_spl(FLOAT *Lsb, FLOAT *power, FLOAT *scale) {
 | 
						|
  int i;
 | 
						|
  FLOAT Xmax[SBLIMIT];
 | 
						|
 | 
						|
  for (i=0;i<SBLIMIT;i++) {
 | 
						|
	Xmax[i] = DBMIN;
 | 
						|
  }
 | 
						|
  /* Find the maximum SPL in the power spectrum */
 | 
						|
  for (i=1;i<HBLKSIZE;i++) {
 | 
						|
	int index = (i - 1) >>4;
 | 
						|
	if (Xmax[index] < power[i])
 | 
						|
	  Xmax[index] = power[i];
 | 
						|
  }
 | 
						|
 | 
						|
  /* Compare it to the sound pressure based upon the scale for this subband 
 | 
						|
	 and pick the maximum one */
 | 
						|
  for (i=0;i<SBLIMIT;i++) {
 | 
						|
	FLOAT val =	 20 * log10 (scale[i] * 32768) - 10;
 | 
						|
	Lsb[i] = MAX(Xmax[i], val);
 | 
						|
  }	 
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* Sect D.1 Step4b 
 | 
						|
   A tone within the range (start -> end), must be 7.0 dB greater than
 | 
						|
   all it's neighbours within +/- srange. Don't count its immediate neighbours. */
 | 
						|
static void psycho_3_tonal_label_range(psycho_3_mem *mem, FLOAT *power, int *tonelabel, int *maxima, FLOAT *Xtm, int start, int end, int srange) {
 | 
						|
  int j,k;
 | 
						|
 | 
						|
  for (k=start;k<end;k++)  /* Search for all the maxima in this range */
 | 
						|
	if (maxima[k] == 1) {
 | 
						|
	  tonelabel[k] = TONE; /* assume it's a TONE and then prove otherwise */
 | 
						|
	  for (j=-srange;j<=+srange;j++) /* Check the neighbours within +/- srange */
 | 
						|
	if (abs(j) > 1) /* Don't count the immediate neighbours, or itself */
 | 
						|
	  if ((power[k] - power[k+j]) < 7.0)
 | 
						|
		tonelabel[k] = 0; /* Not greater by 7dB, therefore not a tone */
 | 
						|
	  if (tonelabel[k] == TONE) {
 | 
						|
	/* Calculate the sound pressure level for this tone by summing 
 | 
						|
	   the adjacent spectral lines
 | 
						|
	   Xtm[k] = 10 * log10( pow(10.0, 0.1*power[k-1]) + pow(10.0, 0.1*power[k]) 
 | 
						|
						  + pow(10.0, 0.1*power[k+1]) ); */
 | 
						|
	FLOAT temp = psycho_3_add_db(mem, power[k-1], power[k]);
 | 
						|
	Xtm[k] = psycho_3_add_db(mem, temp, power[k+1]);
 | 
						|
	
 | 
						|
	/* *ALL* spectral lines within +/- srange are set to -inf dB 
 | 
						|
	   So that when we do the noise calculate, they are not counted */
 | 
						|
	for (j=-srange;j<=+srange;j++)
 | 
						|
		power[k+j] = DBMIN;
 | 
						|
	  }
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* Sect D.1 Step 4 Label the Tonal Components */
 | 
						|
static void psycho_3_tonal_label (psycho_3_mem *mem, FLOAT power[HBLKSIZE], int *tonelabel, FLOAT Xtm[HBLKSIZE])
 | 
						|
{
 | 
						|
  int i;
 | 
						|
  int maxima[HBLKSIZE];
 | 
						|
 | 
						|
  /* Find the maxima as per ISO11172 D.1.4.a */
 | 
						|
  maxima[0]=maxima[HBLKSIZE-1]=0;
 | 
						|
  tonelabel[0]=tonelabel[HBLKSIZE-1]=0;
 | 
						|
  Xtm[0] = Xtm[HBLKSIZE-1] = DBMIN;
 | 
						|
  for (i=1;i<HBLKSIZE-1;i++) {
 | 
						|
	tonelabel[i] = 0;
 | 
						|
	Xtm[i] = DBMIN;
 | 
						|
	if (power[i]>power[i-1] && power[i]>power[i+1]) /* The first criteria for a maximum */
 | 
						|
	  maxima[i]=1;
 | 
						|
	else
 | 
						|
	  maxima[i]=0;
 | 
						|
  }
 | 
						|
 | 
						|
  {
 | 
						|
	/* Now find the tones as per ISO11172 D.1 Step4.b */
 | 
						|
	/* The standard is a bit vague (surprise surprise).
 | 
						|
	   So I'm going to assume that 
 | 
						|
	   - a tone must be 7dB greater than *all* the relevant neighbours 
 | 
						|
	   - once a tone is found, the neighbours are immediately set to -inf dB
 | 
						|
	*/
 | 
						|
 | 
						|
	psycho_3_tonal_label_range(mem, power, tonelabel, maxima, Xtm, 2, 63, 2);
 | 
						|
	psycho_3_tonal_label_range(mem, power, tonelabel, maxima, Xtm, 63,127,3);
 | 
						|
	psycho_3_tonal_label_range(mem, power, tonelabel, maxima, Xtm, 127,255,6);
 | 
						|
	psycho_3_tonal_label_range(mem, power, tonelabel, maxima, Xtm, 255,500,12);
 | 
						|
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
 | 
						|
static void psycho_3_init_add_db (psycho_3_mem *mem)
 | 
						|
{
 | 
						|
  int i;
 | 
						|
  FLOAT x;
 | 
						|
  for (i = 0; i < DBTAB; i++) {
 | 
						|
	x = (FLOAT) i / 10.0;
 | 
						|
	mem->dbtable[i] = 10 * log10 (1 + pow (10.0, x / 10.0)) - x;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* D.1 Step 4.c Labelling non-tonal (noise) components 
 | 
						|
   Sum the energies in each critical band (the tone energies have been removed 
 | 
						|
   during the tone labelling).
 | 
						|
   Find the "geometric mean" of these energies - i.e. find the best spot to put the
 | 
						|
   sum of energies within this critical band. */
 | 
						|
static void psycho_3_noise_label (psycho_3_mem *mem, FLOAT power[HBLKSIZE], FLOAT energy[BLKSIZE], int *tonelabel, int *noiselabel, FLOAT Xnm[HBLKSIZE]) {
 | 
						|
  int i,j;
 | 
						|
  int cbands = mem->cbands;
 | 
						|
  int *cbandindex = mem->cbandindex;
 | 
						|
 | 
						|
  Xnm[0] = DBMIN;
 | 
						|
  for (i=0;i<cbands;i++) {
 | 
						|
	/* for each critical band */
 | 
						|
	FLOAT sum = DBMIN;
 | 
						|
	FLOAT esum=0;
 | 
						|
	FLOAT centreweight = 0;
 | 
						|
	int centre;
 | 
						|
	for (j=cbandindex[i]; j<cbandindex[i+1]; j++) {
 | 
						|
	  Xnm[j] = DBMIN;
 | 
						|
	  /* go through all the spectral lines within the critical band, 
 | 
						|
	 adding the energies. The tone energies have already been removed */
 | 
						|
	  if (power[j] != DBMIN) {
 | 
						|
	/* Found a noise energy, add it to the sum */
 | 
						|
	sum = psycho_3_add_db(mem, power[j], sum);
 | 
						|
	
 | 
						|
	/* calculations for the geometric mean 
 | 
						|
	   FIXME MFC Feb 2003: Would it just be easier to
 | 
						|
	   do the *whole* of psycho_1 in the energy domain rather than 
 | 
						|
	   in the dB domain? 
 | 
						|
	   FIXME: This is just a lazy arsed arithmetic mean. Don't know 
 | 
						|
	   if it's really going to make that much difference */
 | 
						|
	esum += energy[j]; /* Calculate the sum of energies */
 | 
						|
	centreweight += (j - cbandindex[i]) * energy[j]; /* And the energy moment */
 | 
						|
	  }
 | 
						|
	}
 | 
						|
 | 
						|
	/* MEANX, crash on AMD64 without this hack.
 | 
						|
	   See https://sourceforge.net/tracker/?func=detail&atid=735435&aid=1453400&group_id=136040
 | 
						|
	   Probably a better way to do this */
 | 
						|
	if (sum<=DBMIN || esum<0.00001) 
 | 
						|
	  /* If the energy sum is really small, just pretend the noise occurs 
 | 
						|
	 in the centre frequency line */
 | 
						|
	  centre = (cbandindex[i] + cbandindex[i+1])/2;
 | 
						|
	else
 | 
						|
	{
 | 
						|
	  /* Otherwise, work out the mean position of the noise, and put it there. */
 | 
						|
	  centre = cbandindex[i] + (int)(centreweight/esum);
 | 
						|
	}
 | 
						|
	// /MEANX
 | 
						|
	Xnm[centre] = sum;
 | 
						|
	noiselabel[centre] = NOISE;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* ISO11172 D.1 Step 5
 | 
						|
   Get rid of noise/tones that aren't greater than the ATH
 | 
						|
   If two tones are within 0.5bark, then delete the tone with the lower energy */
 | 
						|
static void psycho_3_decimation(FLOAT *ath, int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm, FLOAT *bark) {
 | 
						|
  int i;
 | 
						|
 | 
						|
  /* Delete components which aren't above the ATH */
 | 
						|
  for (i=1;i<HBLKSIZE;i++) {
 | 
						|
	if (noiselabel[i]==NOISE) {
 | 
						|
	  if (Xnm[i] < ath[i]) {
 | 
						|
	/* this masker isn't above the ATH : delete it */
 | 
						|
	Xnm[i] = DBMIN;
 | 
						|
	noiselabel[i]=0;
 | 
						|
	  }
 | 
						|
	} 
 | 
						|
	if (tonelabel[i] == TONE) {
 | 
						|
	  if (Xtm[i] < ath[i]) {
 | 
						|
	Xtm[i] = DBMIN;
 | 
						|
	tonelabel[i]=0;
 | 
						|
	  }
 | 
						|
	}
 | 
						|
  }
 | 
						|
  /* Search for tones that are within 0.5 bark */
 | 
						|
  /* MFC FIXME Feb 2003: haven't done this yet */
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* ISO11172 Sect D.1 Step 6
 | 
						|
   Calculation of individual masking thresholds
 | 
						|
   Work out how each of the tones&noises maskes other frequencies 
 | 
						|
   NOTE: Only a subset of other frequencies is checked. According to the 
 | 
						|
   standard different subbands are subsampled to different amounts.
 | 
						|
   See psycho_3_init and freq_subset */
 | 
						|
static void psycho_3_threshold(psycho_3_mem *mem, FLOAT *LTg, int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm, FLOAT *bark, FLOAT *ath, int bit_rate, int *freq_subset) {
 | 
						|
  int i,j,k;
 | 
						|
  FLOAT LTtm[SUBSIZE];
 | 
						|
  FLOAT LTnm[SUBSIZE];
 | 
						|
 | 
						|
  for (i=0;i<SUBSIZE;i++) {
 | 
						|
	LTtm[i] = DBMIN;
 | 
						|
	LTnm[i] = DBMIN;
 | 
						|
  }
 | 
						|
  /* Loop over the entire spectrum and find every noise and tone 
 | 
						|
	 And then with each noise/tone work out how it masks 
 | 
						|
	 the spectral lines around it */
 | 
						|
  for (k=1;k<HBLKSIZE;k++) {
 | 
						|
	/* Find every tone */
 | 
						|
	if (tonelabel[k]==TONE) {
 | 
						|
	  for (j=0;j<SUBSIZE;j++) {
 | 
						|
	/* figure out how it masks the levels around it */	
 | 
						|
	FLOAT dz = bark[freq_subset[j]] - bark[k];	   
 | 
						|
	if (dz >= -3.0 && dz < 8.0) {
 | 
						|
	  FLOAT vf;
 | 
						|
	  FLOAT av = -1.525 - 0.275 * bark[k] - 4.5 + Xtm[k];
 | 
						|
	  /* masking function for lower & upper slopes */
 | 
						|
	  if (dz < -1)
 | 
						|
		vf = 17 * (dz + 1) - (0.4 * Xtm[k] + 6);
 | 
						|
	  else if (dz < 0)
 | 
						|
		vf = (0.4 * Xtm[k] + 6) * dz;
 | 
						|
	  else if (dz < 1)
 | 
						|
		vf = (-17 * dz);
 | 
						|
	  else
 | 
						|
		vf = -(dz - 1) * (17 - 0.15 * Xtm[k]) - 17;
 | 
						|
	  LTtm[j] = psycho_3_add_db (mem, LTtm[j], av + vf);
 | 
						|
	}	 
 | 
						|
	  }
 | 
						|
	}
 | 
						|
 | 
						|
	/* find every noise label */
 | 
						|
	if (noiselabel[k]==NOISE) {
 | 
						|
	  for (j=0;j<SUBSIZE;j++) {
 | 
						|
	/* figure out how it masks the levels around it */	
 | 
						|
	FLOAT dz = bark[freq_subset[j]] - bark[k];	   
 | 
						|
	if (dz >= -3.0 && dz < 8.0) {
 | 
						|
	  FLOAT vf;
 | 
						|
	  FLOAT av = -1.525 - 0.175 * bark[k] - 0.5 + Xnm[k]; 
 | 
						|
	  /* masking function for lower & upper slopes */
 | 
						|
	  if (dz < -1)
 | 
						|
		vf = 17 * (dz + 1) - (0.4 * Xnm[k] + 6);
 | 
						|
	  else if (dz < 0)
 | 
						|
		vf = (0.4 * Xnm[k] + 6) * dz;
 | 
						|
	  else if (dz < 1)
 | 
						|
		vf = (-17 * dz);
 | 
						|
	  else
 | 
						|
		vf = -(dz - 1) * (17 - 0.15 * Xnm[k]) - 17;
 | 
						|
	  LTnm[j] = psycho_3_add_db (mem, LTnm[j], av + vf);
 | 
						|
	}	 
 | 
						|
	  }
 | 
						|
	}
 | 
						|
  }
 | 
						|
 | 
						|
  /* ISO11172 D.1 Step 7
 | 
						|
	 Calculate the global masking threhold */
 | 
						|
  for (i=0;i<SUBSIZE;i++) {
 | 
						|
	LTg[i] = psycho_3_add_db(mem, LTnm[i], LTtm[i]);
 | 
						|
	if (bit_rate < 96)
 | 
						|
	  LTg[i] = psycho_3_add_db(mem, ath[freq_subset[i]], LTg[i]);
 | 
						|
	else
 | 
						|
	  LTg[i] = psycho_3_add_db(mem, ath[freq_subset[i]]-12.0, LTg[i]);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* Find the minimum LTg for each subband. ISO11172 Sec D.1 Step 8 */
 | 
						|
static void psycho_3_minimummasking(FLOAT *LTg, FLOAT *LTmin, int *freq_subset) {
 | 
						|
  int i;
 | 
						|
 | 
						|
  for (i=0;i<SBLIMIT;i++)
 | 
						|
	LTmin[i] = 999999.9;
 | 
						|
 | 
						|
  for (i=0;i<SUBSIZE;i++) {
 | 
						|
	int index = freq_subset[i]>>4;
 | 
						|
	if (LTmin[index] > LTg[i]) {
 | 
						|
	  LTmin[index] = LTg[i];
 | 
						|
	}
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
/* ISO11172 Sect D.1 Step 9
 | 
						|
   Calculate the signal-to-mask ratio 
 | 
						|
   MFC FIXME Feb 2003 for better calling from
 | 
						|
   twolame, add a "float SMR[]" array and return it */
 | 
						|
static void psycho_3_smr(FLOAT *LTmin, FLOAT *Lsb) {
 | 
						|
  int i;
 | 
						|
  for (i=0;i<SBLIMIT;i++) {
 | 
						|
	LTmin[i] = Lsb[i] - LTmin[i];
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static psycho_3_mem *psycho_3_init( twolame_options *glopts ) {
 | 
						|
  int i;
 | 
						|
  int cbase = 0; /* current base index for the bark range calculation */
 | 
						|
  FLOAT sfreq;
 | 
						|
  psycho_3_mem *mem;
 | 
						|
  int numlines[HBLKSIZE];
 | 
						|
  FLOAT cbval[HBLKSIZE];
 | 
						|
  int partition[HBLKSIZE];
 | 
						|
  int *freq_subset;
 | 
						|
  FLOAT *bark, *ath;
 | 
						|
  int cbands=0;
 | 
						|
  int *cbandindex;
 | 
						|
 | 
						|
  mem = (psycho_3_mem *)TWOLAME_MALLOC(sizeof(psycho_3_mem));
 | 
						|
  mem->off[0]=mem->off[1]=256;
 | 
						|
  freq_subset = mem->freq_subset;
 | 
						|
  bark = mem->bark;
 | 
						|
  ath = mem->ath;
 | 
						|
  cbandindex = mem->cbandindex;
 | 
						|
 | 
						|
  /* Initialise the tables for the adding dB */
 | 
						|
  psycho_3_init_add_db(mem);
 | 
						|
  
 | 
						|
  /* For each spectral line calculate the bark and the ATH (in dB) */
 | 
						|
  sfreq = (FLOAT)glopts->samplerate_out;
 | 
						|
  for (i=1;i<HBLKSIZE; i++) {
 | 
						|
	FLOAT freq = i * sfreq/BLKSIZE;
 | 
						|
	bark[i] = ath_freq2bark(freq);
 | 
						|
	ath[i] = ath_db(freq,glopts->athlevel);
 | 
						|
  }
 | 
						|
  
 | 
						|
  { /* Work out the critical bands
 | 
						|
	   Starting from line 0, all lines within 1 bark of the starting
 | 
						|
	   bark are added to the same critical band. When a line is greater
 | 
						|
	   by 1.0 of a bark, start a new critical band.	 */
 | 
						|
	
 | 
						|
	cbandindex[0] = 1;
 | 
						|
	for (i=1;i<HBLKSIZE;i++) {
 | 
						|
	  if ((bark[i] - bark[cbase]) > 1.0) { /* 1 critical band? 1 bark? */
 | 
						|
	/* this frequency line is too different from the starting line,
 | 
						|
	   (in terms of the bark distance)
 | 
						|
	   so make this spectral line the first member of the next critical band */
 | 
						|
	cbase = i; /* Start the new critical band from this frequency line */
 | 
						|
	cbands++;
 | 
						|
	cbandindex[cbands] = cbase;
 | 
						|
	  } 
 | 
						|
	  /* partition[i] tells us which critical band the i'th frequency line is in */
 | 
						|
	  partition[i] = cbands;
 | 
						|
	  /* keep a count of how many frequency lines are in each partition */
 | 
						|
	  numlines[cbands]++;
 | 
						|
	}
 | 
						|
	
 | 
						|
	cbands++;
 | 
						|
	cbandindex[cbands] = 513; /* Set the top of the last critical band */
 | 
						|
	mem->cbands = cbands; // make a not of the number of cbands
 | 
						|
 | 
						|
	/* For each crtical band calculate the average bark value 
 | 
						|
	   cbval [central bark value] */
 | 
						|
	for (i=1;i<HBLKSIZE;i++) 
 | 
						|
	  cbval[partition[i]] += bark[i]; /* sum up all the bark values */
 | 
						|
	for (i=1;i<CBANDS;i++) {
 | 
						|
	  if (numlines[i] != 0)
 | 
						|
	cbval[i] /= numlines[i]; /* divide by the number of values */
 | 
						|
	  else {
 | 
						|
	cbval[i]=0; /* this isn't a partition */
 | 
						|
	  }
 | 
						|
	}	  
 | 
						|
  }
 | 
						|
  
 | 
						|
  {
 | 
						|
	/* For Step6 - For the calculation of individual masking thresholds
 | 
						|
	   the spectral lines are subsampled 
 | 
						|
	   i.e. no need to work out the masking for every single spectral line.
 | 
						|
	   Depending upon which subband the calculation is for, you
 | 
						|
	   can skip a number of lines 
 | 
						|
	   There are 16 lines per subband -> 32 * 16 = 512 
 | 
						|
	   Subband 0-2 : Every line		   (3 * 16 = 48 lines)
 | 
						|
	   Subband 3-5 : Every Second line (3 * 16/2 = 24 lines)
 | 
						|
	   Subband 6-11 : Every 4th line   (6 * 16/4 = 24 lines)
 | 
						|
	   Subband 12-31 : Every 12th line (20 * 16/8 = 40 lines) 
 | 
						|
	   
 | 
						|
	   create this subset of frequencies (freq_subset) */
 | 
						|
	int freq_index=0;
 | 
						|
	for (i=1;i<(3*16)+1;i++) 
 | 
						|
	  freq_subset[freq_index++] = i;
 | 
						|
	for (;i<(6*16)+1;i+=2)
 | 
						|
	  freq_subset[freq_index++] = i;
 | 
						|
	for (;i<(12*16)+1;i+=4)
 | 
						|
	  freq_subset[freq_index++] = i;
 | 
						|
	for (;i<(32*16)+1;i+=8)
 | 
						|
	  freq_subset[freq_index++] = i;
 | 
						|
  }
 | 
						|
 | 
						|
  if (glopts->verbosity > 4) {
 | 
						|
	fprintf(stdout,"%i critical bands\n",cbands);
 | 
						|
	for (i=0;i<cbands;i++)
 | 
						|
	  fprintf(stdout,"cband %i spectral line index %i\n",i,cbandindex[i]);
 | 
						|
	fprintf(stdout,"%i Subsampled spectral lines\n",SUBSIZE);
 | 
						|
	for (i=0;i<SUBSIZE;i++) 
 | 
						|
	  fprintf(stdout,"%i Spectral line %i Bark %.2f\n",i,freq_subset[i], bark[freq_subset[i]]);
 | 
						|
  }
 | 
						|
 | 
						|
  return(mem);
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
static void psycho_3_dump(int *tonelabel, FLOAT *Xtm, int *noiselabel, FLOAT *Xnm) {
 | 
						|
  int i;
 | 
						|
  fprintf(stdout,"3 Ton:");
 | 
						|
  for (i=1;i<HAN_SIZE;i++) {
 | 
						|
	if (tonelabel[i] == TONE)
 | 
						|
	  fprintf(stdout,"[%i] %3.0f ",i,Xtm[i]);
 | 
						|
  }
 | 
						|
  fprintf(stdout,"\n");	 
 | 
						|
 | 
						|
  fprintf(stdout,"3 Nos:");
 | 
						|
  for (i=1;i<HAN_SIZE;i++) {
 | 
						|
	if (noiselabel[i] == NOISE)
 | 
						|
	  fprintf(stdout,"[%i] %3.0f ",i,Xnm[i]);
 | 
						|
  }
 | 
						|
  fprintf(stdout,"\n");
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
void psycho_3 (twolame_options *glopts, short int buffer[2][1152], FLOAT scale[2][32], FLOAT ltmin[2][32])
 | 
						|
{
 | 
						|
  psycho_3_mem *mem;
 | 
						|
  int nch = glopts->num_channels_out;
 | 
						|
  int k, i;
 | 
						|
  FLOAT sample[BLKSIZE];
 | 
						|
 | 
						|
  FLOAT energy[BLKSIZE];
 | 
						|
  FLOAT power[HBLKSIZE];
 | 
						|
  FLOAT Xtm[HBLKSIZE], Xnm[HBLKSIZE];
 | 
						|
  int tonelabel[HBLKSIZE], noiselabel[HBLKSIZE];
 | 
						|
  FLOAT LTg[HBLKSIZE];
 | 
						|
  FLOAT Lsb[SBLIMIT];
 | 
						|
 | 
						|
  if (!glopts->p3mem) {		
 | 
						|
	glopts->p3mem = psycho_3_init( glopts );
 | 
						|
  }
 | 
						|
  mem = glopts->p3mem;
 | 
						|
 | 
						|
  for (k = 0; k < nch; k++) {
 | 
						|
	int ok = mem->off[k] % 1408;
 | 
						|
	for (i = 0; i < 1152; i++) {
 | 
						|
	  mem->fft_buf[k][ok++] = (FLOAT) buffer[k][i] / SCALE;
 | 
						|
	  if (ok >= 1408)
 | 
						|
	ok = 0;
 | 
						|
	}
 | 
						|
	ok = (mem->off[k] + 1216) % 1408;
 | 
						|
	for (i = 0; i < BLKSIZE; i++) {
 | 
						|
	  sample[i] = mem->fft_buf[k][ok++];
 | 
						|
	  if (ok >= 1408)
 | 
						|
	ok = 0;
 | 
						|
	}
 | 
						|
 | 
						|
	mem->off[k] += 1152;
 | 
						|
	mem->off[k] %= 1408;
 | 
						|
 | 
						|
	psycho_3_fft(sample, energy);
 | 
						|
	psycho_3_powerdensityspectrum(energy, power);	 
 | 
						|
	psycho_3_spl(Lsb, power, &scale[k][0]);
 | 
						|
	psycho_3_tonal_label (mem, power, tonelabel, Xtm);
 | 
						|
	psycho_3_noise_label (mem, power, energy, tonelabel, noiselabel, Xnm);
 | 
						|
	if (glopts->verbosity > 8)
 | 
						|
	  psycho_3_dump(tonelabel, Xtm, noiselabel, Xnm);
 | 
						|
	psycho_3_decimation(mem->ath, tonelabel, Xtm, noiselabel, Xnm, mem->bark);
 | 
						|
	psycho_3_threshold(mem, LTg, tonelabel, Xtm, noiselabel, Xnm, mem->bark, mem->ath, glopts->bitrate / nch, mem->freq_subset);
 | 
						|
	psycho_3_minimummasking(LTg, <min[k][0], mem->freq_subset);
 | 
						|
	psycho_3_smr(<min[k][0], Lsb);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
void psycho_3_deinit(psycho_3_mem **mem) {
 | 
						|
 | 
						|
	if (mem==NULL||*mem==NULL) return;
 | 
						|
 | 
						|
	TWOLAME_FREE( *mem );
 | 
						|
}
 | 
						|
 | 
						|
 | 
						|
// vim:ts=4:sw=4:nowrap: 
 |