1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-10-26 00:03:52 +02:00

Andrew Hallendorff's SSE accelerated Equalization.

This commit is contained in:
james.k.crook@gmail.com
2014-01-16 17:55:35 +00:00
parent 3f59126949
commit d847ee7162
13 changed files with 3161 additions and 307 deletions

View File

@@ -30,6 +30,10 @@
#ifndef __EXPERIMENTAL__ #ifndef __EXPERIMENTAL__
#define __EXPERIMENTAL__ #define __EXPERIMENTAL__
// ACH 08 Jan 2014
// EQ accelerated code
//#define EXPERIMENTAL_EQ_SSE_THREADED
// LLL, 09 Nov 2013: // LLL, 09 Nov 2013:
// Allow all WASAPI devices, not just loopback // Allow all WASAPI devices, not just loopback
#define EXPERIMENTAL_FULL_WASAPI #define EXPERIMENTAL_FULL_WASAPI

View File

@@ -1,54 +1,59 @@
/* /*
* Program: REALFFTF.C * Program: REALFFTF.C
* Author: Philip Van Baren * Author: Philip Van Baren
* Date: 2 September 1993 * Date: 2 September 1993
* *
* Description: These routines perform an FFT on real data to get a conjugate-symmetric * Description: These routines perform an FFT on real data to get a conjugate-symmetric
* output, and an inverse FFT on conjugate-symmetric input to get a real * output, and an inverse FFT on conjugate-symmetric input to get a real
* output sequence. * output sequence.
* *
* This code is for floating point data. * This code is for floating point data.
* *
* Modified 8/19/1998 by Philip Van Baren * Modified 8/19/1998 by Philip Van Baren
* - made the InitializeFFT and EndFFT routines take a structure * - made the InitializeFFT and EndFFT routines take a structure
* holding the length and pointers to the BitReversed and SinTable * holding the length and pointers to the BitReversed and SinTable
* tables. * tables.
* Modified 5/23/2009 by Philip Van Baren * Modified 5/23/2009 by Philip Van Baren
* - Added GetFFT and ReleaseFFT routines to retain common SinTable * - Added GetFFT and ReleaseFFT routines to retain common SinTable
* and BitReversed tables so they don't need to be reallocated * and BitReversed tables so they don't need to be reallocated
* and recomputed on every call. * and recomputed on every call.
* - Added Reorder* functions to undo the bit-reversal * - Added Reorder* functions to undo the bit-reversal
* *
* Copyright (C) 2009 Philip VanBaren * Copyright (C) 2009 Philip VanBaren
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or * the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version. * (at your option) any later version.
* *
* This program is distributed in the hope that it will be useful, * This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of * but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details. * GNU General Public License for more details.
* *
* You should have received a copy of the GNU General Public License * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software * along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/ */
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <math.h> #include <math.h>
#include "Experimental.h"
#include "RealFFTf.h" #include "RealFFTf.h"
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
#include "RealFFTf48x.h"
#endif
#ifndef M_PI #ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */ #define M_PI 3.14159265358979323846 /* pi */
#endif #endif
/* /*
* Initialize the Sine table and Twiddle pointers (bit-reversed pointers) * Initialize the Sine table and Twiddle pointers (bit-reversed pointers)
* for the FFT routine. * for the FFT routine.
*/ */
HFFT InitializeFFT(int fftlen) HFFT InitializeFFT(int fftlen)
{ {
int i; int i;
@@ -62,10 +67,10 @@ HFFT InitializeFFT(int fftlen)
exit(8); exit(8);
} }
/* /*
* FFT size is only half the number of data points * FFT size is only half the number of data points
* The full FFT output can be reconstructed from this FFT's output. * The full FFT output can be reconstructed from this FFT's output.
* (This optimization can be made since the data is real.) * (This optimization can be made since the data is real.)
*/ */
h->Points = fftlen/2; h->Points = fftlen/2;
if((h->SinTable=(fft_type *)malloc(2*h->Points*sizeof(fft_type)))==NULL) if((h->SinTable=(fft_type *)malloc(2*h->Points*sizeof(fft_type)))==NULL)
@@ -73,6 +78,7 @@ HFFT InitializeFFT(int fftlen)
fprintf(stderr,"Error allocating memory for Sine table.\n"); fprintf(stderr,"Error allocating memory for Sine table.\n");
exit(8); exit(8);
} }
if((h->BitReversed=(int *)malloc(h->Points*sizeof(int)))==NULL) if((h->BitReversed=(int *)malloc(h->Points*sizeof(int)))==NULL)
{ {
fprintf(stderr,"Error allocating memory for BitReversed.\n"); fprintf(stderr,"Error allocating memory for BitReversed.\n");
@@ -93,12 +99,21 @@ HFFT InitializeFFT(int fftlen)
h->SinTable[h->BitReversed[i] ]=(fft_type)-sin(2*M_PI*i/(2*h->Points)); h->SinTable[h->BitReversed[i] ]=(fft_type)-sin(2*M_PI*i/(2*h->Points));
h->SinTable[h->BitReversed[i]+1]=(fft_type)-cos(2*M_PI*i/(2*h->Points)); h->SinTable[h->BitReversed[i]+1]=(fft_type)-cos(2*M_PI*i/(2*h->Points));
} }
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
// new SSE FFT routines work on live data
for(i=0;i<32;i++)
if((1<<i)&fftlen)
h->pow2Bits=i;
InitializeFFT1x(fftlen);
#endif
return h; return h;
} }
/* /*
* Free up the memory allotted for Sin table and Twiddle Pointers * Free up the memory allotted for Sin table and Twiddle Pointers
*/ */
void EndFFT(HFFT h) void EndFFT(HFFT h)
{ {
if(h->Points>0) { if(h->Points>0) {
@@ -157,23 +172,23 @@ void CleanupFFT()
} }
/* /*
* Forward FFT routine. Must call InitializeFFT(fftlen) first! * Forward FFT routine. Must call InitializeFFT(fftlen) first!
* *
* Note: Output is BIT-REVERSED! so you must use the BitReversed to * Note: Output is BIT-REVERSED! so you must use the BitReversed to
* get legible output, (i.e. Real_i = buffer[ h->BitReversed[i] ] * get legible output, (i.e. Real_i = buffer[ h->BitReversed[i] ]
* Imag_i = buffer[ h->BitReversed[i]+1 ] ) * Imag_i = buffer[ h->BitReversed[i]+1 ] )
* Input is in normal order. * Input is in normal order.
* *
* Output buffer[0] is the DC bin, and output buffer[1] is the Fs/2 bin * Output buffer[0] is the DC bin, and output buffer[1] is the Fs/2 bin
* - this can be done because both values will always be real only * - this can be done because both values will always be real only
* - this allows us to not have to allocate an extra complex value for the Fs/2 bin * - this allows us to not have to allocate an extra complex value for the Fs/2 bin
* *
* Note: The scaling on this is done according to the standard FFT definition, * Note: The scaling on this is done according to the standard FFT definition,
* so a unit amplitude DC signal will output an amplitude of (N) * so a unit amplitude DC signal will output an amplitude of (N)
* (Older revisions would progressively scale the input, so the output * (Older revisions would progressively scale the input, so the output
* values would be similar in amplitude to the input values, which is * values would be similar in amplitude to the input values, which is
* good when using fixed point arithmetic) * good when using fixed point arithmetic)
*/ */
void RealFFTf(fft_type *buffer,HFFT h) void RealFFTf(fft_type *buffer,HFFT h)
{ {
fft_type *A,*B; fft_type *A,*B;
@@ -186,12 +201,12 @@ void RealFFTf(fft_type *buffer,HFFT h)
int ButterfliesPerGroup=h->Points/2; int ButterfliesPerGroup=h->Points/2;
/* /*
* Butterfly: * Butterfly:
* Ain-----Aout * Ain-----Aout
* \ / * \ /
* / \ * / \
* Bin-----Bout * Bin-----Bout
*/ */
endptr1=buffer+h->Points*2; endptr1=buffer+h->Points*2;
@@ -258,24 +273,24 @@ void RealFFTf(fft_type *buffer,HFFT h)
/* Description: This routine performs an inverse FFT to real data. /* Description: This routine performs an inverse FFT to real data.
* This code is for floating point data. * This code is for floating point data.
* *
* Note: Output is BIT-REVERSED! so you must use the BitReversed to * Note: Output is BIT-REVERSED! so you must use the BitReversed to
* get legible output, (i.e. wave[2*i] = buffer[ BitReversed[i] ] * get legible output, (i.e. wave[2*i] = buffer[ BitReversed[i] ]
* wave[2*i+1] = buffer[ BitReversed[i]+1 ] ) * wave[2*i+1] = buffer[ BitReversed[i]+1 ] )
* Input is in normal order, interleaved (real,imaginary) complex data * Input is in normal order, interleaved (real,imaginary) complex data
* You must call InitializeFFT(fftlen) first to initialize some buffers! * You must call InitializeFFT(fftlen) first to initialize some buffers!
* *
* Input buffer[0] is the DC bin, and input buffer[1] is the Fs/2 bin * Input buffer[0] is the DC bin, and input buffer[1] is the Fs/2 bin
* - this can be done because both values will always be real only * - this can be done because both values will always be real only
* - this allows us to not have to allocate an extra complex value for the Fs/2 bin * - this allows us to not have to allocate an extra complex value for the Fs/2 bin
* *
* Note: The scaling on this is done according to the standard FFT definition, * Note: The scaling on this is done according to the standard FFT definition,
* so a unit amplitude DC signal will output an amplitude of (N) * so a unit amplitude DC signal will output an amplitude of (N)
* (Older revisions would progressively scale the input, so the output * (Older revisions would progressively scale the input, so the output
* values would be similar in amplitude to the input values, which is * values would be similar in amplitude to the input values, which is
* good when using fixed point arithmetic) * good when using fixed point arithmetic)
*/ */
void InverseRealFFTf(fft_type *buffer,HFFT h) void InverseRealFFTf(fft_type *buffer,HFFT h)
{ {
fft_type *A,*B; fft_type *A,*B;
@@ -323,12 +338,12 @@ void InverseRealFFTf(fft_type *buffer,HFFT h)
buffer[1]=v2; buffer[1]=v2;
/* /*
* Butterfly: * Butterfly:
* Ain-----Aout * Ain-----Aout
* \ / * \ /
* / \ * / \
* Bin-----Bout * Bin-----Bout
*/ */
endptr1=buffer+h->Points*2; endptr1=buffer+h->Points*2;

View File

@@ -6,6 +6,9 @@ typedef struct FFTParamType {
int *BitReversed; int *BitReversed;
fft_type *SinTable; fft_type *SinTable;
int Points; int Points;
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
int pow2Bits;
#endif
} FFTParam; } FFTParam;
#define HFFT FFTParam * #define HFFT FFTParam *

754
src/RealFFTf48x.cpp Normal file
View File

@@ -0,0 +1,754 @@
/**********************************************************************
Audacity: A Digital Audio Editor
RealFFT48x.cpp
Philip Van Baren
Andrew Hallendorff (SSE Mods)
*******************************************************************//**
\file RealFFT48x.cpp
\brief Real FFT with SSE acceleration.
*//****************************************************************/
/*
* Program: REALFFTF.C
* Author: Philip Van Baren
* Date: 2 September 1993
*
* Description: These routines perform an FFT on real data to get a conjugate-symmetric
* output, and an inverse FFT on conjugate-symmetric input to get a real
* output sequence.
*
* This code is for floating point data.
*
* Modified 8/19/1998 by Philip Van Baren
* - made the InitializeFFT and EndFFT routines take a structure
* holding the length and pointers to the BitReversed and SinTable
* tables.
* Modified 5/23/2009 by Philip Van Baren
* - Added GetFFT and ReleaseFFT routines to retain common SinTable
* and BitReversed tables so they don't need to be reallocated
* and recomputed on every call.
* - Added Reorder* functions to undo the bit-reversal
*
* Copyright (C) 2009 Philip VanBaren
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "Experimental.h"
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
#ifndef USE_SSE2
#define USE_SSE2
#endif
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include "RealFFTf.h"
#ifdef __WXMSW__
#pragma warning(disable:4305)
#else
#endif
#include "SseMathFuncs.h"
#include <xmmintrin.h>
#ifndef M_PI
#define M_PI 3.14159265358979323846 /* pi */
#endif
unsigned char smallReverseBitsTable[256];
int tableMask=0;
bool useBitReverseTable=false;
bool useSinCosTable=false;
void TableUsage(int iMask)
{
tableMask=iMask;
useBitReverseTable=((iMask & 1)!=0);
useSinCosTable=((iMask&2)!=0);
}
// note !!! number of bits must be between 9-16
int SmallReverseBits(int bits, int numberBits)
{
return (smallReverseBitsTable[*((unsigned char *)&bits)]<<(numberBits-8))+(smallReverseBitsTable[*(((unsigned char *)&bits)+1)]>>(16-numberBits));
}
/*
* Initialize the Sine table and Twiddle pointers (bit-reversed pointers)
* for the FFT routine.
*/
HFFT InitializeFFT1x(int WXUNUSED( fftlen ) )
{
int i;
//int temp;
//int mask;
//HFFT h;
// this needs to move out but ehh... Andrew Hallendorff
for(i=0;i<256;i++) {
smallReverseBitsTable[i]=0;
for(int maskLow=1, maskHigh=128;maskLow<256;maskLow<<=1,maskHigh>>=1)
if(i&maskLow)
smallReverseBitsTable[i]|=maskHigh;
}
return NULL;
}
/*
* Free up the memory allotted for Sin table and Twiddle Pointers
*/
void EndFFT1x(HFFT h)
{
if(h->Points>0) {
free(h->BitReversed);
free(h->SinTable);
}
h->Points=0;
free(h);
}
#define MAX_HFFT 10
static HFFT hFFTArray[MAX_HFFT] = { NULL };
static int nFFTLockCount[MAX_HFFT] = { 0 };
/* Get a handle to the FFT tables of the desired length */
/* This version keeps common tables rather than allocating a new table every time */
HFFT GetFFT1x(int fftlen)
{
int h,n = fftlen/2;
for(h=0; (h<MAX_HFFT) && (hFFTArray[h] != NULL) && (n != hFFTArray[h]->Points); h++);
if(h<MAX_HFFT) {
if(hFFTArray[h] == NULL) {
hFFTArray[h] = InitializeFFT(fftlen);
nFFTLockCount[h] = 0;
}
nFFTLockCount[h]++;
return hFFTArray[h];
} else {
// All buffers used, so fall back to allocating a new set of tables
return InitializeFFT(fftlen);;
}
}
/* Release a previously requested handle to the FFT tables */
void ReleaseFFT1x(HFFT hFFT)
{
int h;
for(h=0; (h<MAX_HFFT) && (hFFTArray[h] != hFFT); h++);
if(h<MAX_HFFT) {
nFFTLockCount[h]--;
} else {
EndFFT(hFFT);
}
}
/* Deallocate any unused FFT tables */
void CleanupFFT1x()
{
int h;
for(h=0; (h<MAX_HFFT); h++) {
if((nFFTLockCount[h] <= 0) && (hFFTArray[h] != NULL)) {
EndFFT(hFFTArray[h]);
hFFTArray[h] = NULL;
}
}
}
/*
* Forward FFT routine. Must call InitializeFFT(fftlen) first!
*
* Note: Output is BIT-REVERSED! so you must use the BitReversed to
* get legible output, (i.e. Real_i = buffer[ h->BitReversed[i] ]
* Imag_i = buffer[ h->BitReversed[i]+1 ] )
* Input is in normal order.
*
* Output buffer[0] is the DC bin, and output buffer[1] is the Fs/2 bin
* - this can be done because both values will always be real only
* - this allows us to not have to allocate an extra complex value for the Fs/2 bin
*
* Note: The scaling on this is done according to the standard FFT definition,
* so a unit amplitude DC signal will output an amplitude of (N)
* (Older revisions would progressively scale the input, so the output
* values would be similar in amplitude to the input values, which is
* good when using fixed point arithmetic)
*/
void RealFFTf1x(fft_type *buffer,HFFT h)
{
fft_type *A,*B;
fft_type *sptr;
fft_type *endptr1,*endptr2;
int *br1,*br2;
fft_type HRplus,HRminus,HIplus,HIminus;
fft_type v1,v2,sin,cos;
int ButterfliesPerGroup=h->Points/2;
/*
* Butterfly:
* Ain-----Aout
* \ /
* / \
* Bin-----Bout
*/
endptr1=buffer+h->Points*2;
while(ButterfliesPerGroup>0)
{
A=buffer;
B=buffer+ButterfliesPerGroup*2;
sptr=h->SinTable;
while(A<endptr1)
{
sin=*sptr;
cos=*(sptr+1);
endptr2=B;
while(A<endptr2)
{
v1=*B*cos + *(B+1)*sin;
v2=*B*sin - *(B+1)*cos;
*B=(*A+v1);
*(A++)=*(B++)-2*v1;
*B=(*A-v2);
*(A++)=*(B++)+2*v2;
}
A=B;
B+=ButterfliesPerGroup*2;
sptr+=2;
}
ButterfliesPerGroup >>= 1;
}
/* Massage output to get the output for a real input sequence. */
br1=h->BitReversed+1;
br2=h->BitReversed+h->Points-1;
while(br1<br2)
{
sin=h->SinTable[*br1];
cos=h->SinTable[*br1+1];
A=buffer+*br1;
B=buffer+*br2;
HRplus = (HRminus = *A - *B ) + (*B * 2);
HIplus = (HIminus = *(A+1) - *(B+1)) + (*(B+1) * 2);
v1 = (sin*HRminus - cos*HIplus);
v2 = (cos*HRminus + sin*HIplus);
*A = (HRplus + v1) * (fft_type)0.5;
*B = *A - v1;
*(A+1) = (HIminus + v2) * (fft_type)0.5;
*(B+1) = *(A+1) - HIminus;
br1++;
br2--;
}
/* Handle the center bin (just need a conjugate) */
A=buffer+*br1+1;
*A=-*A;
/* Handle DC bin separately - and ignore the Fs/2 bin
buffer[0]+=buffer[1];
buffer[1]=(fft_type)0;*/
/* Handle DC and Fs/2 bins separately */
/* Put the Fs/2 value into the imaginary part of the DC bin */
v1=buffer[0]-buffer[1];
buffer[0]+=buffer[1];
buffer[1]=v1;
}
/* Description: This routine performs an inverse FFT to real data.
* This code is for floating point data.
*
* Note: Output is BIT-REVERSED! so you must use the BitReversed to
* get legible output, (i.e. wave[2*i] = buffer[ BitReversed[i] ]
* wave[2*i+1] = buffer[ BitReversed[i]+1 ] )
* Input is in normal order, interleaved (real,imaginary) complex data
* You must call InitializeFFT(fftlen) first to initialize some buffers!
*
* Input buffer[0] is the DC bin, and input buffer[1] is the Fs/2 bin
* - this can be done because both values will always be real only
* - this allows us to not have to allocate an extra complex value for the Fs/2 bin
*
* Note: The scaling on this is done according to the standard FFT definition,
* so a unit amplitude DC signal will output an amplitude of (N)
* (Older revisions would progressively scale the input, so the output
* values would be similar in amplitude to the input values, which is
* good when using fixed point arithmetic)
*/
void InverseRealFFTf1x(fft_type *buffer,HFFT h)
{
fft_type *A,*B;
fft_type *sptr;
fft_type *endptr1,*endptr2;
int *br1;
fft_type HRplus,HRminus,HIplus,HIminus;
fft_type v1,v2,sin,cos;
int ButterfliesPerGroup=h->Points/2;
/* Massage input to get the input for a real output sequence. */
A=buffer+2;
B=buffer+h->Points*2-2;
br1=h->BitReversed+1;
while(A<B)
{
sin=h->SinTable[*br1];
cos=h->SinTable[*br1+1];
HRplus = (HRminus = *A - *B ) + (*B * 2);
HIplus = (HIminus = *(A+1) - *(B+1)) + (*(B+1) * 2);
v1 = (sin*HRminus + cos*HIplus);
v2 = (cos*HRminus - sin*HIplus);
*A = (HRplus + v1) * (fft_type)0.5;
*B = *A - v1;
*(A+1) = (HIminus - v2) * (fft_type)0.5;
*(B+1) = *(A+1) - HIminus;
A+=2;
B-=2;
br1++;
}
/* Handle center bin (just need conjugate) */
*(A+1)=-*(A+1);
/* Handle DC bin separately - this ignores any Fs/2 component
buffer[1]=buffer[0]=buffer[0]/2;*/
/* Handle DC and Fs/2 bins specially */
/* The DC bin is passed in as the real part of the DC complex value */
/* The Fs/2 bin is passed in as the imaginary part of the DC complex value */
/* (v1+v2) = buffer[0] == the DC component */
/* (v1-v2) = buffer[1] == the Fs/2 component */
v1=0.5f*(buffer[0]+buffer[1]);
v2=0.5f*(buffer[0]-buffer[1]);
buffer[0]=v1;
buffer[1]=v2;
/*
* Butterfly:
* Ain-----Aout
* \ /
* / \
* Bin-----Bout
*/
endptr1=buffer+h->Points*2;
while(ButterfliesPerGroup>0)
{
A=buffer;
B=buffer+ButterfliesPerGroup*2;
sptr=h->SinTable;
while(A<endptr1)
{
sin=*(sptr++);
cos=*(sptr++);
endptr2=B;
while(A<endptr2)
{
v1=*B*cos - *(B+1)*sin;
v2=*B*sin + *(B+1)*cos;
*B=(*A+v1)*(fft_type)0.5;
*(A++)=*(B++)-v1;
*B=(*A+v2)*(fft_type)0.5;
*(A++)=*(B++)-v2;
}
A=B;
B+=ButterfliesPerGroup*2;
}
ButterfliesPerGroup >>= 1;
}
}
void ReorderToFreq1x(HFFT hFFT, fft_type *buffer, fft_type *RealOut, fft_type *ImagOut)
{
// Copy the data into the real and imaginary outputs
for(int i=1;i<hFFT->Points;i++) {
RealOut[i]=buffer[hFFT->BitReversed[i] ];
ImagOut[i]=buffer[hFFT->BitReversed[i]+1];
}
RealOut[0] = buffer[0]; // DC component
ImagOut[0] = 0;
RealOut[hFFT->Points] = buffer[1]; // Fs/2 component
ImagOut[hFFT->Points] = 0;
}
void ReorderToTime1x(HFFT hFFT, fft_type *buffer, fft_type *TimeOut)
{
// Copy the data into the real outputs
for(int i=0;i<hFFT->Points;i++) {
TimeOut[i*2 ]=buffer[hFFT->BitReversed[i] ];
TimeOut[i*2+1]=buffer[hFFT->BitReversed[i]+1];
}
}
// 4x processing simd
void RealFFTf4x(fft_type *buffer,HFFT h)
{
__m128 *localBuffer=(__m128 *)buffer;
__m128 *A,*B;
fft_type *sptr;
__m128 *endptr1,*endptr2;
int br1Index, br2Index;
int br1Value, br2Value;
__m128 HRplus,HRminus,HIplus,HIminus;
__m128 v1,v2,sin,cos;
fft_type iToRad=2*M_PI/(2*h->Points);
int ButterfliesPerGroup=h->Points/2;
/*
* Butterfly:
* Ain-----Aout
* \ /
* / \
* Bin-----Bout
*/
endptr1=&localBuffer[h->Points*2];
while(ButterfliesPerGroup>0)
{
A=localBuffer;
B=&localBuffer[ButterfliesPerGroup*2];
sptr=h->SinTable;
int iSinCosIndex=0;
int iSinCosCalIndex=0;
while(A<endptr1)
{
v4sfu sin4_2, cos4_2;
if(useSinCosTable) {
sin=_mm_set1_ps(*(sptr++));
cos=_mm_set1_ps(*(sptr++));
} else {
if(!iSinCosCalIndex)
{
v4sfu vx;
for(int i=0;i<4;i++)
vx.m128_f32[i]=((fft_type )SmallReverseBits(iSinCosIndex+i,h->pow2Bits-1))*iToRad;
sincos_ps(&vx, &sin4_2, &cos4_2);
sin=_mm_set1_ps(-sin4_2.m128_f32[0]);
cos=_mm_set1_ps(-cos4_2.m128_f32[0]);
iSinCosCalIndex++;
} else {
sin=_mm_set1_ps(-sin4_2.m128_f32[iSinCosCalIndex]);
cos=_mm_set1_ps(-cos4_2.m128_f32[iSinCosCalIndex]);
if(iSinCosCalIndex==3)
iSinCosCalIndex=0;
else
iSinCosCalIndex++;
}
iSinCosIndex++;
}
endptr2=B;
while(A<endptr2)
{
v1 = _mm_add_ps( _mm_mul_ps(*B, cos), _mm_mul_ps(*(B+1), sin));
v2 = _mm_sub_ps( _mm_mul_ps(*B, sin), _mm_mul_ps(*(B+1), cos));
*B=_mm_add_ps( *A, v1);
__m128 temp128 = _mm_set1_ps( 2.0);
*(A++)=_mm_sub_ps(*(B++), _mm_mul_ps(temp128, v1));
*B=_mm_sub_ps(*A,v2);
*(A++)=_mm_add_ps(*(B++), _mm_mul_ps(temp128, v2));
}
A=B;
B=&B[ButterfliesPerGroup*2];
}
ButterfliesPerGroup >>= 1;
}
/* Massage output to get the output for a real input sequence. */
br1Index=1; // h->BitReversed+1;
br2Index=h->Points-1; //h->BitReversed+h->Points-1;
int iSinCosCalIndex=0;
while(br1Index<br2Index)
{
v4sfu sin4_2, cos4_2;
if(useBitReverseTable) {
br1Value=h->BitReversed[br1Index];
br2Value=h->BitReversed[br2Index];
} else {
br1Value=SmallReverseBits(br1Index,h->pow2Bits);
br2Value=SmallReverseBits(br2Index,h->pow2Bits);
}
if(useSinCosTable) {
sin=_mm_set1_ps(h->SinTable[br1Value]);
cos=_mm_set1_ps(h->SinTable[br1Value+1]);
} else {
if(!iSinCosCalIndex)
{
v4sfu vx;
for(int i=0;i<4;i++)
vx.m128_f32[i]=((float)(br1Index+i))*iToRad;
sincos_ps(&vx, &sin4_2, &cos4_2);
sin=_mm_set1_ps(-sin4_2.m128_f32[0]);
cos=_mm_set1_ps(-cos4_2.m128_f32[0]);
iSinCosCalIndex++;
} else {
sin=_mm_set1_ps(-sin4_2.m128_f32[iSinCosCalIndex]);
cos=_mm_set1_ps(-cos4_2.m128_f32[iSinCosCalIndex]);
if(iSinCosCalIndex==3)
iSinCosCalIndex=0;
else
iSinCosCalIndex++;
}
}
A=&localBuffer[br1Value];
B=&localBuffer[br2Value];
__m128 temp128 = _mm_set1_ps( 2.0);
HRplus = _mm_add_ps(HRminus = _mm_sub_ps( *A, *B ), _mm_mul_ps(*B, temp128));
HIplus = _mm_add_ps(HIminus = _mm_sub_ps(*(A+1), *(B+1) ), _mm_mul_ps(*(B+1), temp128));
v1 = _mm_sub_ps(_mm_mul_ps(sin, HRminus), _mm_mul_ps(cos, HIplus));
v2 = _mm_add_ps(_mm_mul_ps(cos, HRminus), _mm_mul_ps(sin, HIplus));
temp128 = _mm_set1_ps( 0.5);
*A = _mm_mul_ps(_mm_add_ps(HRplus, v1), temp128);
*B = _mm_sub_ps(*A, v1);
*(A+1) = _mm_mul_ps(_mm_add_ps(HIminus, v2), temp128);
*(B+1) = _mm_sub_ps(*(A+1), HIminus);
br1Index++;
br2Index--;
}
/* Handle the center bin (just need a conjugate) */
if(useBitReverseTable)
A=&localBuffer[h->BitReversed[br1Index]+1];
else
A=&localBuffer[SmallReverseBits(br1Index,h->pow2Bits)+1];
// negate sse style
*A=_mm_xor_ps(*A, _mm_set1_ps(-0.f));
/* Handle DC and Fs/2 bins separately */
/* Put the Fs/2 value into the imaginary part of the DC bin */
v1=_mm_sub_ps(localBuffer[0], localBuffer[1]);
localBuffer[0]=_mm_add_ps(localBuffer[0], localBuffer[1]);
localBuffer[1]=v1;
}
/* Description: This routine performs an inverse FFT to real data.
* This code is for floating point data.
*
* Note: Output is BIT-REVERSED! so you must use the BitReversed to
* get legible output, (i.e. wave[2*i] = buffer[ BitReversed[i] ]
* wave[2*i+1] = buffer[ BitReversed[i]+1 ] )
* Input is in normal order, interleaved (real,imaginary) complex data
* You must call InitializeFFT(fftlen) first to initialize some buffers!
*
* Input buffer[0] is the DC bin, and input buffer[1] is the Fs/2 bin
* - this can be done because both values will always be real only
* - this allows us to not have to allocate an extra complex value for the Fs/2 bin
*
* Note: The scaling on this is done according to the standard FFT definition,
* so a unit amplitude DC signal will output an amplitude of (N)
* (Older revisions would progressively scale the input, so the output
* values would be similar in amplitude to the input values, which is
* good when using fixed point arithmetic)
*/
void InverseRealFFTf4x(fft_type *buffer,HFFT h)
{
__m128 *localBuffer=(__m128 *)buffer;
__m128 *A,*B;
fft_type *sptr;
__m128 *endptr1,*endptr2;
int br1Index, br1Value;
__m128 HRplus,HRminus,HIplus,HIminus;
__m128 v1,v2,sin,cos;
fft_type iToRad=2*M_PI/(2*h->Points);
int ButterfliesPerGroup=h->Points/2;
/* Massage input to get the input for a real output sequence. */
A=localBuffer+2;
B=localBuffer+h->Points*2-2;
br1Index=1; //h->BitReversed+1;
int iSinCosCalIndex=0;
while(A<B)
{
v4sfu sin4_2, cos4_2;
if(useBitReverseTable) {
br1Value=h->BitReversed[br1Index];
} else {
br1Value=SmallReverseBits(br1Index,h->pow2Bits);
}
if(useSinCosTable) {
sin=_mm_set1_ps(h->SinTable[br1Value]);
cos=_mm_set1_ps(h->SinTable[br1Value+1]);
} else {
if(!iSinCosCalIndex)
{
v4sfu vx;
for(int i=0;i<4;i++)
vx.m128_f32[i]=((float)(br1Index+i))*iToRad;
sincos_ps(&vx, &sin4_2, &cos4_2);
sin=_mm_set1_ps(-sin4_2.m128_f32[0]);
cos=_mm_set1_ps(-cos4_2.m128_f32[0]);
iSinCosCalIndex++;
} else {
sin=_mm_set1_ps(-sin4_2.m128_f32[iSinCosCalIndex]);
cos=_mm_set1_ps(-cos4_2.m128_f32[iSinCosCalIndex]);
if(iSinCosCalIndex==3)
iSinCosCalIndex=0;
else
iSinCosCalIndex++;
}
}
HRminus = _mm_sub_ps(*A, *B);
HRplus = _mm_add_ps(HRminus, _mm_mul_ps(*B, _mm_set1_ps(2.0)));
HIminus = _mm_sub_ps( *(A+1), *(B+1));
HIplus = _mm_add_ps(HIminus, _mm_mul_ps(*(B+1), _mm_set1_ps(2.0)));
v1 = _mm_add_ps(_mm_mul_ps(sin, HRminus), _mm_mul_ps(cos, HIplus));
v2 = _mm_sub_ps(_mm_mul_ps(cos, HRminus), _mm_mul_ps(sin, HIplus));
*A = _mm_mul_ps(_mm_add_ps(HRplus, v1), _mm_set1_ps(0.5));
*B = _mm_sub_ps(*A, v1);
*(A+1) = _mm_mul_ps(_mm_sub_ps(HIminus, v2) , _mm_set1_ps(0.5));
*(B+1) = _mm_sub_ps(*(A+1), HIminus);
A=&A[2];
B=&B[-2];
br1Index++;
}
/* Handle center bin (just need conjugate) */
// negate sse style
*(A+1)=_mm_xor_ps(*(A+1), _mm_set1_ps(-0.f));
/* Handle DC bin separately - this ignores any Fs/2 component
buffer[1]=buffer[0]=buffer[0]/2;*/
/* Handle DC and Fs/2 bins specially */
/* The DC bin is passed in as the real part of the DC complex value */
/* The Fs/2 bin is passed in as the imaginary part of the DC complex value */
/* (v1+v2) = buffer[0] == the DC component */
/* (v1-v2) = buffer[1] == the Fs/2 component */
v1=_mm_mul_ps(_mm_set1_ps(0.5), _mm_add_ps(localBuffer[0], localBuffer[1]));
v2=_mm_mul_ps(_mm_set1_ps(0.5), _mm_sub_ps(localBuffer[0], localBuffer[1]));
localBuffer[0]=v1;
localBuffer[1]=v2;
/*
* Butterfly:
* Ain-----Aout
* \ /
* / \
* Bin-----Bout
*/
endptr1=localBuffer+h->Points*2;
while(ButterfliesPerGroup>0)
{
A=localBuffer;
B=localBuffer+ButterfliesPerGroup*2;
sptr=h->SinTable;
int iSinCosIndex=0;
int iSinCosCalIndex=0;
while(A<endptr1)
{
v4sfu sin4_2, cos4_2;
if(useSinCosTable) {
sin=_mm_set1_ps(*(sptr++));
cos=_mm_set1_ps(*(sptr++));
} else {
if(!iSinCosCalIndex)
{
v4sfu vx;
for(int i=0;i<4;i++)
vx.m128_f32[i]=((fft_type )SmallReverseBits(iSinCosIndex+i,h->pow2Bits-1))*iToRad;
sincos_ps(&vx, &sin4_2, &cos4_2);
sin=_mm_set1_ps(-sin4_2.m128_f32[0]);
cos=_mm_set1_ps(-cos4_2.m128_f32[0]);
iSinCosCalIndex++;
} else {
sin=_mm_set1_ps(-sin4_2.m128_f32[iSinCosCalIndex]);
cos=_mm_set1_ps(-cos4_2.m128_f32[iSinCosCalIndex]);
if(iSinCosCalIndex==3)
iSinCosCalIndex=0;
else
iSinCosCalIndex++;
}
iSinCosIndex++;
}
endptr2=B;
while(A<endptr2)
{
v1=_mm_sub_ps( _mm_mul_ps(*B, cos), _mm_mul_ps(*(B+1), sin));
v2=_mm_add_ps( _mm_mul_ps(*B, sin), _mm_mul_ps(*(B+1), cos));
*B=_mm_mul_ps( _mm_add_ps(*A, v1), _mm_set1_ps(0.5));
*(A++)=_mm_sub_ps(*(B++), v1);
*B=_mm_mul_ps(_mm_add_ps(*A, v2), _mm_set1_ps(0.5));
*(A++)=_mm_sub_ps(*(B++),v2);
}
A=B;
B=&B[ButterfliesPerGroup*2];
}
ButterfliesPerGroup >>= 1;
}
}
void ReorderToFreq4x(HFFT hFFT, fft_type *buffer, fft_type *RealOut, fft_type *ImagOut)
{
__m128 *localBuffer=(__m128 *)buffer;
__m128 *localRealOut=(__m128 *)RealOut;
__m128 *localImagOut=(__m128 *)ImagOut;
// Copy the data into the real and imaginary outputs
for(int i=1;i<hFFT->Points;i++) {
int brValue;
if(useBitReverseTable)
brValue=hFFT->BitReversed[i];
else
brValue=SmallReverseBits(i,hFFT->pow2Bits);
localRealOut[i]=localBuffer[brValue ];
localImagOut[i]=localBuffer[brValue+1];
}
localRealOut[0] = localBuffer[0]; // DC component
localImagOut[0] = _mm_set1_ps(0.0);
localRealOut[hFFT->Points] = localBuffer[1]; // Fs/2 component
localImagOut[hFFT->Points] = _mm_set1_ps(0.0);
}
void ReorderToTime4x(HFFT hFFT, fft_type *buffer, fft_type *TimeOut)
{
__m128 *localBuffer=(__m128 *)buffer;
__m128 *localTimeOut=(__m128 *)TimeOut;
// Copy the data into the real outputs
for(int i=0;i<hFFT->Points;i++) {
int brValue;
if(useBitReverseTable)
brValue=hFFT->BitReversed[i];
else
brValue=SmallReverseBits(i,hFFT->pow2Bits);
localTimeOut[i*2 ]=localBuffer[brValue ];
localTimeOut[i*2+1]=localBuffer[brValue+1];
}
}
#endif

23
src/RealFFTf48x.h Normal file
View File

@@ -0,0 +1,23 @@
#ifndef __realfftf48x_h
#define __realfftf48x_h
#define fft_type float
HFFT InitializeFFT1x(int);
void EndFFT1x(HFFT);
HFFT GetFFT1x(int);
void ReleaseFFT1x(HFFT);
void CleanupFFT1x();
void RealFFTf1x(fft_type *,HFFT);
void InverseRealFFTf1x(fft_type *,HFFT);
void ReorderToTime1x(HFFT hFFT, fft_type *buffer, fft_type *TimeOut);
void ReorderToFreq1x(HFFT hFFT, fft_type *buffer, fft_type *RealOut, fft_type *ImagOut);
int SmallReverseBits(int bits, int numberBits);
void RealFFTf4x(fft_type *,HFFT);
void InverseRealFFTf4x(fft_type *,HFFT);
void ReorderToTime4x(HFFT hFFT, fft_type *buffer, fft_type *TimeOut);
void ReorderToFreq4x(HFFT hFFT, fft_type *buffer, fft_type *RealOut, fft_type *ImagOut);
void TableUsage(int iMask);
#endif

698
src/SseMathFuncs.cpp Normal file
View File

@@ -0,0 +1,698 @@
/**********************************************************************
Audacity: A Digital Audio Editor
SseMathFuncs.cpp
Stephen Moshier (wrote original C version, The Cephes Library)
Julien Pommier (converted to use SSE)
Andrew Hallendorff (adapted for Audacity)
*******************************************************************//**
\file SseMathfuncs.cpp
\brief SSE maths functions (for FFTs)
*//****************************************************************/
#include "SseMathFuncs.h"
/* JKC: The trig functions use Taylor's series, on the range 0 to Pi/4
* computing both Sin and Cos, and using one or the other (in the
* solo functions), or both in the more useful for us for FFTs sincos
* function.
* The constants minus_cephes_DP1 to minus_cephes_DP3 are used in the
* angle reduction modulo function.
* 4 sincos are done at a time.
* If we wanted to do just sin or just cos, we could speed things up
* by queuing up the Sines and Cosines into batches of 4 separately.*/
#ifndef USE_SSE2 //sry this is all sse2 now
#define USE_SSE2
#endif
/* declare some SSE constants -- why can't I figure a better way to do that? */
#define _PS_CONST(Name, Val) \
static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { (float)Val, (float)Val, (float)Val, (float)Val }
#define _PI32_CONST(Name, Val) \
static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
#define _PS_CONST_TYPE(Name, Type, Val) \
static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
_PS_CONST(1 , 1.0f);
_PS_CONST(0p5, 0.5f);
/* the smallest non denormalized float number */
_PS_CONST_TYPE(min_norm_pos, int, 0x00800000);
_PS_CONST_TYPE(mant_mask, int, 0x7f800000);
_PS_CONST_TYPE(inv_mant_mask, int, ~0x7f800000);
_PS_CONST_TYPE(sign_mask, int, (int)0x80000000);
_PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
_PI32_CONST(1, 1);
_PI32_CONST(inv1, ~1);
_PI32_CONST(2, 2);
_PI32_CONST(4, 4);
_PI32_CONST(0x7f, 0x7f);
_PS_CONST(cephes_SQRTHF, 0.707106781186547524);
_PS_CONST(cephes_log_p0, 7.0376836292E-2);
_PS_CONST(cephes_log_p1, - 1.1514610310E-1);
_PS_CONST(cephes_log_p2, 1.1676998740E-1);
_PS_CONST(cephes_log_p3, - 1.2420140846E-1);
_PS_CONST(cephes_log_p4, + 1.4249322787E-1);
_PS_CONST(cephes_log_p5, - 1.6668057665E-1);
_PS_CONST(cephes_log_p6, + 2.0000714765E-1);
_PS_CONST(cephes_log_p7, - 2.4999993993E-1);
_PS_CONST(cephes_log_p8, + 3.3333331174E-1);
_PS_CONST(cephes_log_q1, -2.12194440e-4);
_PS_CONST(cephes_log_q2, 0.693359375);
#ifndef USE_SSE2
typedef union xmm_mm_union {
__m128 xmm;
__m64 mm[2];
} xmm_mm_union;
#define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) { \
xmm_mm_union u; u.xmm = xmm_; \
mm0_ = u.mm[0]; \
mm1_ = u.mm[1]; \
}
#define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) { \
xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm; \
}
#endif // USE_SSE2
/* natural logarithm computed for 4 simultaneous float
return NaN for x <= 0
*/
__m128 log_ps(v4sfu *xPtr) {
__m128 x=*((__m128 *)xPtr);
#ifdef USE_SSE2
__m128i emm0;
#else
__m64 mm0, mm1;
#endif
__m128 one = *(__m128*)_ps_1;
__m128 invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
x = _mm_max_ps(x, *(__m128*)_ps_min_norm_pos); /* cut off denormalized stuff */
#ifndef USE_SSE2
/* part 1: x = frexpf(x, &e); */
COPY_XMM_TO_MM(x, mm0, mm1);
mm0 = _mm_srli_pi32(mm0, 23);
mm1 = _mm_srli_pi32(mm1, 23);
#else
emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
#endif
/* keep only the fractional part */
x = _mm_and_ps(x, *(__m128*)_ps_inv_mant_mask);
x = _mm_or_ps(x, *(__m128*)_ps_0p5);
#ifndef USE_SSE2
/* now e=mm0:mm1 contain the really base-2 exponent */
mm0 = _mm_sub_pi32(mm0, *(__m64*)_pi32_0x7f);
mm1 = _mm_sub_pi32(mm1, *(__m64*)_pi32_0x7f);
__m128 e = _mm_cvtpi32x2_ps(mm0, mm1);
_mm_empty(); /* bye bye mmx */
#else
emm0 = _mm_sub_epi32(emm0, *(__m128i*)_pi32_0x7f);
__m128 e = _mm_cvtepi32_ps(emm0);
#endif
e = _mm_add_ps(e, one);
/* part2:
if( x < SQRTHF ) {
e -= 1;
x = x + x - 1.0;
} else { x = x - 1.0; }
*/
__m128 mask = _mm_cmplt_ps(x, *(__m128*)_ps_cephes_SQRTHF);
__m128 tmp = _mm_and_ps(x, mask);
x = _mm_sub_ps(x, one);
e = _mm_sub_ps(e, _mm_and_ps(one, mask));
x = _mm_add_ps(x, tmp);
__m128 z = _mm_mul_ps(x,x);
__m128 y = *(__m128*)_ps_cephes_log_p0;
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_log_p1);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_log_p2);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_log_p3);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_log_p4);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_log_p5);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_log_p6);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_log_p7);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_log_p8);
y = _mm_mul_ps(y, x);
y = _mm_mul_ps(y, z);
tmp = _mm_mul_ps(e, *(__m128*)_ps_cephes_log_q1);
y = _mm_add_ps(y, tmp);
tmp = _mm_mul_ps(z, *(__m128*)_ps_0p5);
y = _mm_sub_ps(y, tmp);
tmp = _mm_mul_ps(e, *(__m128*)_ps_cephes_log_q2);
x = _mm_add_ps(x, y);
x = _mm_add_ps(x, tmp);
x = _mm_or_ps(x, invalid_mask); // negative arg will be NAN
return x;
}
_PS_CONST(exp_hi, 88.3762626647949f);
_PS_CONST(exp_lo, -88.3762626647949f);
_PS_CONST(cephes_LOG2EF, 1.44269504088896341);
_PS_CONST(cephes_exp_C1, 0.693359375);
_PS_CONST(cephes_exp_C2, -2.12194440e-4);
_PS_CONST(cephes_exp_p0, 1.9875691500E-4);
_PS_CONST(cephes_exp_p1, 1.3981999507E-3);
_PS_CONST(cephes_exp_p2, 8.3334519073E-3);
_PS_CONST(cephes_exp_p3, 4.1665795894E-2);
_PS_CONST(cephes_exp_p4, 1.6666665459E-1);
_PS_CONST(cephes_exp_p5, 5.0000001201E-1);
__m128 exp_ps(v4sfu *xPtr) {
__m128 x=*((__m128 *)xPtr);
__m128 tmp = _mm_setzero_ps(), fx;
#ifdef USE_SSE2
__m128i emm0;
#else
__m64 mm0, mm1;
#endif
__m128 one = *(__m128*)_ps_1;
x = _mm_min_ps(x, *(__m128*)_ps_exp_hi);
x = _mm_max_ps(x, *(__m128*)_ps_exp_lo);
/* express exp(x) as exp(g + n*log(2)) */
fx = _mm_mul_ps(x, *(__m128*)_ps_cephes_LOG2EF);
fx = _mm_add_ps(fx, *(__m128*)_ps_0p5);
/* how to perform a floorf with SSE: just below */
#ifndef USE_SSE2
/* step 1 : cast to int */
tmp = _mm_movehl_ps(tmp, fx);
mm0 = _mm_cvttps_pi32(fx);
mm1 = _mm_cvttps_pi32(tmp);
/* step 2 : cast back to float */
tmp = _mm_cvtpi32x2_ps(mm0, mm1);
#else
emm0 = _mm_cvttps_epi32(fx);
tmp = _mm_cvtepi32_ps(emm0);
#endif
/* if greater, substract 1 */
__m128 mask = _mm_cmpgt_ps(tmp, fx);
mask = _mm_and_ps(mask, one);
fx = _mm_sub_ps(tmp, mask);
tmp = _mm_mul_ps(fx, *(__m128*)_ps_cephes_exp_C1);
__m128 z = _mm_mul_ps(fx, *(__m128*)_ps_cephes_exp_C2);
x = _mm_sub_ps(x, tmp);
x = _mm_sub_ps(x, z);
z = _mm_mul_ps(x,x);
__m128 y = *(__m128*)_ps_cephes_exp_p0;
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p1);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p2);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p3);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p4);
y = _mm_mul_ps(y, x);
y = _mm_add_ps(y, *(__m128*)_ps_cephes_exp_p5);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, x);
y = _mm_add_ps(y, one);
/* build 2^n */
#ifndef USE_SSE2
z = _mm_movehl_ps(z, fx);
mm0 = _mm_cvttps_pi32(fx);
mm1 = _mm_cvttps_pi32(z);
mm0 = _mm_add_pi32(mm0, *(__m64*)_pi32_0x7f);
mm1 = _mm_add_pi32(mm1, *(__m64*)_pi32_0x7f);
mm0 = _mm_slli_pi32(mm0, 23);
mm1 = _mm_slli_pi32(mm1, 23);
__m128 pow2n;
COPY_MM_TO_XMM(mm0, mm1, pow2n);
_mm_empty();
#else
emm0 = _mm_cvttps_epi32(fx);
emm0 = _mm_add_epi32(emm0, *(__m128i*)_pi32_0x7f);
emm0 = _mm_slli_epi32(emm0, 23);
__m128 pow2n = _mm_castsi128_ps(emm0);
#endif
y = _mm_mul_ps(y, pow2n);
return y;
}
_PS_CONST(minus_cephes_DP1, -0.78515625);
_PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
_PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
_PS_CONST(sincof_p0, -1.9515295891E-4);
_PS_CONST(sincof_p1, 8.3321608736E-3);
_PS_CONST(sincof_p2, -1.6666654611E-1);
_PS_CONST(coscof_p0, 2.443315711809948E-005);
_PS_CONST(coscof_p1, -1.388731625493765E-003);
_PS_CONST(coscof_p2, 4.166664568298827E-002);
_PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
/* evaluation of 4 sines at onces, using only SSE1+MMX intrinsics so
it runs also on old athlons XPs and the pentium III of your grand
mother.
The code is the exact rewriting of the cephes sinf function.
Precision is excellent as long as x < 8192 (I did not bother to
take into account the special handling they have for greater values
-- it does not return garbage for arguments over 8192, though, but
the extra precision is missing).
Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
surprising but correct result.
Performance is also surprisingly good, 1.33 times faster than the
macos vsinf SSE2 function, and 1.5 times faster than the
__vrs4_sinf of amd's ACML (which is only available in 64 bits). Not
too bad for an SSE1 function (with no special tuning) !
However the latter libraries probably have a much better handling of NaN,
Inf, denormalized and other special arguments..
On my core 1 duo, the execution of this function takes approximately 95 cycles.
From what I have observed on the experiments with Intel AMath lib, switching to an
SSE2 version would improve the perf by only 10%.
Since it is based on SSE intrinsics, it has to be compiled at -O2 to
deliver full speed.
*/
__m128 sin_ps(v4sfu *xPtr) { // any x
__m128 x=*((__m128 *)xPtr);
__m128 xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
#ifdef USE_SSE2
__m128i emm0, emm2;
#else
__m64 mm0, mm1, mm2, mm3;
#endif
sign_bit = x;
/* take the absolute value */
x = _mm_and_ps(x, *(__m128*)_ps_inv_sign_mask);
/* extract the sign bit (upper one) */
sign_bit = _mm_and_ps(sign_bit, *(__m128*)_ps_sign_mask);
/* scale by 4/Pi */
y = _mm_mul_ps(x, *(__m128*)_ps_cephes_FOPI);
#ifdef USE_SSE2
/* store the integer part of y in mm0 */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, *(__m128i*)_pi32_1);
emm2 = _mm_and_si128(emm2, *(__m128i*)_pi32_inv1);
y = _mm_cvtepi32_ps(emm2);
/* get the swap sign flag */
emm0 = _mm_and_si128(emm2, *(__m128i*)_pi32_4);
emm0 = _mm_slli_epi32(emm0, 29);
/* get the polynom selection mask
there is one polynom for 0 <= x <= Pi/4
and another one for Pi/4<x<=Pi/2
Both branches will be computed.
*/
emm2 = _mm_and_si128(emm2, *(__m128i*)_pi32_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
__m128 swap_sign_bit = _mm_castsi128_ps(emm0);
__m128 poly_mask = _mm_castsi128_ps(emm2);
sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
#else
/* store the integer part of y in mm0:mm1 */
xmm2 = _mm_movehl_ps(xmm2, y);
mm2 = _mm_cvttps_pi32(y);
mm3 = _mm_cvttps_pi32(xmm2);
/* j=(j+1) & (~1) (see the cephes sources) */
mm2 = _mm_add_pi32(mm2, *(__m64*)_pi32_1);
mm3 = _mm_add_pi32(mm3, *(__m64*)_pi32_1);
mm2 = _mm_and_si64(mm2, *(__m64*)_pi32_inv1);
mm3 = _mm_and_si64(mm3, *(__m64*)_pi32_inv1);
y = _mm_cvtpi32x2_ps(mm2, mm3);
/* get the swap sign flag */
mm0 = _mm_and_si64(mm2, *(__m64*)_pi32_4);
mm1 = _mm_and_si64(mm3, *(__m64*)_pi32_4);
mm0 = _mm_slli_pi32(mm0, 29);
mm1 = _mm_slli_pi32(mm1, 29);
/* get the polynom selection mask */
mm2 = _mm_and_si64(mm2, *(__m64*)_pi32_2);
mm3 = _mm_and_si64(mm3, *(__m64*)_pi32_2);
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
__m128 swap_sign_bit, poly_mask;
COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit);
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
_mm_empty(); /* good-bye mmx */
#endif
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = *(__m128*)_ps_minus_cephes_DP1;
xmm2 = *(__m128*)_ps_minus_cephes_DP2;
xmm3 = *(__m128*)_ps_minus_cephes_DP3;
xmm1 = _mm_mul_ps(y, xmm1);
xmm2 = _mm_mul_ps(y, xmm2);
xmm3 = _mm_mul_ps(y, xmm3);
x = _mm_add_ps(x, xmm1);
x = _mm_add_ps(x, xmm2);
x = _mm_add_ps(x, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
y = *(__m128*)_ps_coscof_p0;
__m128 z = _mm_mul_ps(x,x);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(__m128*)_ps_coscof_p1);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(__m128*)_ps_coscof_p2);
y = _mm_mul_ps(y, z);
y = _mm_mul_ps(y, z);
__m128 tmp = _mm_mul_ps(z, *(__m128*)_ps_0p5);
y = _mm_sub_ps(y, tmp);
y = _mm_add_ps(y, *(__m128*)_ps_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
__m128 y2 = *(__m128*)_ps_sincof_p0;
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p1);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p2);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_mul_ps(y2, x);
y2 = _mm_add_ps(y2, x);
/* select the correct result from the two polynoms */
xmm3 = poly_mask;
y2 = _mm_and_ps(xmm3, y2); //, xmm3);
y = _mm_andnot_ps(xmm3, y);
y = _mm_add_ps(y,y2);
/* update the sign */
y = _mm_xor_ps(y, sign_bit);
return y;
}
/* almost the same as sin_ps */
__m128 cos_ps(v4sfu *xPtr) { // any x
__m128 x=*((__m128 *)xPtr);
__m128 xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
#ifdef USE_SSE2
__m128i emm0, emm2;
#else
__m64 mm0, mm1, mm2, mm3;
#endif
/* take the absolute value */
x = _mm_and_ps(x, *(__m128*)_ps_inv_sign_mask);
/* scale by 4/Pi */
y = _mm_mul_ps(x, *(__m128*)_ps_cephes_FOPI);
#ifdef USE_SSE2
/* store the integer part of y in mm0 */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, *(__m128i*)_pi32_1);
emm2 = _mm_and_si128(emm2, *(__m128i*)_pi32_inv1);
y = _mm_cvtepi32_ps(emm2);
emm2 = _mm_sub_epi32(emm2, *(__m128i*)_pi32_2);
/* get the swap sign flag */
emm0 = _mm_andnot_si128(emm2, *(__m128i*)_pi32_4);
emm0 = _mm_slli_epi32(emm0, 29);
/* get the polynom selection mask */
emm2 = _mm_and_si128(emm2, *(__m128i*)_pi32_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
__m128 sign_bit = _mm_castsi128_ps(emm0);
__m128 poly_mask = _mm_castsi128_ps(emm2);
#else
/* store the integer part of y in mm0:mm1 */
xmm2 = _mm_movehl_ps(xmm2, y);
mm2 = _mm_cvttps_pi32(y);
mm3 = _mm_cvttps_pi32(xmm2);
/* j=(j+1) & (~1) (see the cephes sources) */
mm2 = _mm_add_pi32(mm2, *(__m64*)_pi32_1);
mm3 = _mm_add_pi32(mm3, *(__m64*)_pi32_1);
mm2 = _mm_and_si64(mm2, *(__m64*)_pi32_inv1);
mm3 = _mm_and_si64(mm3, *(__m64*)_pi32_inv1);
y = _mm_cvtpi32x2_ps(mm2, mm3);
mm2 = _mm_sub_pi32(mm2, *(__m64*)_pi32_2);
mm3 = _mm_sub_pi32(mm3, *(__m64*)_pi32_2);
/* get the swap sign flag in mm0:mm1 and the
polynom selection mask in mm2:mm3 */
mm0 = _mm_andnot_si64(mm2, *(__m64*)_pi32_4);
mm1 = _mm_andnot_si64(mm3, *(__m64*)_pi32_4);
mm0 = _mm_slli_pi32(mm0, 29);
mm1 = _mm_slli_pi32(mm1, 29);
mm2 = _mm_and_si64(mm2, *(__m64*)_pi32_2);
mm3 = _mm_and_si64(mm3, *(__m64*)_pi32_2);
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
__m128 sign_bit, poly_mask;
COPY_MM_TO_XMM(mm0, mm1, sign_bit);
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
_mm_empty(); /* good-bye mmx */
#endif
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = *(__m128*)_ps_minus_cephes_DP1;
xmm2 = *(__m128*)_ps_minus_cephes_DP2;
xmm3 = *(__m128*)_ps_minus_cephes_DP3;
xmm1 = _mm_mul_ps(y, xmm1);
xmm2 = _mm_mul_ps(y, xmm2);
xmm3 = _mm_mul_ps(y, xmm3);
x = _mm_add_ps(x, xmm1);
x = _mm_add_ps(x, xmm2);
x = _mm_add_ps(x, xmm3);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
y = *(__m128*)_ps_coscof_p0;
__m128 z = _mm_mul_ps(x,x);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(__m128*)_ps_coscof_p1);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(__m128*)_ps_coscof_p2);
y = _mm_mul_ps(y, z);
y = _mm_mul_ps(y, z);
__m128 tmp = _mm_mul_ps(z, *(__m128*)_ps_0p5);
y = _mm_sub_ps(y, tmp);
y = _mm_add_ps(y, *(__m128*)_ps_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
__m128 y2 = *(__m128*)_ps_sincof_p0;
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p1);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p2);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_mul_ps(y2, x);
y2 = _mm_add_ps(y2, x);
/* select the correct result from the two polynoms */
xmm3 = poly_mask;
y2 = _mm_and_ps(xmm3, y2); //, xmm3);
y = _mm_andnot_ps(xmm3, y);
y = _mm_add_ps(y,y2);
/* update the sign */
y = _mm_xor_ps(y, sign_bit);
return y;
}
/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
it is almost as fast, and gives you a free cosine with your sine */
void sincos_ps(v4sfu *xptr, v4sfu *sptr, v4sfu *cptr) {
__m128 x=*((__m128 *)xptr), *s=(__m128 *)sptr, *c=(__m128 *)cptr, xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
#ifdef USE_SSE2
__m128i emm0, emm2, emm4;
#else
__m64 mm0, mm1, mm2, mm3, mm4, mm5;
#endif
sign_bit_sin = x;
/* take the absolute value */
x = _mm_and_ps(x, *(__m128*)_ps_inv_sign_mask);
/* extract the sign bit (upper one) */
sign_bit_sin = _mm_and_ps(sign_bit_sin, *(__m128*)_ps_sign_mask);
/* scale by 4/Pi */
y = _mm_mul_ps(x, *(__m128*)_ps_cephes_FOPI);
#ifdef USE_SSE2
/* store the integer part of y in emm2 */
emm2 = _mm_cvttps_epi32(y);
/* j=(j+1) & (~1) (see the cephes sources) */
emm2 = _mm_add_epi32(emm2, *(__m128i*)_pi32_1);
emm2 = _mm_and_si128(emm2, *(__m128i*)_pi32_inv1);
y = _mm_cvtepi32_ps(emm2);
emm4 = emm2;
/* get the swap sign flag for the sine */
emm0 = _mm_and_si128(emm2, *(__m128i*)_pi32_4);
emm0 = _mm_slli_epi32(emm0, 29);
__m128 swap_sign_bit_sin = _mm_castsi128_ps(emm0);
/* get the polynom selection mask for the sine*/
emm2 = _mm_and_si128(emm2, *(__m128i*)_pi32_2);
emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
__m128 poly_mask = _mm_castsi128_ps(emm2);
#else
/* store the integer part of y in mm2:mm3 */
xmm3 = _mm_movehl_ps(xmm3, y);
mm2 = _mm_cvttps_pi32(y);
mm3 = _mm_cvttps_pi32(xmm3);
/* j=(j+1) & (~1) (see the cephes sources) */
mm2 = _mm_add_pi32(mm2, *(__m64*)_pi32_1);
mm3 = _mm_add_pi32(mm3, *(__m64*)_pi32_1);
mm2 = _mm_and_si64(mm2, *(__m64*)_pi32_inv1);
mm3 = _mm_and_si64(mm3, *(__m64*)_pi32_inv1);
y = _mm_cvtpi32x2_ps(mm2, mm3);
mm4 = mm2;
mm5 = mm3;
/* get the swap sign flag for the sine */
mm0 = _mm_and_si64(mm2, *(__m64*)_pi32_4);
mm1 = _mm_and_si64(mm3, *(__m64*)_pi32_4);
mm0 = _mm_slli_pi32(mm0, 29);
mm1 = _mm_slli_pi32(mm1, 29);
__m128 swap_sign_bit_sin;
COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit_sin);
/* get the polynom selection mask for the sine */
mm2 = _mm_and_si64(mm2, *(__m64*)_pi32_2);
mm3 = _mm_and_si64(mm3, *(__m64*)_pi32_2);
mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
__m128 poly_mask;
COPY_MM_TO_XMM(mm2, mm3, poly_mask);
#endif
/* The magic pass: "Extended precision modular arithmetic"
x = ((x - y * DP1) - y * DP2) - y * DP3; */
xmm1 = *(__m128*)_ps_minus_cephes_DP1;
xmm2 = *(__m128*)_ps_minus_cephes_DP2;
xmm3 = *(__m128*)_ps_minus_cephes_DP3;
xmm1 = _mm_mul_ps(y, xmm1);
xmm2 = _mm_mul_ps(y, xmm2);
xmm3 = _mm_mul_ps(y, xmm3);
x = _mm_add_ps(x, xmm1);
x = _mm_add_ps(x, xmm2);
x = _mm_add_ps(x, xmm3);
#ifdef USE_SSE2
emm4 = _mm_sub_epi32(emm4, *(__m128i*)_pi32_2);
emm4 = _mm_andnot_si128(emm4, *(__m128i*)_pi32_4);
emm4 = _mm_slli_epi32(emm4, 29);
__m128 sign_bit_cos = _mm_castsi128_ps(emm4);
#else
/* get the sign flag for the cosine */
mm4 = _mm_sub_pi32(mm4, *(__m64*)_pi32_2);
mm5 = _mm_sub_pi32(mm5, *(__m64*)_pi32_2);
mm4 = _mm_andnot_si64(mm4, *(__m64*)_pi32_4);
mm5 = _mm_andnot_si64(mm5, *(__m64*)_pi32_4);
mm4 = _mm_slli_pi32(mm4, 29);
mm5 = _mm_slli_pi32(mm5, 29);
__m128 sign_bit_cos;
COPY_MM_TO_XMM(mm4, mm5, sign_bit_cos);
_mm_empty(); /* good-bye mmx */
#endif
sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
/* Evaluate the first polynom (0 <= x <= Pi/4) */
__m128 z = _mm_mul_ps(x,x);
y = *(__m128*)_ps_coscof_p0;
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(__m128*)_ps_coscof_p1);
y = _mm_mul_ps(y, z);
y = _mm_add_ps(y, *(__m128*)_ps_coscof_p2);
y = _mm_mul_ps(y, z);
y = _mm_mul_ps(y, z);
__m128 tmp = _mm_mul_ps(z, *(__m128*)_ps_0p5);
y = _mm_sub_ps(y, tmp);
y = _mm_add_ps(y, *(__m128*)_ps_1);
/* Evaluate the second polynom (Pi/4 <= x <= 0) */
__m128 y2 = *(__m128*)_ps_sincof_p0;
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p1);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_add_ps(y2, *(__m128*)_ps_sincof_p2);
y2 = _mm_mul_ps(y2, z);
y2 = _mm_mul_ps(y2, x);
y2 = _mm_add_ps(y2, x);
/* select the correct result from the two polynoms */
xmm3 = poly_mask;
__m128 ysin2 = _mm_and_ps(xmm3, y2);
__m128 ysin1 = _mm_andnot_ps(xmm3, y);
y2 = _mm_sub_ps(y2,ysin2);
y = _mm_sub_ps(y, ysin1);
xmm1 = _mm_add_ps(ysin1,ysin2);
xmm2 = _mm_add_ps(y,y2);
/* update the sign */
*s = _mm_xor_ps(xmm1, sign_bit_sin);
*c = _mm_xor_ps(xmm2, sign_bit_cos);
}

80
src/SseMathFuncs.h Normal file
View File

@@ -0,0 +1,80 @@
/* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log
Inspired by Intel Approximate Math library, and based on the
corresponding algorithms of the cephes math library
The default is to use the SSE1 version. If you define USE_SSE2 the
the SSE2 intrinsics will be used in place of the MMX intrinsics. Do
not expect any significant performance improvement with SSE2.
*/
/* Copyright (C) 2007 Julien Pommier
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
(this is the zlib license)
*/
#ifndef SSE_MATHFUN
#define SSE_MATHFUN
#include <inttypes.h>
#include <xmmintrin.h>
/* yes I know, the top of this file is quite ugly */
#ifdef _MSC_VER /* visual c++ */
# define ALIGN16_BEG __declspec(align(16))
# define ALIGN16_END
#else /* gcc or icc */
# define ALIGN16_BEG
# define ALIGN16_END __attribute__((aligned(16)))
#endif
/* __m128 is ugly to write */
//typedef __m128 _v4sfu; // vector of 4 float (sse1)
#ifndef USE_SSE2 //sry this is all sse2 now
#define USE_SSE2
#endif
#ifdef USE_SSE2
# include <emmintrin.h>
#else
typedef __m64 v2si; // vector of 2 int (mmx)
#endif
// !!! Andrew Hallendorff Warning changed call structure to make compatible with gcc
typedef ALIGN16_BEG union {
float m128_f32[4];
int8_t m128_i8[16];
int16_t m128_i16[8];
int32_t m128_i32[4];
int64_t m128_i64[2];
uint8_t m128_u8[16];
uint16_t m128_u16[8];
uint32_t m128_u32[4];
uint64_t m128_u64[2];
} ALIGN16_END v4sfu;
__m128 log_ps(v4sfu *xPtr);
__m128 sin_ps(v4sfu *xPtr);
void sincos_ps(v4sfu *xptr, v4sfu *sptr, v4sfu *cptr);
#endif

View File

@@ -1,62 +1,64 @@
/********************************************************************** /**********************************************************************
Audacity: A Digital Audio Editor Audacity: A Digital Audio Editor
EffectEqualization.cpp EffectEqualization.cpp
Mitch Golden Mitch Golden
Vaughan Johnson (Preview) Vaughan Johnson (Preview)
Martyn Shaw (FIR filters, response curve, graphic EQ) Martyn Shaw (FIR filters, response curve, graphic EQ)
*******************************************************************//** *******************************************************************//**
\file Equalization.cpp \file Equalization.cpp
\brief Implements EffectEqualiztaion, EqualizationDialog, \brief Implements EffectEqualiztaion, EqualizationDialog,
EqualizationPanel, EQCurve and EQPoint. EqualizationPanel, EQCurve and EQPoint.
*//****************************************************************//** *//****************************************************************//**
\class EffectEqualization \class EffectEqualization
\brief An Effect. \brief An Effect.
Performs filtering, using an FFT to do a FIR filter. Performs filtering, using an FFT to do a FIR filter.
It lets the user draw an arbitrary envelope (using the same It lets the user draw an arbitrary envelope (using the same
envelope editing code that is used to edit the track's envelope editing code that is used to edit the track's
amplitude envelope). amplitude envelope).
Also allows the curve to be specified with a series of 'graphic EQ' Also allows the curve to be specified with a series of 'graphic EQ'
sliders. sliders.
The filter is applied using overlap/add of Hanning windows. The filter is applied using overlap/add of Hanning windows.
Clone of the FFT Filter effect, no longer part of Audacity. Clone of the FFT Filter effect, no longer part of Audacity.
*//****************************************************************//** *//****************************************************************//**
\class EqualizationDialog \class EqualizationDialog
\brief Dialog used with EffectEqualization \brief Dialog used with EffectEqualization
*//****************************************************************//** *//****************************************************************//**
\class EqualizationPanel \class EqualizationPanel
\brief EqualizationPanel is used with EqualizationDialog and controls \brief EqualizationPanel is used with EqualizationDialog and controls
a graph for EffectEqualization. We should look at amalgamating the a graph for EffectEqualization. We should look at amalgamating the
various graphing code, such as provided by FreqWindow and FilterPanel. various graphing code, such as provided by FreqWindow and FilterPanel.
*//****************************************************************//** *//****************************************************************//**
\class EQCurve \class EQCurve
\brief EQCurve is used with EffectEqualization. \brief EQCurve is used with EffectEqualization.
*//****************************************************************//** *//****************************************************************//**
\class EQPoint \class EQPoint
\brief EQPoint is used with EQCurve and hence EffectEqualization. \brief EQPoint is used with EQCurve and hence EffectEqualization.
*//*******************************************************************/ *//*******************************************************************/
#include "../Audacity.h" #include "../Audacity.h"
#include "../Experimental.h"
#include "Equalization.h" #include "Equalization.h"
#include "../AColor.h" #include "../AColor.h"
#include "../ShuttleGui.h" #include "../ShuttleGui.h"
@@ -97,6 +99,10 @@ various graphing code, such as provided by FreqWindow and FilterPanel.
#include <wx/settings.h> #include <wx/settings.h>
#include <wx/checkbox.h> #include <wx/checkbox.h>
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
#include "Equalization48x.h"
#endif
#if wxUSE_TOOLTIPS #if wxUSE_TOOLTIPS
#include <wx/tooltip.h> #include <wx/tooltip.h>
#endif #endif
@@ -123,33 +129,33 @@ const double EqualizationDialog::thirdOct[] =
// Leaving here for now...just disabling // Leaving here for now...just disabling
#ifdef __WXGTK__disabled #ifdef __WXGTK__disabled
/* /*
* wxSlider exhibits strange behaviour on wxGTK when wxSL_INVERSE and/or * wxSlider exhibits strange behaviour on wxGTK when wxSL_INVERSE and/or
* negative scale values are used. This affects at least SUSE 9.3 with * negative scale values are used. This affects at least SUSE 9.3 with
* self-compiled wxWidgets 2.6.2 and Kubuntu 6.06. This class is used * self-compiled wxWidgets 2.6.2 and Kubuntu 6.06. This class is used
* to transparently work around those problems. Use wxSliderBugfix wherever * to transparently work around those problems. Use wxSliderBugfix wherever
* you would have used a slider with wxSL_INVERSE and/or negative scale * you would have used a slider with wxSL_INVERSE and/or negative scale
* values. * values.
* *
* The equalization class uses wxSliderBugfix everywhere even when not * The equalization class uses wxSliderBugfix everywhere even when not
* strictly needed. This is important because we override some wxSlider * strictly needed. This is important because we override some wxSlider
* functions which are not virtual, and so we need to cast to wxSliderBugfix * functions which are not virtual, and so we need to cast to wxSliderBugfix
* instead of to wxSlider to allow the right function to be called statically. * instead of to wxSlider to allow the right function to be called statically.
*/ */
class wxSliderBugfix : public wxSlider class wxSliderBugfix : public wxSlider
{ {
public: public:
wxSliderBugfix(wxWindow* parent, wxWindowID id, int value, wxSliderBugfix(wxWindow* parent, wxWindowID id, int value,
int minValue, int maxValue, int minValue, int maxValue,
const wxPoint& point = wxDefaultPosition, const wxPoint& point = wxDefaultPosition,
const wxSize& size = wxDefaultSize, const wxSize& size = wxDefaultSize,
long style = wxSL_HORIZONTAL, long style = wxSL_HORIZONTAL,
const wxValidator& validator = wxDefaultValidator, const wxValidator& validator = wxDefaultValidator,
const wxString& name = wxT("slider")) : const wxString& name = wxT("slider")) :
wxSlider(parent, id, 0, 0, maxValue - minValue, point, wxSlider(parent, id, 0, 0, maxValue - minValue, point,
size, style & ~wxSL_INVERSE, validator, name) { size, style & ~wxSL_INVERSE, validator, name) {
isInverse = (style & wxSL_INVERSE) != 0; isInverse = (style & wxSL_INVERSE) != 0;
originalMinValue = minValue; originalMinValue = minValue;
SetValue(value); SetValue(value);
} }
int GetMin() const { int GetMin() const {
@@ -182,8 +188,8 @@ private:
#else #else
/* /*
* On platforms other than wxGTK, the workaround is not needed * On platforms other than wxGTK, the workaround is not needed
*/ */
#define wxSliderBugfix wxSlider #define wxSliderBugfix wxSlider
#endif // ifdef __WXGTK__ #endif // ifdef __WXGTK__
@@ -195,7 +201,7 @@ void EffectEqualization::ReadPrefs()
{ {
gPrefs->Read(wxT("/Effects/Equalization/FilterLength"), gPrefs->Read(wxT("/Effects/Equalization/FilterLength"),
&mM, 4001); &mM, 4001);
if ((mM < 21) || (mM > 8191)) { // corrupted Prefs? if ((mM < 21) || (mM > 8191)) { // corrupted Prefs?
mM = 4001; //default mM = 4001; //default
gPrefs->Write(wxT("/Effects/Equalization/FilterLength"), mM); gPrefs->Write(wxT("/Effects/Equalization/FilterLength"), mM);
@@ -216,6 +222,18 @@ void EffectEqualization::ReadPrefs()
gPrefs->Read(wxT("/Effects/Equalization/Interp"), &mInterp, 0); gPrefs->Read(wxT("/Effects/Equalization/Interp"), &mInterp, 0);
gPrefs->Read(wxT("/Effects/Equalization/DrawGrid"), &mDrawGrid, true); gPrefs->Read(wxT("/Effects/Equalization/DrawGrid"), &mDrawGrid, true);
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
bool useSSE;
gPrefs->Read(wxT("SSE/GUI"), &useSSE);
if(useSSE && !mEffectEqualization48x)
mEffectEqualization48x=new EffectEqualization48x;
else
if(!useSSE && mEffectEqualization48x) {
delete mEffectEqualization48x;
mEffectEqualization48x=NULL;
}
mBench=false;
#endif
gPrefs->Flush(); gPrefs->Flush();
} }
@@ -225,6 +243,11 @@ EffectEqualization::EffectEqualization()
mFFTBuffer = new float[windowSize]; mFFTBuffer = new float[windowSize];
mFilterFuncR = new float[windowSize]; mFilterFuncR = new float[windowSize];
mFilterFuncI = new float[windowSize]; mFilterFuncI = new float[windowSize];
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
mEffectEqualization48x=NULL;
#endif
ReadPrefs(); ReadPrefs();
mPrompting = false; mPrompting = false;
@@ -250,6 +273,10 @@ EffectEqualization::~EffectEqualization()
delete[] mFilterFuncI; delete[] mFilterFuncI;
mFilterFuncR = NULL; mFilterFuncR = NULL;
mFilterFuncI = NULL; mFilterFuncI = NULL;
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
if(mEffectEqualization48x)
delete mEffectEqualization48x;
#endif
} }
bool EffectEqualization::Init() bool EffectEqualization::Init()
@@ -296,7 +323,7 @@ bool EffectEqualization::PromptUser()
EqualizationDialog dlog(this, ((double)loFreqI), hiFreq, mFilterFuncR, mFilterFuncI, EqualizationDialog dlog(this, ((double)loFreqI), hiFreq, mFilterFuncR, mFilterFuncI,
windowSize, mCurveName, mEditingBatchParams, mParent, -1, _("Equalization")); windowSize, mCurveName, mEditingBatchParams, mParent, -1, _("Equalization"));
dlog.M = mM; dlog.M = mM;
dlog.curveName = mCurveName; dlog.curveName = mCurveName;
@@ -351,7 +378,7 @@ bool EffectEqualization::DontPromptUser()
hiFreq = ((float)(GetActiveProject()->GetRate())/2.); hiFreq = ((float)(GetActiveProject()->GetRate())/2.);
EqualizationDialog dlog(this, ((double)loFreqI), hiFreq, mFilterFuncR, mFilterFuncI, EqualizationDialog dlog(this, ((double)loFreqI), hiFreq, mFilterFuncR, mFilterFuncI,
windowSize, mCurveName, false, NULL, -1, _("Equalization")); windowSize, mCurveName, false, NULL, -1, _("Equalization"));
dlog.M = mM; dlog.M = mM;
dlog.curveName = mCurveName; dlog.curveName = mCurveName;
@@ -378,6 +405,14 @@ bool EffectEqualization::TransferParameters( Shuttle & shuttle )
bool EffectEqualization::Process() bool EffectEqualization::Process()
{ {
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
if(mEffectEqualization48x)
if(mBench) {
mBench=false;
return mEffectEqualization48x->Benchmark(this);
} else
return mEffectEqualization48x->Process(this);
#endif
this->CopyInputTracks(); // Set up mOutputTracks. this->CopyInputTracks(); // Set up mOutputTracks.
bool bGoodResult = true; bool bGoodResult = true;
@@ -412,7 +447,7 @@ bool EffectEqualization::Process()
bool EffectEqualization::ProcessOne(int count, WaveTrack * t, bool EffectEqualization::ProcessOne(int count, WaveTrack * t,
sampleCount start, sampleCount len) sampleCount start, sampleCount len)
{ {
// create a new WaveTrack to hold all of the output, including 'tails' each end // create a new WaveTrack to hold all of the output, including 'tails' each end
AudacityProject *p = GetActiveProject(); AudacityProject *p = GetActiveProject();
@@ -560,9 +595,9 @@ bool EffectEqualization::ProcessOne(int count, WaveTrack * t,
//if the clip was only partially selected, the Paste will have created a split line. Join is needed to take care of this //if the clip was only partially selected, the Paste will have created a split line. Join is needed to take care of this
//This is not true when the selection is fully contained within one clip (second half of conditional) //This is not true when the selection is fully contained within one clip (second half of conditional)
if( (clipRealStartEndTimes[i].first != clipStartEndTimes[i].first || if( (clipRealStartEndTimes[i].first != clipStartEndTimes[i].first ||
clipRealStartEndTimes[i].second != clipStartEndTimes[i].second) && clipRealStartEndTimes[i].second != clipStartEndTimes[i].second) &&
!(clipRealStartEndTimes[i].first <= startT && !(clipRealStartEndTimes[i].first <= startT &&
clipRealStartEndTimes[i].second >= startT+lenT) ) clipRealStartEndTimes[i].second >= startT+lenT) )
t->Join(clipRealStartEndTimes[i].first,clipRealStartEndTimes[i].second); t->Join(clipRealStartEndTimes[i].first,clipRealStartEndTimes[i].second);
delete toClipOutput; delete toClipOutput;
} }
@@ -578,7 +613,7 @@ bool EffectEqualization::ProcessOne(int count, WaveTrack * t,
} }
void EffectEqualization::Filter(sampleCount len, void EffectEqualization::Filter(sampleCount len,
float *buffer) float *buffer)
{ {
int i; int i;
float re,im; float re,im;
@@ -610,18 +645,18 @@ void EffectEqualization::Filter(sampleCount len,
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
BEGIN_EVENT_TABLE(EqualizationPanel, wxPanel) BEGIN_EVENT_TABLE(EqualizationPanel, wxPanel)
EVT_PAINT(EqualizationPanel::OnPaint) EVT_PAINT(EqualizationPanel::OnPaint)
EVT_MOUSE_EVENTS(EqualizationPanel::OnMouseEvent) EVT_MOUSE_EVENTS(EqualizationPanel::OnMouseEvent)
EVT_MOUSE_CAPTURE_LOST(EqualizationPanel::OnCaptureLost) EVT_MOUSE_CAPTURE_LOST(EqualizationPanel::OnCaptureLost)
EVT_SIZE(EqualizationPanel::OnSize) EVT_SIZE(EqualizationPanel::OnSize)
END_EVENT_TABLE() END_EVENT_TABLE()
EqualizationPanel::EqualizationPanel( double loFreq, double hiFreq, EqualizationPanel::EqualizationPanel( double loFreq, double hiFreq,
Envelope *env, Envelope *env,
EqualizationDialog *parent, EqualizationDialog *parent,
float *filterFuncR, float *filterFuncI, long windowSize, float *filterFuncR, float *filterFuncI, long windowSize,
wxWindowID id, const wxPoint& pos, const wxSize& size): wxWindowID id, const wxPoint& pos, const wxSize& size):
wxPanel(parent, id, pos, size) wxPanel(parent, id, pos, size)
{ {
mOutr = NULL; mOutr = NULL;
mOuti = NULL; mOuti = NULL;
@@ -732,8 +767,8 @@ void EqualizationPanel::OnPaint(wxPaintEvent & WXUNUSED(event))
memDC.SetPen(wxPen(theTheme.Colour( clrGraphLines ), 1, wxSOLID)); memDC.SetPen(wxPen(theTheme.Colour( clrGraphLines ), 1, wxSOLID));
int center = (int) (mEnvRect.height * dBMax/(dBMax-dBMin) + .5); int center = (int) (mEnvRect.height * dBMax/(dBMax-dBMin) + .5);
AColor::Line(memDC, AColor::Line(memDC,
mEnvRect.GetLeft(), mEnvRect.y + center, mEnvRect.GetLeft(), mEnvRect.y + center,
mEnvRect.GetRight(), mEnvRect.y + center); mEnvRect.GetRight(), mEnvRect.y + center);
// Draw the grid, if asked for. Do it now so it's underneath the main plots. // Draw the grid, if asked for. Do it now so it's underneath the main plots.
if( mParent->drawGrid ) if( mParent->drawGrid )
@@ -767,7 +802,7 @@ void EqualizationPanel::OnPaint(wxPaintEvent & WXUNUSED(event))
if ( (i != 0) & (!off1) ) if ( (i != 0) & (!off1) )
{ {
AColor::Line(memDC, xlast, ylast, AColor::Line(memDC, xlast, ylast,
x, mEnvRect.y + y); x, mEnvRect.y + y);
} }
off1 = off; off1 = off;
xlast = x; xlast = x;
@@ -839,7 +874,7 @@ void EqualizationPanel::OnPaint(wxPaintEvent & WXUNUSED(event))
mEnvelope->DrawPoints(memDC, mEnvRect, 0.0, mEnvRect.width-1, false, dBMin, dBMax); mEnvelope->DrawPoints(memDC, mEnvRect, 0.0, mEnvRect.width-1, false, dBMin, dBMax);
dc.Blit(0, 0, mWidth, mHeight, dc.Blit(0, 0, mWidth, mHeight,
&memDC, 0, 0, wxCOPY, FALSE); &memDC, 0, 0, wxCOPY, FALSE);
} }
void EqualizationPanel::OnMouseEvent(wxMouseEvent & event) void EqualizationPanel::OnMouseEvent(wxMouseEvent & event)
@@ -850,7 +885,7 @@ void EqualizationPanel::OnMouseEvent(wxMouseEvent & event)
} }
if (mEnvelope->MouseEvent(event, mEnvRect, 0.0, mEnvRect.width, false, if (mEnvelope->MouseEvent(event, mEnvRect, 0.0, mEnvRect.width, false,
dBMin, dBMax)) dBMin, dBMax))
{ {
mParent->EnvelopeUpdated(); mParent->EnvelopeUpdated();
RecalcRequired = true; RecalcRequired = true;
@@ -881,48 +916,58 @@ void EqualizationPanel::OnCaptureLost(wxMouseCaptureLostEvent & WXUNUSED(event))
// WDR: event table for EqualizationDialog // WDR: event table for EqualizationDialog
BEGIN_EVENT_TABLE(EqualizationDialog,wxDialog) BEGIN_EVENT_TABLE(EqualizationDialog,wxDialog)
EVT_SIZE( EqualizationDialog::OnSize ) EVT_SIZE( EqualizationDialog::OnSize )
EVT_PAINT( EqualizationDialog::OnPaint ) EVT_PAINT( EqualizationDialog::OnPaint )
EVT_ERASE_BACKGROUND( EqualizationDialog::OnErase ) EVT_ERASE_BACKGROUND( EqualizationDialog::OnErase )
EVT_SLIDER( ID_LENGTH, EqualizationDialog::OnSliderM ) EVT_SLIDER( ID_LENGTH, EqualizationDialog::OnSliderM )
EVT_SLIDER( ID_DBMAX, EqualizationDialog::OnSliderDBMAX ) EVT_SLIDER( ID_DBMAX, EqualizationDialog::OnSliderDBMAX )
EVT_SLIDER( ID_DBMIN, EqualizationDialog::OnSliderDBMIN ) EVT_SLIDER( ID_DBMIN, EqualizationDialog::OnSliderDBMIN )
EVT_COMMAND_RANGE( ID_SLIDER, EVT_COMMAND_RANGE( ID_SLIDER,
ID_SLIDER + NUMBER_OF_BANDS - 1, ID_SLIDER + NUMBER_OF_BANDS - 1,
wxEVT_COMMAND_SLIDER_UPDATED, wxEVT_COMMAND_SLIDER_UPDATED,
EqualizationDialog::OnSlider) EqualizationDialog::OnSlider)
EVT_CHOICE( ID_INTERP, EqualizationDialog::OnInterp ) EVT_CHOICE( ID_INTERP, EqualizationDialog::OnInterp )
EVT_CHOICE( ID_CURVE, EqualizationDialog::OnCurve ) EVT_CHOICE( ID_CURVE, EqualizationDialog::OnCurve )
EVT_BUTTON( ID_MANAGE, EqualizationDialog::OnManage ) EVT_BUTTON( ID_MANAGE, EqualizationDialog::OnManage )
EVT_BUTTON( ID_CLEAR, EqualizationDialog::OnClear ) EVT_BUTTON( ID_CLEAR, EqualizationDialog::OnClear )
EVT_BUTTON( ID_INVERT, EqualizationDialog::OnInvert ) EVT_BUTTON( ID_INVERT, EqualizationDialog::OnInvert )
EVT_BUTTON( ID_EFFECT_PREVIEW, EqualizationDialog::OnPreview ) EVT_BUTTON( ID_EFFECT_PREVIEW, EqualizationDialog::OnPreview )
EVT_BUTTON( wxID_OK, EqualizationDialog::OnOk ) EVT_BUTTON( wxID_OK, EqualizationDialog::OnOk )
EVT_BUTTON( wxID_CANCEL, EqualizationDialog::OnCancel ) EVT_BUTTON( wxID_CANCEL, EqualizationDialog::OnCancel )
EVT_RADIOBUTTON(drawRadioID, EqualizationDialog::OnDrawRadio) EVT_RADIOBUTTON(drawRadioID, EqualizationDialog::OnDrawRadio)
EVT_RADIOBUTTON(sliderRadioID, EqualizationDialog::OnSliderRadio) EVT_RADIOBUTTON(sliderRadioID, EqualizationDialog::OnSliderRadio)
EVT_CHECKBOX(ID_LIN_FREQ, EqualizationDialog::OnLinFreq)
EVT_CHECKBOX(GridOnOffID, EqualizationDialog::OnGridOnOff) #ifdef EXPERIMENTAL_EQ_SSE_THREADED
EVT_RADIOBUTTON(defaultMathRadioID, EqualizationDialog::OnProcessingRadio)
EVT_RADIOBUTTON(sSERadioID, EqualizationDialog::OnProcessingRadio)
EVT_RADIOBUTTON(sSEThreadedRadioID, EqualizationDialog::OnProcessingRadio)
EVT_RADIOBUTTON(aVXRadioID, EqualizationDialog::OnProcessingRadio)
EVT_RADIOBUTTON(aVXThreadedRadioID, EqualizationDialog::OnProcessingRadio)
EVT_BUTTON( ID_BENCH, EqualizationDialog::OnBench )
#endif
EVT_CHECKBOX(ID_LIN_FREQ, EqualizationDialog::OnLinFreq)
EVT_CHECKBOX(GridOnOffID, EqualizationDialog::OnGridOnOff)
END_EVENT_TABLE() END_EVENT_TABLE()
EqualizationDialog::EqualizationDialog(EffectEqualization * effect, EqualizationDialog::EqualizationDialog(EffectEqualization * effect,
double loFreq, double hiFreq, double loFreq, double hiFreq,
float *filterFuncR, float *filterFuncR,
float *filterFuncI, float *filterFuncI,
long windowSize, long windowSize,
wxString curveName, wxString curveName,
bool disallowCustom, bool disallowCustom,
wxWindow *parent, wxWindowID id, wxWindow *parent, wxWindowID id,
const wxString &title, const wxString &title,
const wxPoint &position, const wxPoint &position,
const wxSize& size, const wxSize& size,
long style): long style):
wxDialog( parent, id, title, position, size, style | wxRESIZE_BORDER | wxMAXIMIZE_BOX ), wxDialog( parent, id, title, position, size, style | wxRESIZE_BORDER | wxMAXIMIZE_BOX ),
mDisallowCustom(disallowCustom), mDisallowCustom(disallowCustom),
mCustomBackup(wxT("unnamed")) mCustomBackup(wxT("unnamed"))
{ {
m_pEffect = effect; m_pEffect = effect;
@@ -1050,8 +1095,8 @@ void EqualizationDialog::LoadCurves(wxString fileName, bool append)
msg.Printf(_("Error Loading EQ Curves from file:\n%s\nError message says:\n%s"), fn.GetFullPath().c_str(), reader.GetErrorStr().c_str()); msg.Printf(_("Error Loading EQ Curves from file:\n%s\nError message says:\n%s"), fn.GetFullPath().c_str(), reader.GetErrorStr().c_str());
// Inform user of load failure // Inform user of load failure
wxMessageBox( msg, wxMessageBox( msg,
_("Error Loading EQ Curves"), _("Error Loading EQ Curves"),
wxOK | wxCENTRE); wxOK | wxCENTRE);
mCurves.Add( _("unnamed") ); // we always need a default curve to use mCurves.Add( _("unnamed") ); // we always need a default curve to use
return; return;
} }
@@ -1144,10 +1189,10 @@ void EqualizationDialog::MakeEqualizationDialog()
szr2 = new wxBoxSizer( wxVERTICAL ); szr2 = new wxBoxSizer( wxVERTICAL );
dBMaxSlider = new wxSliderBugfix(this, ID_DBMAX, 30, 0, 60, dBMaxSlider = new wxSliderBugfix(this, ID_DBMAX, 30, 0, 60,
wxDefaultPosition, wxDefaultSize, wxSL_VERTICAL|wxSL_INVERSE); wxDefaultPosition, wxDefaultSize, wxSL_VERTICAL|wxSL_INVERSE);
szr2->Add( dBMaxSlider, 1, wxALIGN_LEFT|wxALIGN_CENTER_VERTICAL|wxALL, 4 ); szr2->Add( dBMaxSlider, 1, wxALIGN_LEFT|wxALIGN_CENTER_VERTICAL|wxALL, 4 );
dBMinSlider = new wxSliderBugfix(this, ID_DBMIN, -30, -120, -10, dBMinSlider = new wxSliderBugfix(this, ID_DBMIN, -30, -120, -10,
wxDefaultPosition, wxDefaultSize, wxSL_VERTICAL|wxSL_INVERSE); wxDefaultPosition, wxDefaultSize, wxSL_VERTICAL|wxSL_INVERSE);
szr2->Add( dBMinSlider, 1, wxALIGN_LEFT|wxALIGN_CENTER_VERTICAL|wxALL, 4 ); szr2->Add( dBMinSlider, 1, wxALIGN_LEFT|wxALIGN_CENTER_VERTICAL|wxALL, 4 );
szr1->Add( szr2, 0, wxEXPAND|wxALIGN_CENTRE|wxALL, 4 ); szr1->Add( szr2, 0, wxEXPAND|wxALIGN_CENTRE|wxALL, 4 );
@@ -1177,10 +1222,10 @@ void EqualizationDialog::MakeEqualizationDialog()
szr1->Add( szr4, 0, wxEXPAND|wxALIGN_LEFT|wxALL ); szr1->Add( szr4, 0, wxEXPAND|wxALIGN_LEFT|wxALL );
mPanel = new EqualizationPanel( mLoFreq, mHiFreq, mPanel = new EqualizationPanel( mLoFreq, mHiFreq,
mLogEnvelope, mLogEnvelope,
this, this,
mFilterFuncR, mFilterFuncI, mWindowSize, mFilterFuncR, mFilterFuncI, mWindowSize,
ID_FILTERPANEL); ID_FILTERPANEL);
szr1->Add( mPanel, 1, wxEXPAND|wxALIGN_CENTRE); szr1->Add( mPanel, 1, wxEXPAND|wxALIGN_CENTRE);
szr3 = new wxBoxSizer( wxVERTICAL ); szr3 = new wxBoxSizer( wxVERTICAL );
szr1->Add( szr3, 0, wxALIGN_CENTRE|wxRIGHT, 0); //spacer for last EQ szr1->Add( szr3, 0, wxALIGN_CENTRE|wxRIGHT, 0); //spacer for last EQ
@@ -1218,8 +1263,8 @@ void EqualizationDialog::MakeEqualizationDialog()
for (int i = 0; (i < NUMBER_OF_BANDS) && (thirdOct[i] <= mHiFreq); ++i) for (int i = 0; (i < NUMBER_OF_BANDS) && (thirdOct[i] <= mHiFreq); ++i)
{ {
m_sliders[i] = new wxSliderBugfix(this, ID_SLIDER + i, 0, -20, +20, m_sliders[i] = new wxSliderBugfix(this, ID_SLIDER + i, 0, -20, +20,
wxDefaultPosition, wxSize(20, 124), wxSL_VERTICAL| wxDefaultPosition, wxSize(20, 124), wxSL_VERTICAL|
wxSL_INVERSE); wxSL_INVERSE);
szrG->Add( m_sliders[i], 0, wxEXPAND|wxALIGN_CENTER ); szrG->Add( m_sliders[i], 0, wxEXPAND|wxALIGN_CENTER );
szrG->Add(0, 0, 0); // horizontal spacer - used to put EQ sliders in correct position szrG->Add(0, 0, 0); // horizontal spacer - used to put EQ sliders in correct position
m_sliders[i]->Connect(wxEVT_ERASE_BACKGROUND,wxEraseEventHandler(EqualizationDialog::OnErase)); m_sliders[i]->Connect(wxEVT_ERASE_BACKGROUND,wxEraseEventHandler(EqualizationDialog::OnErase));
@@ -1248,20 +1293,20 @@ void EqualizationDialog::MakeEqualizationDialog()
szrH = new wxBoxSizer( wxHORIZONTAL ); szrH = new wxBoxSizer( wxHORIZONTAL );
mFaderOrDraw[0] = new wxRadioButton( mFaderOrDraw[0] = new wxRadioButton(
this, drawRadioID, _("&Draw Curves"), this, drawRadioID, _("&Draw Curves"),
wxDefaultPosition, wxDefaultSize, wxRB_GROUP ); wxDefaultPosition, wxDefaultSize, wxRB_GROUP );
mFaderOrDraw[0]->SetName(_("Draw Curves")); mFaderOrDraw[0]->SetName(_("Draw Curves"));
szrH->Add( mFaderOrDraw[0], 0, wxRIGHT, 10 ); szrH->Add( mFaderOrDraw[0], 0, wxRIGHT, 10 );
mFaderOrDraw[1] = new wxRadioButton( mFaderOrDraw[1] = new wxRadioButton(
this, sliderRadioID, _("&Graphic EQ"), this, sliderRadioID, _("&Graphic EQ"),
wxDefaultPosition, wxDefaultSize, 0 ); wxDefaultPosition, wxDefaultSize, 0 );
mFaderOrDraw[1]->SetName(_("Graphic EQ")); mFaderOrDraw[1]->SetName(_("Graphic EQ"));
szrH->Add( mFaderOrDraw[1], 0, wxRIGHT, 4 ); szrH->Add( mFaderOrDraw[1], 0, wxRIGHT, 4 );
mInterpChoice = new wxChoice(this, ID_INTERP, mInterpChoice = new wxChoice(this, ID_INTERP,
wxDefaultPosition, wxDefaultSize, wxDefaultPosition, wxDefaultSize,
NUM_INTERP_CHOICES, interpChoiceStrings); NUM_INTERP_CHOICES, interpChoiceStrings);
mInterpChoice->SetSelection(0); mInterpChoice->SetSelection(0);
szrI = new wxBoxSizer( wxHORIZONTAL ); szrI = new wxBoxSizer( wxHORIZONTAL );
@@ -1284,7 +1329,7 @@ void EqualizationDialog::MakeEqualizationDialog()
// length of filter (M) slider // length of filter (M) slider
MSlider = new wxSliderBugfix(this, ID_LENGTH, (M -1)/2, 10, 4095, MSlider = new wxSliderBugfix(this, ID_LENGTH, (M -1)/2, 10, 4095,
wxDefaultPosition, wxSize(200, -1), wxSL_HORIZONTAL); wxDefaultPosition, wxSize(200, -1), wxSL_HORIZONTAL);
MSlider->SetName(_("Length of Filter")); MSlider->SetName(_("Length of Filter"));
szrH->Add( MSlider, 0, wxEXPAND ); szrH->Add( MSlider, 0, wxEXPAND );
@@ -1321,18 +1366,85 @@ void EqualizationDialog::MakeEqualizationDialog()
btn = new wxButton( this, ID_INVERT, _("&Invert")); btn = new wxButton( this, ID_INVERT, _("&Invert"));
szrC->Add( btn, 0, wxALIGN_CENTRE | wxALL, 4 ); szrC->Add( btn, 0, wxALIGN_CENTRE | wxALL, 4 );
mGridOnOff = new wxCheckBox(this, GridOnOffID, _("G&rids"), mGridOnOff = new wxCheckBox(this, GridOnOffID, _("G&rids"),
wxDefaultPosition, wxDefaultSize, wxDefaultPosition, wxDefaultSize,
#if defined(__WXGTK__) #if defined(__WXGTK__)
// Fixes bug #662 // Fixes bug #662
wxALIGN_LEFT); wxALIGN_LEFT);
#else #else
wxALIGN_RIGHT); wxALIGN_RIGHT);
#endif #endif
mGridOnOff->SetName(_("Grids")); mGridOnOff->SetName(_("Grids"));
szrC->Add( mGridOnOff, 0, wxALIGN_CENTRE | wxALL, 4 ); szrC->Add( mGridOnOff, 0, wxALIGN_CENTRE | wxALL, 4 );
szrV->Add( szrC, 0, wxALIGN_CENTER | wxALL, 0 ); szrV->Add( szrC, 0, wxALIGN_CENTER | wxALL, 0 );
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
// -------------------------------------------------------------------
// Processing routine selection
// -------------------------------------------------------------------
if(m_pEffect->mEffectEqualization48x) {
szrM = new wxBoxSizer( wxHORIZONTAL );
txt = new wxStaticText( this, wxID_ANY, _("&Processing: ") );
szrM->Add( txt, 0, wxALIGN_LEFT | wxALIGN_CENTER_VERTICAL | wxLEFT, 4 );
mMathProcessingType[0] = new wxRadioButton(
this, defaultMathRadioID, _("D&efault"),
wxDefaultPosition, wxDefaultSize, wxRB_GROUP );
mMathProcessingType[0]->SetName(_("Default"));
szrM->Add( mMathProcessingType[0], 0, wxRIGHT, 10 );
mMathProcessingType[1] = new wxRadioButton(
this, sSERadioID, _("&SSE"),
wxDefaultPosition, wxDefaultSize, 0 );
mMathProcessingType[1]->SetName(_("SSE"));
szrM->Add( mMathProcessingType[1], 0, wxRIGHT, 4 );
mMathProcessingType[2] = new wxRadioButton(
this, sSEThreadedRadioID, _("SSE &Threaded"),
wxDefaultPosition, wxDefaultSize, 0 );
mMathProcessingType[2]->SetName(_("SSE"));
szrM->Add( mMathProcessingType[2], 0, wxRIGHT, 4 );
mMathProcessingType[3] = new wxRadioButton(
this, aVXRadioID, _("A&VX"),
wxDefaultPosition, wxDefaultSize, 0 );
mMathProcessingType[3]->SetName(_("AVX"));
szrM->Add( mMathProcessingType[3], 0, wxRIGHT, 4 );
mMathProcessingType[4] = new wxRadioButton(
this, aVXThreadedRadioID, _("AV&X Threaded"),
wxDefaultPosition, wxDefaultSize, 0 );
mMathProcessingType[4]->SetName(_("AVX Threaded"));
szrM->Add( mMathProcessingType[4], 0, wxRIGHT, 4 );
if(!EffectEqualization48x::GetMathCaps()->SSE) {
mMathProcessingType[1]->Disable();
mMathProcessingType[2]->Disable();
}
if(true) { //!EffectEqualization48x::GetMathCaps()->AVX) { not implemented
mMathProcessingType[3]->Disable();
mMathProcessingType[4]->Disable();
}
// update the control state
mMathProcessingType[0]->SetValue(true);
int mathPath=EffectEqualization48x::GetMathPath();
if(mathPath&MATH_FUNCTION_SSE) {
mMathProcessingType[1]->SetValue(true);
if(mathPath&MATH_FUNCTION_THREADED)
mMathProcessingType[2]->SetValue(true);
}
if(false) { //mathPath&MATH_FUNCTION_AVX) { not implemented
mMathProcessingType[3]->SetValue(true);
if(mathPath&MATH_FUNCTION_THREADED)
mMathProcessingType[4]->SetValue(true);
}
btn = new wxButton( this, ID_BENCH, _("&Bench"));
szrM->Add( btn, 0, wxRIGHT, 4 );
szrV->Add( szrM, 0, wxALIGN_CENTER | wxALL, 0 );
}
#endif
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// Preview, OK, & Cancel buttons // Preview, OK, & Cancel buttons
// ------------------------------------------------------------------- // -------------------------------------------------------------------
@@ -1369,13 +1481,13 @@ void EqualizationDialog::CreateChoice()
// Create an array of names // Create an array of names
for( i = 0; i < numCurves; i++ ) for( i = 0; i < numCurves; i++ )
{ {
curveNames[ i ] = mCurves[ i ].Name; curveNames[ i ] = mCurves[ i ].Name;
} }
// Create the control // Create the control
choice = new wxChoice( this, ID_CURVE, choice = new wxChoice( this, ID_CURVE,
wxDefaultPosition, wxDefaultSize, wxDefaultPosition, wxDefaultSize,
numCurves, curveNames ); numCurves, curveNames );
// Have an existing control? // Have an existing control?
if( mCurve ) if( mCurve )
@@ -1404,10 +1516,10 @@ bool EqualizationDialog::Validate()
while (mDisallowCustom && curveName.IsSameAs(wxT("unnamed"))) while (mDisallowCustom && curveName.IsSameAs(wxT("unnamed")))
{ {
wxMessageBox(_("To use this EQ curve in a batch chain, please choose a new name for it.\nChoose the 'Save/Manage Curves...' button and rename the 'unnamed' curve, then use that one."), wxMessageBox(_("To use this EQ curve in a batch chain, please choose a new name for it.\nChoose the 'Save/Manage Curves...' button and rename the 'unnamed' curve, then use that one."),
_("EQ Curve needs a different name"), _("EQ Curve needs a different name"),
wxOK | wxCENTRE, wxOK | wxCENTRE,
this); this);
return false; return false;
} }
return true; return true;
} }
@@ -1555,16 +1667,16 @@ bool EqualizationDialog::CalcFilter()
mFilterFuncR[i] = val0; mFilterFuncR[i] = val0;
} }
else if(when > 1.0) else if(when > 1.0)
{ {
mFilterFuncR[i] = val1; mFilterFuncR[i] = val1;
} }
else else
{ {
if( lin ) if( lin )
mFilterFuncR[i] = mLinEnvelope->GetValue(when); mFilterFuncR[i] = mLinEnvelope->GetValue(when);
else else
mFilterFuncR[i] = mLogEnvelope->GetValue(when); mFilterFuncR[i] = mLogEnvelope->GetValue(when);
} }
freq += delta; freq += delta;
} }
mFilterFuncR[mWindowSize/2] = val1; mFilterFuncR[mWindowSize/2] = val1;
@@ -1588,8 +1700,8 @@ bool EqualizationDialog::CalcFilter()
for(i=0;i<=(M-1)/2;i++) for(i=0;i<=(M-1)/2;i++)
{ //Windowing - could give a choice, fixed for now - MJS { //Windowing - could give a choice, fixed for now - MJS
// double mult=0.54-0.46*cos(2*M_PI*(i+(M-1)/2.0)/(M-1)); //Hamming // double mult=0.54-0.46*cos(2*M_PI*(i+(M-1)/2.0)/(M-1)); //Hamming
//Blackman //Blackman
double mult=0.42-0.5*cos(2*M_PI*(i+(M-1)/2.0)/(M-1))+.08*cos(4*M_PI*(i+(M-1)/2.0)/(M-1)); double mult=0.42-0.5*cos(2*M_PI*(i+(M-1)/2.0)/(M-1))+.08*cos(4*M_PI*(i+(M-1)/2.0)/(M-1));
outr[i]*=mult; outr[i]*=mult;
if(i!=0){ if(i!=0){
@@ -1894,7 +2006,7 @@ bool EqualizationDialog::HandleXMLTag(const wxChar *tag, const wxChar **attrs)
if( !wxStrcmp( attr, wxT("f") ) ) if( !wxStrcmp( attr, wxT("f") ) )
{ {
if (!XMLValueChecker::IsGoodString(strValue) || if (!XMLValueChecker::IsGoodString(strValue) ||
!Internat::CompatibleToDouble(strValue, &dblValue)) !Internat::CompatibleToDouble(strValue, &dblValue))
return false; return false;
f = dblValue; f = dblValue;
} }
@@ -1902,7 +2014,7 @@ bool EqualizationDialog::HandleXMLTag(const wxChar *tag, const wxChar **attrs)
else if( !wxStrcmp( attr, wxT("d") ) ) else if( !wxStrcmp( attr, wxT("d") ) )
{ {
if (!XMLValueChecker::IsGoodString(strValue) || if (!XMLValueChecker::IsGoodString(strValue) ||
!Internat::CompatibleToDouble(strValue, &dblValue)) !Internat::CompatibleToDouble(strValue, &dblValue))
return false; return false;
d = dblValue; d = dblValue;
} }
@@ -2087,7 +2199,7 @@ void EqualizationDialog::GraphicEQ(Envelope *env)
int n = mInterpChoice->GetSelection(); int n = mInterpChoice->GetSelection();
switch( n ) switch( n )
{ {
case 0: // B-spline case 0: // B-spline
{ {
int minF = 0; int minF = 0;
for(int i=0; i<NUM_PTS; i++) for(int i=0; i<NUM_PTS; i++)
@@ -2120,7 +2232,7 @@ void EqualizationDialog::GraphicEQ(Envelope *env)
value = m_EQVals[bandsInUse-1]*(s - 1.5)*(s - 1.5)/2.; value = m_EQVals[bandsInUse-1]*(s - 1.5)*(s - 1.5)/2.;
else else
value = m_EQVals[bandsInUse-1]*(.75 - s*s) + value = m_EQVals[bandsInUse-1]*(.75 - s*s) +
m_EQVals[bandsInUse-2]*(s - .5)*(s - .5)/2.; m_EQVals[bandsInUse-2]*(s - .5)*(s - .5)/2.;
} }
else //normal case else //normal case
{ {
@@ -2153,7 +2265,7 @@ void EqualizationDialog::GraphicEQ(Envelope *env)
break; break;
} }
case 1: // Cosine squared case 1: // Cosine squared
{ {
int minF = 0; int minF = 0;
for(int i=0; i<NUM_PTS; i++) for(int i=0; i<NUM_PTS; i++)
@@ -2186,7 +2298,7 @@ void EqualizationDialog::GraphicEQ(Envelope *env)
span = whenSliders[minF+1] - whenSliders[minF]; span = whenSliders[minF+1] - whenSliders[minF];
dist = whenSliders[minF+1] - whens[i]; dist = whenSliders[minF+1] - whens[i];
value = m_EQVals[minF]*(1. + cos(M_PI*(span-dist)/span))/2. + value = m_EQVals[minF]*(1. + cos(M_PI*(span-dist)/span))/2. +
m_EQVals[minF+1]*(1. + cos(M_PI*dist/span))/2.; m_EQVals[minF+1]*(1. + cos(M_PI*dist/span))/2.;
} }
} }
if(whens[i]<=0.) if(whens[i]<=0.)
@@ -2197,7 +2309,7 @@ void EqualizationDialog::GraphicEQ(Envelope *env)
break; break;
} }
case 2: // Cubic Spline case 2: // Cubic Spline
{ {
double y2[NUMBER_OF_BANDS+1]; double y2[NUMBER_OF_BANDS+1];
m_EQVals[bandsInUse] = m_EQVals[bandsInUse-1]; m_EQVals[bandsInUse] = m_EQVals[bandsInUse-1];
@@ -2587,11 +2699,11 @@ void EqualizationDialog::OnSliderDBMAX(wxCommandEvent & WXUNUSED(event))
void EqualizationDialog::OnCurve(wxCommandEvent & WXUNUSED(event)) void EqualizationDialog::OnCurve(wxCommandEvent & WXUNUSED(event))
{ {
// Select new curve // Select new curve
#if wxCHECK_VERSION(2, 6, 2) && !defined(__WXX11__) #if wxCHECK_VERSION(2, 6, 2) && !defined(__WXX11__)
setCurve( mCurve->GetCurrentSelection() ); setCurve( mCurve->GetCurrentSelection() );
#else #else
setCurve( mCurve->GetSelection() ); setCurve( mCurve->GetSelection() );
#endif #endif
if( !drawMode ) if( !drawMode )
UpdateGraphic(); UpdateGraphic();
} }
@@ -2643,14 +2755,14 @@ void EqualizationDialog::OnInvert(wxCommandEvent & WXUNUSED(event)) // Inverts a
int newPosn = (int)m_EQVals[i]; int newPosn = (int)m_EQVals[i];
m_sliders[i]->SetValue( newPosn ); m_sliders[i]->SetValue( newPosn );
m_sliders_old[i] = newPosn; m_sliders_old[i] = newPosn;
#if wxUSE_TOOLTIPS #if wxUSE_TOOLTIPS
wxString tip; wxString tip;
if( thirdOct[i] < 1000.) if( thirdOct[i] < 1000.)
tip.Printf( wxT("%dHz\n%.1fdB"), (int)thirdOct[i], m_EQVals[i] ); tip.Printf( wxT("%dHz\n%.1fdB"), (int)thirdOct[i], m_EQVals[i] );
else else
tip.Printf( wxT("%gkHz\n%.1fdB"), thirdOct[i]/1000., m_EQVals[i] ); tip.Printf( wxT("%gkHz\n%.1fdB"), thirdOct[i]/1000., m_EQVals[i] );
m_sliders[i]->SetToolTip(tip); m_sliders[i]->SetToolTip(tip);
#endif #endif
} }
GraphicEQ(mLogEnvelope); GraphicEQ(mLogEnvelope);
} }
@@ -2766,10 +2878,10 @@ void EqualizationDialog::RevertCustom()
void EqualizationDialog::Finish(bool ok) void EqualizationDialog::Finish(bool ok)
{ {
if(mLogEnvelope) if(mLogEnvelope)
delete mLogEnvelope; delete mLogEnvelope;
mLogEnvelope = NULL; mLogEnvelope = NULL;
if(mLinEnvelope) if(mLinEnvelope)
delete mLinEnvelope; delete mLinEnvelope;
mLinEnvelope = NULL; mLinEnvelope = NULL;
mPanel = NULL; mPanel = NULL;
@@ -2804,7 +2916,7 @@ void EqualizationDialog::OnOk(wxCommandEvent & event)
for(i=0,j=0;j<numPoints-2;i++,j++) for(i=0,j=0;j<numPoints-2;i++,j++)
{ {
if( (value[i]<value[i+1]+.05) && (value[i]>value[i+1]-.05) && if( (value[i]<value[i+1]+.05) && (value[i]>value[i+1]-.05) &&
(value[i+1]<value[i+2]+.05) && (value[i+1]>value[i+2]-.05) ) (value[i+1]<value[i+2]+.05) && (value[i+1]>value[i+2]-.05) )
{ // within < 0.05 dB? { // within < 0.05 dB?
mLogEnvelope->Delete(j+1); mLogEnvelope->Delete(j+1);
numPoints--; numPoints--;
@@ -2833,21 +2945,21 @@ void EqualizationDialog::OnOk(wxCommandEvent & event)
/// Constructor /// Constructor
BEGIN_EVENT_TABLE(EditCurvesDialog, wxDialog) BEGIN_EVENT_TABLE(EditCurvesDialog, wxDialog)
EVT_BUTTON(UpButtonID, EditCurvesDialog::OnUp) EVT_BUTTON(UpButtonID, EditCurvesDialog::OnUp)
EVT_BUTTON(DownButtonID, EditCurvesDialog::OnDown) EVT_BUTTON(DownButtonID, EditCurvesDialog::OnDown)
EVT_BUTTON(RenameButtonID, EditCurvesDialog::OnRename) EVT_BUTTON(RenameButtonID, EditCurvesDialog::OnRename)
EVT_BUTTON(DeleteButtonID, EditCurvesDialog::OnDelete) EVT_BUTTON(DeleteButtonID, EditCurvesDialog::OnDelete)
EVT_BUTTON(ImportButtonID, EditCurvesDialog::OnImport) EVT_BUTTON(ImportButtonID, EditCurvesDialog::OnImport)
EVT_BUTTON(ExportButtonID, EditCurvesDialog::OnExport) EVT_BUTTON(ExportButtonID, EditCurvesDialog::OnExport)
EVT_BUTTON(LibraryButtonID, EditCurvesDialog::OnLibrary) EVT_BUTTON(LibraryButtonID, EditCurvesDialog::OnLibrary)
EVT_BUTTON(DefaultsButtonID, EditCurvesDialog::OnDefaults) EVT_BUTTON(DefaultsButtonID, EditCurvesDialog::OnDefaults)
EVT_BUTTON(wxID_OK, EditCurvesDialog::OnOK) EVT_BUTTON(wxID_OK, EditCurvesDialog::OnOK)
END_EVENT_TABLE() END_EVENT_TABLE()
EditCurvesDialog::EditCurvesDialog(EqualizationDialog * parent, int position): EditCurvesDialog::EditCurvesDialog(EqualizationDialog * parent, int position):
wxDialog(parent, wxID_ANY, _("Manage Curves List"), wxDialog(parent, wxID_ANY, _("Manage Curves List"),
wxDefaultPosition, wxDefaultSize, wxDefaultPosition, wxDefaultSize,
wxDEFAULT_DIALOG_STYLE | wxRESIZE_BORDER) wxDEFAULT_DIALOG_STYLE | wxRESIZE_BORDER)
{ {
SetLabel(_("Manage Curves")); // Provide visual label SetLabel(_("Manage Curves")); // Provide visual label
SetName(_("Manage Curves List")); // Provide audible label SetName(_("Manage Curves List")); // Provide audible label
@@ -2905,7 +3017,7 @@ void EditCurvesDialog::PopulateOrExchange(ShuttleGui & S)
S.EndHorizontalLay(); S.EndHorizontalLay();
S.AddStandardButtons(); S.AddStandardButtons();
S.StartStatic(_("Help")); S.StartStatic(_("Help"));
S.AddConstTextBox(wxT(""), _("Rename 'unnamed' to save a new entry.\n'OK' saves all changes, 'Cancel' doesn't.")); S.AddConstTextBox(wxT(""), _("Rename 'unnamed' to save a new entry.\n'OK' saves all changes, 'Cancel' doesn't."));
S.EndStatic(); S.EndStatic();
PopulateList(mPosition); PopulateList(mPosition);
Fit(); Fit();
@@ -2998,7 +3110,7 @@ long EditCurvesDialog::GetPreviousItem(long item) // wx doesn't have this
{ {
long lastItem = -1; long lastItem = -1;
long itemTemp = mList->GetNextItem(-1, wxLIST_NEXT_ALL, long itemTemp = mList->GetNextItem(-1, wxLIST_NEXT_ALL,
wxLIST_STATE_SELECTED); wxLIST_STATE_SELECTED);
while( (itemTemp != -1) && (itemTemp < item) ) while( (itemTemp != -1) && (itemTemp < item) )
{ {
lastItem = itemTemp; lastItem = itemTemp;
@@ -3034,8 +3146,8 @@ void EditCurvesDialog::OnRename(wxCommandEvent & WXUNUSED(event))
bad = false; bad = false;
// build the dialog // build the dialog
wxTextEntryDialog dlg( this, wxTextEntryDialog dlg( this,
_("Rename '") + mEditCurves[ item ].Name + _("' to..."), _("Rename '") + mEditCurves[ item ].Name + _("' to..."),
_("Rename...") ); _("Rename...") );
dlg.SetTextValidator( wxFILTER_EXCLUDE_CHAR_LIST ); dlg.SetTextValidator( wxFILTER_EXCLUDE_CHAR_LIST );
dlg.SetName( _("Rename '") + mEditCurves[ item ].Name ); dlg.SetName( _("Rename '") + mEditCurves[ item ].Name );
wxTextValidator *tv = dlg.GetTextValidator(); wxTextValidator *tv = dlg.GetTextValidator();
@@ -3062,7 +3174,7 @@ void EditCurvesDialog::OnRename(wxCommandEvent & WXUNUSED(event))
break; break;
} }
int answer = wxMessageBox( _("Overwrite existing curve '") + name +_("'?"), int answer = wxMessageBox( _("Overwrite existing curve '") + name +_("'?"),
_("Curve exists"), wxYES_NO ); _("Curve exists"), wxYES_NO );
if (answer == wxYES) if (answer == wxYES)
{ {
bad = false; bad = false;
@@ -3134,7 +3246,7 @@ void EditCurvesDialog::OnDelete(wxCommandEvent & WXUNUSED(event))
if(item == mList->GetItemCount()-1) //unnamed if(item == mList->GetItemCount()-1) //unnamed
{ {
wxMessageBox(_("You cannot delete the 'unnamed' curve."), wxMessageBox(_("You cannot delete the 'unnamed' curve."),
_("Can't delete 'unnamed'"), wxOK | wxCENTRE, this); _("Can't delete 'unnamed'"), wxOK | wxCENTRE, this);
} }
else else
{ {
@@ -3183,7 +3295,7 @@ void EditCurvesDialog::OnDelete(wxCommandEvent & WXUNUSED(event))
if(item == mList->GetItemCount()-1) //unnamed if(item == mList->GetItemCount()-1) //unnamed
{ {
wxMessageBox(_("You cannot delete the 'unnamed' curve, it is special."), wxMessageBox(_("You cannot delete the 'unnamed' curve, it is special."),
_("Can't delete 'unnamed'"), wxOK | wxCENTRE, this); _("Can't delete 'unnamed'"), wxOK | wxCENTRE, this);
} }
else else
{ {
@@ -3274,6 +3386,35 @@ void EditCurvesDialog::OnDefaults( wxCommandEvent & WXUNUSED(event))
PopulateList(0); // update the EditCurvesDialog dialog PopulateList(0); // update the EditCurvesDialog dialog
} }
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
void EqualizationDialog::OnProcessingRadio(wxCommandEvent & event)
{
int testEvent=event.GetId();
switch(testEvent)
{
case defaultMathRadioID: EffectEqualization48x::SetMathPath(MATH_FUNCTION_ORIGINAL);
break;
case sSERadioID: EffectEqualization48x::SetMathPath(MATH_FUNCTION_SSE);
break;
case sSEThreadedRadioID: EffectEqualization48x::SetMathPath(MATH_FUNCTION_THREADED | MATH_FUNCTION_SSE);
break;
case aVXRadioID: testEvent=2;
break;
case aVXThreadedRadioID: testEvent=2;
break;
}
};
void EqualizationDialog::OnBench( wxCommandEvent & event)
{
m_pEffect->mBench=true;
OnOk(event);
}
#endif
void EditCurvesDialog::OnOK(wxCommandEvent & WXUNUSED(event)) void EditCurvesDialog::OnOK(wxCommandEvent & WXUNUSED(event))
{ {
// Make a backup of the current curves // Make a backup of the current curves
@@ -3293,8 +3434,8 @@ void EditCurvesDialog::OnOK(wxCommandEvent & WXUNUSED(event))
// Select something sensible // Select something sensible
long item = mList->GetNextItem(-1, long item = mList->GetNextItem(-1,
wxLIST_NEXT_ALL, wxLIST_NEXT_ALL,
wxLIST_STATE_SELECTED); wxLIST_STATE_SELECTED);
if (item == -1) if (item == -1)
item = mList->GetItemCount()-1; // nothing selected, default to 'unnamed' item = mList->GetItemCount()-1; // nothing selected, default to 'unnamed'
mParent->setCurve(item); mParent->setCurve(item);
@@ -3304,7 +3445,7 @@ void EditCurvesDialog::OnOK(wxCommandEvent & WXUNUSED(event))
#if wxUSE_ACCESSIBILITY #if wxUSE_ACCESSIBILITY
SliderAx::SliderAx( wxWindow * window, wxString fmt ): SliderAx::SliderAx( wxWindow * window, wxString fmt ):
wxWindowAccessible( window ) wxWindowAccessible( window )
{ {
mParent = window; mParent = window;
mFmt = fmt; mFmt = fmt;
@@ -3415,17 +3556,17 @@ wxAccStatus SliderAx::GetRole(int childId, wxAccRole* role)
{ {
switch( childId ) switch( childId )
{ {
case 0: case 0:
*role = wxROLE_SYSTEM_SLIDER; *role = wxROLE_SYSTEM_SLIDER;
break; break;
case 1: case 1:
case 3: case 3:
*role = wxROLE_SYSTEM_PUSHBUTTON; *role = wxROLE_SYSTEM_PUSHBUTTON;
break; break;
case 2: case 2:
*role = wxROLE_SYSTEM_INDICATOR; *role = wxROLE_SYSTEM_INDICATOR;
break; break;
} }
@@ -3452,22 +3593,22 @@ wxAccStatus SliderAx::GetState(int childId, long* state)
switch( childId ) switch( childId )
{ {
case 0: case 0:
*state = wxACC_STATE_SYSTEM_FOCUSABLE; *state = wxACC_STATE_SYSTEM_FOCUSABLE;
break; break;
case 1: case 1:
if( s->GetValue() == s->GetMin() ) if( s->GetValue() == s->GetMin() )
{ {
*state = wxACC_STATE_SYSTEM_INVISIBLE; *state = wxACC_STATE_SYSTEM_INVISIBLE;
} }
break; break;
case 3: case 3:
if( s->GetValue() == s->GetMax() ) if( s->GetValue() == s->GetMax() )
{ {
*state = wxACC_STATE_SYSTEM_INVISIBLE; *state = wxACC_STATE_SYSTEM_INVISIBLE;
} }
break; break;
} }
@@ -3495,3 +3636,4 @@ wxAccStatus SliderAx::GetValue(int childId, wxString* strValue)
} }
#endif #endif

View File

@@ -74,6 +74,10 @@ public:
}; };
WX_DECLARE_OBJARRAY( EQCurve, EQCurveArray ); WX_DECLARE_OBJARRAY( EQCurve, EQCurveArray );
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
class EffectEqualization48x;
#endif
class EffectEqualization: public Effect { class EffectEqualization: public Effect {
public: public:
@@ -113,12 +117,15 @@ public:
// low range of human hearing // low range of human hearing
enum {loFreqI=20}; enum {loFreqI=20};
private: private:
bool ProcessOne(int count, WaveTrack * t, bool ProcessOne(int count, WaveTrack * t,
sampleCount start, sampleCount len); sampleCount start, sampleCount len);
void Filter(sampleCount len, void Filter(sampleCount len,
float *buffer); float *buffer);
void ReadPrefs(); void ReadPrefs();
HFFT hFFT; HFFT hFFT;
@@ -135,6 +142,11 @@ private:
bool mPrompting; bool mPrompting;
bool mDrawGrid; bool mDrawGrid;
bool mEditingBatchParams; bool mEditingBatchParams;
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
bool mBench;
EffectEqualization48x *mEffectEqualization48x;
friend class EffectEqualization48x;
#endif
public: public:
@@ -222,6 +234,9 @@ public:
void EnvelopeUpdated(Envelope *env, bool lin); void EnvelopeUpdated(Envelope *env, bool lin);
static const double thirdOct[]; static const double thirdOct[];
wxRadioButton *mFaderOrDraw[2]; wxRadioButton *mFaderOrDraw[2];
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
wxRadioButton *mMathProcessingType[5]; // default, sse, sse threaded, AVX, AVX threaded (note AVX is not implemented yet
#endif
wxChoice *mInterpChoice; wxChoice *mInterpChoice;
wxCheckBox *mLinFreq; wxCheckBox *mLinFreq;
int M; int M;
@@ -276,6 +291,14 @@ private:
ID_INVERT, ID_INVERT,
drawRadioID, drawRadioID,
sliderRadioID, sliderRadioID,
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
defaultMathRadioID,
sSERadioID,
sSEThreadedRadioID,
aVXRadioID,
aVXThreadedRadioID,
ID_BENCH,
#endif
ID_INTERP, ID_INTERP,
ID_LIN_FREQ, ID_LIN_FREQ,
GridOnOffID, GridOnOffID,
@@ -294,6 +317,10 @@ private:
void OnSliderDBMIN( wxCommandEvent &event ); void OnSliderDBMIN( wxCommandEvent &event );
void OnDrawRadio(wxCommandEvent &event ); void OnDrawRadio(wxCommandEvent &event );
void OnSliderRadio(wxCommandEvent &event ); void OnSliderRadio(wxCommandEvent &event );
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
void OnProcessingRadio(wxCommandEvent &event );
void OnBench( wxCommandEvent & event);
#endif
void OnLinFreq(wxCommandEvent &event ); void OnLinFreq(wxCommandEvent &event );
void UpdateGraphic(void); void UpdateGraphic(void);
void EnvLogToLin(void); void EnvLogToLin(void);
@@ -339,6 +366,9 @@ private:
wxBoxSizer *szrH; wxBoxSizer *szrH;
wxBoxSizer *szrI; wxBoxSizer *szrI;
wxBoxSizer *szrL; wxBoxSizer *szrL;
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
wxBoxSizer *szrM;
#endif
wxFlexGridSizer *szr1; wxFlexGridSizer *szr1;
wxBoxSizer *szr2; wxBoxSizer *szr2;
wxBoxSizer *szr3; wxBoxSizer *szr3;

View File

@@ -0,0 +1,924 @@
/**********************************************************************
Audacity: A Digital Audio Editor
EffectEqualization.cpp
Andrew Hallendorff
*******************************************************************//**
\file Equalization48x.cpp
\brief Fast SSE based implementation of equalization.
*//****************************************************************/
#include "../Audacity.h"
#include "../Project.h"
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
#include "Equalization.h"
#include "../WaveTrack.h"
#include "float_cast.h"
#include <vector>
#include <wx/dcmemory.h>
#include <wx/event.h>
#include <wx/string.h>
#if wxUSE_TOOLTIPS
#include <wx/tooltip.h>
#endif
#include <wx/utils.h>
#include <math.h>
#include <wx/arrimpl.cpp>
#include "Equalization48x.h"
#include "../RealFFTf.h"
#include "../RealFFTf48x.h"
#ifndef USE_SSE2
#define USE_SSE2
#endif
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <math.h>
#include <xmmintrin.h>
#ifdef _WIN32
// Windows
#include <intrin.h>
#define cpuid __cpuid
#else
// GCC Inline Assembly
void cpuid(int CPUInfo[4],int InfoType){
__asm__ __volatile__ (
"cpuid":
"=a" (CPUInfo[0]),
"=b" (CPUInfo[1]),
"=c" (CPUInfo[2]),
"=d" (CPUInfo[3]) :
"a" (InfoType)
);
}
#endif
bool sMathCapsInitialized = false;
MathCaps sMathCaps;
// dirty switcher
int sMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED;
void EffectEqualization48x::SetMathPath(int mathPath) { sMathPath=mathPath; };
int EffectEqualization48x::GetMathPath() { return sMathPath; };
void EffectEqualization48x::AddMathPathOption(int mathPath) { sMathPath|=mathPath; };
void EffectEqualization48x::RemoveMathPathOption(int mathPath) { sMathPath&=~mathPath; };
MathCaps *EffectEqualization48x::GetMathCaps()
{
if(!sMathCapsInitialized)
{
sMathCapsInitialized=true;
sMathCaps.x64 = false;
sMathCaps.MMX = false;
sMathCaps.SSE = false;
sMathCaps.SSE2 = false;
sMathCaps.SSE3 = false;
sMathCaps.SSSE3 = false;
sMathCaps.SSE41 = false;
sMathCaps.SSE42 = false;
sMathCaps.SSE4a = false;
sMathCaps.AVX = false;
sMathCaps.XOP = false;
sMathCaps.FMA3 = false;
sMathCaps.FMA4 = false;
int info[4];
cpuid(info, 0);
int nIds = info[0];
cpuid(info, 0x80000000);
int nExIds = info[0];
// Detect Instruction Set
if (nIds >= 1){
cpuid(info,0x00000001);
sMathCaps.MMX = (info[3] & ((int)1 << 23)) != 0;
sMathCaps.SSE = (info[3] & ((int)1 << 25)) != 0;
sMathCaps.SSE2 = (info[3] & ((int)1 << 26)) != 0;
sMathCaps.SSE3 = (info[2] & ((int)1 << 0)) != 0;
sMathCaps.SSSE3 = (info[2] & ((int)1 << 9)) != 0;
sMathCaps.SSE41 = (info[2] & ((int)1 << 19)) != 0;
sMathCaps.SSE42 = (info[2] & ((int)1 << 20)) != 0;
sMathCaps.AVX = (info[2] & ((int)1 << 28)) != 0;
sMathCaps.FMA3 = (info[2] & ((int)1 << 12)) != 0;
}
if (nExIds >= 0x80000001){
cpuid(info,0x80000001);
sMathCaps.x64 = (info[3] & ((int)1 << 29)) != 0;
sMathCaps.SSE4a = (info[2] & ((int)1 << 6)) != 0;
sMathCaps.FMA4 = (info[2] & ((int)1 << 16)) != 0;
sMathCaps.XOP = (info[2] & ((int)1 << 11)) != 0;
}
if(sMathCaps.SSE)
sMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED; // we are starting on.
}
return &sMathCaps;
};
void * malloc_simd(const size_t size)
{
#if defined WIN32 // WIN32
return _aligned_malloc(size, 16);
#elif defined __linux__ // Linux
return memalign (16, size);
#elif defined __MACH__ // Mac OS X
return malloc(size);
#else // other (use valloc for page-aligned memory)
return valloc(size);
#endif
}
void free_simd(void* mem)
{
#if defined WIN32 // WIN32
_aligned_free(mem);
#else
free(mem);
#endif
}
EffectEqualization48x::EffectEqualization48x():
mThreadCount(0),mFilterSize(0),mWindowSize(0),mBlockSize(0),mWorkerDataCount(0),mBlocksPerBuffer(20),
mScratchBufferSize(0),mSubBufferSize(0),mBigBuffer(NULL),mBufferInfo(NULL),mEQWorkers(0),mThreaded(false),
mBenching(false)
{
}
EffectEqualization48x::~EffectEqualization48x()
{
}
bool EffectEqualization48x::AllocateBuffersWorkers(bool threaded)
{
if(mBigBuffer)
FreeBuffersWorkers();
mFilterSize=(mEffectEqualization->mM-1)&(~15); // 4000 !!! Filter MUST BE QUAD WORD ALIGNED !!!!
mWindowSize=mEffectEqualization->windowSize;
mBlockSize=mWindowSize-mFilterSize; // 12,384
mThreaded=threaded;
if( mThreaded )
{
mThreadCount=wxThread::GetCPUCount();
mWorkerDataCount=mThreadCount+2; // 2 extra slots (maybe double later)
// we're skewing the data by one block to allow for 1/4 block intersections.
// this will remove the disparity in data at the intersections of the runs
// The nice magic allocation
// megabyte - 3 windows - 4 overlaping buffers - filter
// 2^20 = 1,048,576 - 3 * 2^14 (16,384) - ((4 * 20) - 3) * 12,384 - 4000
// 1,048,576 - 49,152 - 953,568 - 4000 = 41,856 (leftover)
mScratchBufferSize=mWindowSize*3*(sizeof(__m128)/sizeof(float)); // 3 window size blocks size of __m128 but we allocate in float
mSubBufferSize=mBlockSize*((mBlocksPerBuffer<<2)-3); // we are going to do a full block overlap -(blockSize*3)
mBigBuffer=(float *)malloc_simd(sizeof(float)*(mSubBufferSize+mFilterSize+mScratchBufferSize)*mWorkerDataCount); // we run over by filtersize
// fill the bufferInfo
mBufferInfo = new BufferInfo[mWorkerDataCount];
for(int i=0;i<mWorkerDataCount;i++) {
mBufferInfo[i].mFftWindowSize=mWindowSize;
mBufferInfo[i].mFftFilterSize=mFilterSize;
mBufferInfo[i].mBufferLength=mBlockSize*mBlocksPerBuffer;
mBufferInfo[i].mScratchBuffer=&mBigBuffer[(mSubBufferSize+mScratchBufferSize)*i+mSubBufferSize];
for(int j=0;j<4;j++)
mBufferInfo[i].mBufferDest[j]=mBufferInfo[i].mBufferSouce[j]=&mBigBuffer[j*(mBufferInfo[i].mBufferLength-mBlockSize)+(mSubBufferSize+mScratchBufferSize)*i];
}
// start the workers
mDataMutex.IsOk();
mEQWorkers=new EQWorker[mThreadCount];
for(int i=0;i<mThreadCount;i++) {
mEQWorkers[i].SetData( mBufferInfo, mWorkerDataCount, &mDataMutex, this);
mEQWorkers[i].Create();
mEQWorkers[i].Run();
}
} else {
mScratchBufferSize=mWindowSize*3*(sizeof(__m128)/sizeof(float)); // 3 window size blocks size of __m128
mSubBufferSize=mBlockSize*((mBlocksPerBuffer<<2)-3); // we are going to do a full block overlap -(blockSize*3)
mBigBuffer=(float *)malloc_simd(sizeof(float)*(mSubBufferSize+mFilterSize+mScratchBufferSize)); // we run over by filtersize
mBufferInfo = new BufferInfo[1]; // yeah it looks odd but it keeps compatibility with threaded processing
mBufferInfo[0].mFftWindowSize=mWindowSize;
mBufferInfo[0].mFftFilterSize=mFilterSize;
mBufferInfo[0].mBufferLength=mBlockSize*mBlocksPerBuffer;
mBufferInfo[0].mScratchBuffer=&mBigBuffer[mSubBufferSize];
for(int j=0;j<4;j++)
mBufferInfo[0].mBufferDest[j]=mBufferInfo[0].mBufferSouce[j]=&mBigBuffer[j*(mBufferInfo[0].mBufferLength-mBlockSize)];
}
return true;
}
bool EffectEqualization48x::FreeBuffersWorkers()
{
if(mThreaded) {
for(int i=0;i<mThreadCount;i++) { // tell all the workers to exit
mEQWorkers[i].ExitLoop();
}
for(int i=0;i<mThreadCount;i++) {
mEQWorkers[i].Wait();
}
delete[] mEQWorkers; // kill the workers ( go directly to jail)
mEQWorkers= NULL;
mThreadCount=0;
mWorkerDataCount=0;
}
delete [] mBufferInfo;
mBufferInfo = NULL;
free_simd(mBigBuffer);
mBigBuffer=NULL;
return true;
}
bool EffectEqualization48x::Process(EffectEqualization* effectEqualization)
{
mEffectEqualization=effectEqualization;
// return TrackCompare(); // used for debugging data
mEffectEqualization->CopyInputTracks(); // Set up mOutputTracks.
bool bGoodResult = true;
TableUsage(sMathPath);
if(sMathPath) // !!! Filter MUST BE QUAD WORD ALIGNED !!!!
mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1;
AllocateBuffersWorkers((sMathPath & MATH_FUNCTION_THREADED) != 0);
SelectedTrackListOfKindIterator iter(Track::Wave, mEffectEqualization->mOutputTracks);
WaveTrack *track = (WaveTrack *) iter.First();
int count = 0;
while (track) {
double trackStart = track->GetStartTime();
double trackEnd = track->GetEndTime();
double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;
if (t1 > t0) {
sampleCount start = track->TimeToLongSamples(t0);
sampleCount end = track->TimeToLongSamples(t1);
sampleCount len = (sampleCount)(end - start);
if(!sMathPath) {
if (!mEffectEqualization->ProcessOne(count, track, start, len))
{
bGoodResult = false;
break;
}
} else {
if(sMathPath<8) {
if (!ProcessOne4x(count, track, start, len))
{
bGoodResult = false;
break;
}
} else {
if (!ProcessOne4xThreaded(count, track, start, len))
{
bGoodResult = false;
break;
}
}
}
}
track = (WaveTrack *) iter.Next();
count++;
}
FreeBuffersWorkers();
mEffectEqualization->ReplaceProcessedTracks(bGoodResult);
return bGoodResult;
}
bool EffectEqualization48x::TrackCompare()
{
mEffectEqualization->CopyInputTracks(); // Set up mOutputTracks.
bool bGoodResult = true;
TableUsage(sMathPath);
if(sMathPath) // !!! Filter MUST BE QUAD WORD ALIGNED !!!!
mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1;
AllocateBuffersWorkers((sMathPath & MATH_FUNCTION_THREADED)!=0);
// Reset map
wxArrayPtrVoid SecondIMap;
wxArrayPtrVoid SecondOMap;
SecondIMap.Clear();
SecondOMap.Clear();
TrackList *SecondOutputTracks = new TrackList();
//iterate over tracks of type trackType (All types if Track::All)
TrackListOfKindIterator aIt(mEffectEqualization->mOutputTracksType, mEffectEqualization->mTracks);
for (Track *aTrack = aIt.First(); aTrack; aTrack = aIt.Next()) {
// Include selected tracks, plus sync-lock selected tracks for Track::All.
if (aTrack->GetSelected() ||
(mEffectEqualization->mOutputTracksType == Track::All && aTrack->IsSyncLockSelected()))
{
Track *o = aTrack->Duplicate();
SecondOutputTracks->Add(o);
SecondIMap.Add(aTrack);
SecondIMap.Add(o);
}
}
for(int i=0;i<2;i++) {
SelectedTrackListOfKindIterator iter(Track::Wave, i?mEffectEqualization->mOutputTracks:SecondOutputTracks);
i?sMathPath=sMathPath:sMathPath=0;
WaveTrack *track = (WaveTrack *) iter.First();
int count = 0;
while (track) {
double trackStart = track->GetStartTime();
double trackEnd = track->GetEndTime();
double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;
if (t1 > t0) {
sampleCount start = track->TimeToLongSamples(t0);
sampleCount end = track->TimeToLongSamples(t1);
sampleCount len = (sampleCount)(end - start);
if(!sMathPath) {
if (!mEffectEqualization->ProcessOne(count, track, start, len))
{
bGoodResult = false;
break;
}
} else {
if(sMathPath<8) {
if (!ProcessOne4x(count, track, start, len))
{
bGoodResult = false;
break;
}
} else {
if (!ProcessOne4xThreaded(count, track, start, len))
{
bGoodResult = false;
break;
}
}
}
}
track = (WaveTrack *) iter.Next();
count++;
}
}
SelectedTrackListOfKindIterator iter(Track::Wave, mEffectEqualization->mOutputTracks);
SelectedTrackListOfKindIterator iter2(Track::Wave, SecondOutputTracks);
WaveTrack *track = (WaveTrack *) iter.First();
WaveTrack *track2 = (WaveTrack *) iter2.First();
while (track) {
double trackStart = track->GetStartTime();
double trackEnd = track->GetEndTime();
double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;
if (t1 > t0) {
sampleCount start = track->TimeToLongSamples(t0);
sampleCount end = track->TimeToLongSamples(t1);
sampleCount len = (sampleCount)(end - start);
DeltaTrack(track, track2, start, len);
}
track = (WaveTrack *) iter.Next();
track2 = (WaveTrack *) iter2.Next();
}
delete SecondOutputTracks;
FreeBuffersWorkers();
mEffectEqualization->ReplaceProcessedTracks(bGoodResult);
return bGoodResult;
}
bool EffectEqualization48x::DeltaTrack(WaveTrack * t, WaveTrack * t2, sampleCount start, sampleCount len)
{
sampleCount trackBlockSize = t->GetMaxBlockSize();
float *buffer1 = new float[trackBlockSize];
float *buffer2 = new float[trackBlockSize];
AudacityProject *p = GetActiveProject();
WaveTrack *output=p->GetTrackFactory()->NewWaveTrack(floatSample, t->GetRate());
sampleCount originalLen = len;
sampleCount currentSample = start;
while(len) {
sampleCount curretLength=(trackBlockSize>len)?len:trackBlockSize;
t->Get((samplePtr)buffer1, floatSample, currentSample, curretLength);
t2->Get((samplePtr)buffer2, floatSample, currentSample, curretLength);
for(int i=0;i<curretLength;i++)
buffer1[i]-=buffer2[i];
output->Append((samplePtr)buffer1, floatSample, curretLength);
currentSample+=curretLength;
len-=curretLength;
}
delete[] buffer1;
delete[] buffer2;
output->Flush();
len=originalLen;
ProcessTail(t, output, start, len);
delete output;
return true;
}
bool EffectEqualization48x::Benchmark(EffectEqualization* effectEqualization)
{
mEffectEqualization=effectEqualization;
mEffectEqualization->CopyInputTracks(); // Set up mOutputTracks.
bool bGoodResult = true;
TableUsage(sMathPath);
if(sMathPath) // !!! Filter MUST BE QUAD WORD ALIGNED !!!!
mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1;
AllocateBuffersWorkers((bool)MATH_FUNCTION_THREADED);
SelectedTrackListOfKindIterator iter(Track::Wave, mEffectEqualization->mOutputTracks);
long times[] = { 0,0,0 };
wxStopWatch timer;
mBenching=true;
for(int i=0;i<3;i++) {
int localMathPath;
switch(i) {
case 0: localMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED;
if(!sMathCaps.SSE)
localMathPath=-1;
break;
case 1: localMathPath=MATH_FUNCTION_SSE;
if(!sMathCaps.SSE)
localMathPath=-1;
break;
case 2: localMathPath=0;
break;
default: localMathPath=-1;
}
if(localMathPath>=0) {
timer.Start();
WaveTrack *track = (WaveTrack *) iter.First();
int count = 0;
while (track) {
double trackStart = track->GetStartTime();
double trackEnd = track->GetEndTime();
double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;
if (t1 > t0) {
sampleCount start = track->TimeToLongSamples(t0);
sampleCount end = track->TimeToLongSamples(t1);
sampleCount len = (sampleCount)(end - start);
if(!localMathPath) {
if (!mEffectEqualization->ProcessOne(count, track, start, len))
{
bGoodResult = false;
break;
}
} else {
if(localMathPath<8) {
if (!ProcessOne4x(count, track, start, len))
{
bGoodResult = false;
break;
}
} else {
if (!ProcessOne4xThreaded(count, track, start, len))
{
bGoodResult = false;
break;
}
}
}
}
track = (WaveTrack *) iter.Next();
count++;
}
times[i]=timer.Time();
}
}
FreeBuffersWorkers();
mBenching=false;
bGoodResult=false;
mEffectEqualization->ReplaceProcessedTracks(bGoodResult);
wxTimeSpan tsSSEThreaded(0, 0, 0, times[0]);
wxTimeSpan tsSSE(0, 0, 0, times[1]);
wxTimeSpan tsDefault(0, 0, 0, times[2]);
wxMessageBox(wxString::Format(_("Benchmark times:\nDefault: %s\nSSE: %s\nSSE Threaded: %s\n"),tsDefault.Format(wxT("%M:%S.%l")).c_str(),tsSSE.Format(wxT("%M:%S.%l")).c_str(),tsSSEThreaded.Format(wxT("%M:%S.%l")).c_str()));
/* wxTimeSpan tsSSEThreaded(0, 0, 0, times[0]);
wxTimeSpan tsSSE(0, 0, 0, times[1]);
wxTimeSpan tsDefault(0, 0, 0, times[2]);
wxString outputString;
outputString.Format(_("Benchmark times:\nDefault: %s\nSSE: %s\nSSE Threaded: %s\n"),tsDefault.Format(wxT("%M:%S.%l")),tsSSE.Format(wxT("%M:%S.%l")),tsSSEThreaded.Format(wxT("%M:%S.%l")));
wxMessageBox(outputString); */
return bGoodResult;
}
bool EffectEqualization48x::ProcessBuffer(fft_type *sourceBuffer, fft_type *destBuffer, sampleCount bufferLength)
{
sampleCount blockCount=bufferLength/mBlockSize;
sampleCount lastBlockSize=bufferLength%mBlockSize;
if(lastBlockSize)
blockCount++;
float *workBuffer=&sourceBuffer[bufferLength]; // all scratch buffers are at the end
for(int runx=0;runx<blockCount;runx++)
{
float *currentBuffer=&workBuffer[mWindowSize*(runx&1)];
for(int i=0;i<mBlockSize;i++)
currentBuffer[i]=sourceBuffer[i];
sourceBuffer+=mBlockSize;
float *currentFilter=&currentBuffer[mBlockSize];
for(int i=0;i<mFilterSize;i++)
currentFilter[i]=0;
mEffectEqualization->Filter(mWindowSize, currentBuffer);
float *writeEnd=currentBuffer+mBlockSize;
if(runx==blockCount)
writeEnd=currentBuffer+(lastBlockSize+mFilterSize);
if(runx) {
float *lastOverrun=&workBuffer[mWindowSize*((runx+1)&1)+mBlockSize];
for(int j=0;j<mFilterSize;j++)
*destBuffer++= *currentBuffer++ + *lastOverrun++;
} else
currentBuffer+=mFilterSize>>1; // this will skip the first filterSize on the first run
while(currentBuffer<writeEnd)
*destBuffer++ = *currentBuffer++;
}
return true;
}
bool EffectEqualization48x::ProcessBuffer4x(BufferInfo *bufferInfo)
{
// length must be a factor of window size for 4x processing.
if(bufferInfo->mBufferLength%mBlockSize)
return false;
sampleCount blockCount=bufferInfo->mBufferLength/mBlockSize;
__m128 *readBlocks[4]; // some temps so we dont destroy the vars in the struct
__m128 *writeBlocks[4];
for(int i=0;i<4;i++) {
readBlocks[i]=(__m128 *)bufferInfo->mBufferSouce[i];
writeBlocks[i]=(__m128 *)bufferInfo->mBufferDest[i];
}
__m128 *swizzledBuffer128=(__m128 *)bufferInfo->mScratchBuffer;
__m128 *scratchBuffer=&swizzledBuffer128[mWindowSize*2];
for(int run4x=0;run4x<blockCount;run4x++)
{
// swizzle the data to the swizzle buffer
__m128 *currentSwizzledBlock=&swizzledBuffer128[mWindowSize*(run4x&1)];
for(int i=0,j=0;j<mBlockSize;i++,j+=4) {
__m128 tmp0 = _mm_shuffle_ps(readBlocks[0][i], readBlocks[1][i], _MM_SHUFFLE(1,0,1,0));
__m128 tmp1 = _mm_shuffle_ps(readBlocks[0][i], readBlocks[1][i], _MM_SHUFFLE(3,2,3,2));
__m128 tmp2 = _mm_shuffle_ps(readBlocks[2][i], readBlocks[3][i], _MM_SHUFFLE(1,0,1,0));
__m128 tmp3 = _mm_shuffle_ps(readBlocks[2][i], readBlocks[3][i], _MM_SHUFFLE(3,2,3,2));
currentSwizzledBlock[j] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(2,0,2,0));
currentSwizzledBlock[j+1] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(3,1,3,1));
currentSwizzledBlock[j+2] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(2,0,2,0));
currentSwizzledBlock[j+3] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(3,1,3,1));
}
__m128 *thisOverrun128=&currentSwizzledBlock[mBlockSize];
for(int i=0;i<mFilterSize;i++)
thisOverrun128[i]=_mm_set1_ps(0.0);
Filter4x(mWindowSize, (float *)currentSwizzledBlock, (float *)scratchBuffer);
int writeStart=0, writeToStart=0; // note readStart is where the read data is written
int writeEnd=mBlockSize;
if(run4x) {
// maybe later swizzle add and write in one
__m128 *lastOverrun128=&swizzledBuffer128[mWindowSize*((run4x+1)&1)+mBlockSize];
// add and swizzle data + filter
for(int i=0,j=0;j<mFilterSize;i++,j+=4) {
__m128 tmps0 = _mm_add_ps(currentSwizzledBlock[j], lastOverrun128[j]);
__m128 tmps1 = _mm_add_ps(currentSwizzledBlock[j+1], lastOverrun128[j+1]);
__m128 tmps2 = _mm_add_ps(currentSwizzledBlock[j+2], lastOverrun128[j+2]);
__m128 tmps3 = _mm_add_ps(currentSwizzledBlock[j+3], lastOverrun128[j+3]);
__m128 tmp0 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(0,1,0,1));
__m128 tmp1 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(2,3,2,3));
__m128 tmp2 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(0,1,0,1));
__m128 tmp3 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(2,3,2,3));
writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
writeBlocks[1][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
writeBlocks[2][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
writeBlocks[3][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
}
writeStart=mFilterSize;
writeToStart=mFilterSize>>2;
// swizzle it back.
for(int i=writeToStart,j=writeStart;j<writeEnd;i++,j+=4) {
__m128 tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+1], currentSwizzledBlock[j], _MM_SHUFFLE(0,1,0,1));
__m128 tmp1 = _mm_shuffle_ps(currentSwizzledBlock[j+1], currentSwizzledBlock[j], _MM_SHUFFLE(2,3,2,3));
__m128 tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+2], _MM_SHUFFLE(0,1,0,1));
__m128 tmp3 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+2], _MM_SHUFFLE(2,3,2,3));
writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
writeBlocks[1][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
writeBlocks[2][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
writeBlocks[3][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
}
} else {
// swizzle it back. We overlap one block so we only write the first block on the first run
writeStart=0;
writeToStart=0;
for(int i=writeToStart,j=writeStart;j<writeEnd;i++,j+=4) {
__m128 tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+1], currentSwizzledBlock[j], _MM_SHUFFLE(0,1,0,1));
__m128 tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+2], _MM_SHUFFLE(0,1,0,1));
writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
}
}
for(int i=0;i<4;i++) { // shift each block
readBlocks[i]+=mBlockSize>>2; // these are 128b pointers, each window is 1/4 blockSize for those
writeBlocks[i]+=mBlockSize>>2;
}
}
return true;
}
bool EffectEqualization48x::ProcessOne4x(int count, WaveTrack * t,
sampleCount start, sampleCount len)
{
sampleCount blockCount=len/mBlockSize;
if(blockCount<16) // it's not worth 4x processing do a regular process
return mEffectEqualization->ProcessOne(count, t, start, len);
sampleCount trackBlockSize = t->GetMaxBlockSize();
AudacityProject *p = GetActiveProject();
WaveTrack *output=p->GetTrackFactory()->NewWaveTrack(floatSample, t->GetRate());
mEffectEqualization->TrackProgress(count, 0.0);
int bigRuns=len/(mSubBufferSize-mBlockSize);
int trackBlocksPerBig=mSubBufferSize/trackBlockSize;
int trackLeftovers=mSubBufferSize-trackBlocksPerBig*trackBlockSize;
int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(mSubBufferSize-mBlockSize));
sampleCount currentSample=start;
for(int bigRun=0;bigRun<bigRuns;bigRun++)
{
// fill the buffer
for(int i=0;i<trackBlocksPerBig;i++) {
t->Get((samplePtr)&mBigBuffer[i*trackBlockSize], floatSample, currentSample, trackBlockSize);
currentSample+=trackBlockSize;
}
if(trackLeftovers) {
t->Get((samplePtr)&mBigBuffer[trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers);
currentSample+=trackLeftovers;
}
currentSample-=mBlockSize+(mFilterSize>>1);
ProcessBuffer4x(mBufferInfo);
if (mEffectEqualization->TrackProgress(count, (double)(bigRun)/(double)bigRuns))
{
break;
}
output->Append((samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)], floatSample, mSubBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1)));
}
if(singleProcessLength) {
t->Get((samplePtr)mBigBuffer, floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
ProcessBuffer(mBigBuffer, mBigBuffer, singleProcessLength+mBlockSize+(mFilterSize>>1));
output->Append((samplePtr)&mBigBuffer[mBlockSize], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
}
output->Flush();
ProcessTail(t, output, start, len);
delete output;
return true;
}
void *EQWorker::Entry()
{
while(!mExitLoop) {
mMutex->Lock();
bool bufferAquired=false;
for(int i=0;i<mBufferInfoCount;i++)
if(mBufferInfoList[i].mBufferStatus==BufferReady) { // we found an unlocked ready buffer
bufferAquired=true;
mBufferInfoList[i].mBufferStatus=BufferBusy; // we own it now
mMutex->Unlock();
mEffectEqualization48x->ProcessBuffer4x(&mBufferInfoList[i]);
mBufferInfoList[i].mBufferStatus=BufferDone; // we're done
break;
}
if(!bufferAquired)
mMutex->Unlock();
}
return NULL;
}
bool EffectEqualization48x::ProcessOne4xThreaded(int count, WaveTrack * t,
sampleCount start, sampleCount len)
{
sampleCount blockCount=len/mBlockSize;
if(blockCount<16) // it's not worth 4x processing do a regular process
return ProcessOne4x(count, t, start, len);
if(mThreadCount<=0 || blockCount<256) // dont do it without cores or big data
return ProcessOne4x(count, t, start, len);
AudacityProject *p = GetActiveProject();
WaveTrack *output=p->GetTrackFactory()->NewWaveTrack(floatSample, t->GetRate());
sampleCount trackBlockSize = t->GetMaxBlockSize();
mEffectEqualization->TrackProgress(count, 0.0);
int bigRuns=len/(mSubBufferSize-mBlockSize);
int trackBlocksPerBig=mSubBufferSize/trackBlockSize;
int trackLeftovers=mSubBufferSize-trackBlocksPerBig*trackBlockSize;
int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(mSubBufferSize-mBlockSize));
sampleCount currentSample=start;
int bigBlocksRead=mWorkerDataCount, bigBlocksWritten=0;
// fill the first workerDataCount buffers we checked above and there is at least this data
for(int i=0;i<mWorkerDataCount;i++)
{
// fill the buffer
for(int j=0;j<trackBlocksPerBig;j++) {
t->Get((samplePtr)&mBufferInfo[i].mBufferSouce[0][j*trackBlockSize], floatSample, currentSample, trackBlockSize);
currentSample+=trackBlockSize;
}
if(trackLeftovers) {
t->Get((samplePtr)&mBufferInfo[i].mBufferSouce[0][trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers);
currentSample+=trackLeftovers;
}
currentSample-=mBlockSize+(mFilterSize>>1);
mBufferInfo[i].mBufferStatus=BufferReady; // free for grabbin
}
int currentIndex=0;
while(bigBlocksWritten<bigRuns) {
mDataMutex.Lock(); // Get in line for data
// process as many blocks as we can
while((mBufferInfo[currentIndex].mBufferStatus==BufferDone) && (bigBlocksWritten<bigRuns)) { // data is ours
if (mEffectEqualization->TrackProgress(count, (double)(bigBlocksWritten)/(double)bigRuns))
{
break;
}
output->Append((samplePtr)&mBufferInfo[currentIndex].mBufferDest[0][(bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)], floatSample, mSubBufferSize-((bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)));
bigBlocksWritten++;
if(bigBlocksRead<bigRuns) {
// fill the buffer
for(int j=0;j<trackBlocksPerBig;j++) {
t->Get((samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][j*trackBlockSize], floatSample, currentSample, trackBlockSize);
currentSample+=trackBlockSize;
}
if(trackLeftovers) {
t->Get((samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers);
currentSample+=trackLeftovers;
}
currentSample-=mBlockSize+(mFilterSize>>1);
mBufferInfo[currentIndex].mBufferStatus=BufferReady; // free for grabbin
bigBlocksRead++;
} else mBufferInfo[currentIndex].mBufferStatus=BufferEmpty; // this is completely unecessary
currentIndex=(currentIndex+1)%mWorkerDataCount;
}
mDataMutex.Unlock(); // Get back in line for data
}
if(singleProcessLength) {
t->Get((samplePtr)mBigBuffer, floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
ProcessBuffer(mBigBuffer, mBigBuffer, singleProcessLength+mBlockSize+(mFilterSize>>1));
output->Append((samplePtr)&mBigBuffer[mBlockSize], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
}
output->Flush();
ProcessTail(t, output, start, len);
delete output;
return true;
}
bool EffectEqualization48x::ProcessTail(WaveTrack * t, WaveTrack * output, sampleCount start, sampleCount len)
{
// double offsetT0 = t->LongSamplesToTime((sampleCount)offset);
double lenT = t->LongSamplesToTime(len);
// 'start' is the sample offset in 't', the passed in track
// 'startT' is the equivalent time value
// 'output' starts at zero
double startT = t->LongSamplesToTime(start);
//output has one waveclip for the total length, even though
//t might have whitespace seperating multiple clips
//we want to maintain the original clip structure, so
//only paste the intersections of the new clip.
//Find the bits of clips that need replacing
std::vector<std::pair<double, double> > clipStartEndTimes;
std::vector<std::pair<double, double> > clipRealStartEndTimes; //the above may be truncated due to a clip being partially selected
for (WaveClipList::compatibility_iterator it=t->GetClipIterator(); it; it=it->GetNext())
{
WaveClip *clip;
double clipStartT;
double clipEndT;
clip = it->GetData();
clipStartT = clip->GetStartTime();
clipEndT = clip->GetEndTime();
if( clipEndT <= startT )
continue; // clip is not within selection
if( clipStartT >= startT + lenT )
continue; // clip is not within selection
//save the actual clip start/end so that we can rejoin them after we paste.
clipRealStartEndTimes.push_back(std::pair<double,double>(clipStartT,clipEndT));
if( clipStartT < startT ) // does selection cover the whole clip?
clipStartT = startT; // don't copy all the new clip
if( clipEndT > startT + lenT ) // does selection cover the whole clip?
clipEndT = startT + lenT; // don't copy all the new clip
//save them
clipStartEndTimes.push_back(std::pair<double,double>(clipStartT,clipEndT));
}
//now go thru and replace the old clips with new
for(unsigned int i=0;i<clipStartEndTimes.size();i++)
{
Track *toClipOutput;
//remove the old audio and get the new
t->Clear(clipStartEndTimes[i].first,clipStartEndTimes[i].second);
// output->Copy(clipStartEndTimes[i].first-startT+offsetT0,clipStartEndTimes[i].second-startT+offsetT0, &toClipOutput);
output->Copy(clipStartEndTimes[i].first-startT,clipStartEndTimes[i].second-startT, &toClipOutput);
if(toClipOutput)
{
//put the processed audio in
bool bResult = t->Paste(clipStartEndTimes[i].first, toClipOutput);
wxASSERT(bResult); // TO DO: Actually handle this.
//if the clip was only partially selected, the Paste will have created a split line. Join is needed to take care of this
//This is not true when the selection is fully contained within one clip (second half of conditional)
if( (clipRealStartEndTimes[i].first != clipStartEndTimes[i].first ||
clipRealStartEndTimes[i].second != clipStartEndTimes[i].second) &&
!(clipRealStartEndTimes[i].first <= startT &&
clipRealStartEndTimes[i].second >= startT+lenT) )
t->Join(clipRealStartEndTimes[i].first,clipRealStartEndTimes[i].second);
delete toClipOutput;
}
}
return true;
}
void EffectEqualization48x::Filter4x(sampleCount len,
float *buffer, float *scratchBuffer)
{
int i;
__m128 real128, imag128;
// Apply FFT
RealFFTf4x(buffer, mEffectEqualization->hFFT);
// Apply filter
// DC component is purely real
__m128 *localFFTBuffer=(__m128 *)scratchBuffer;
__m128 *localBuffer=(__m128 *)buffer;
__m128 filterFuncR, filterFuncI;
filterFuncR=_mm_set1_ps(mEffectEqualization->mFilterFuncR[0]);
localFFTBuffer[0]=_mm_mul_ps(localBuffer[0], filterFuncR);
int halfLength=(len/2);
bool useBitReverseTable=sMathPath&1;
for(i=1; i<halfLength; i++)
{
if(useBitReverseTable) {
real128=localBuffer[mEffectEqualization->hFFT->BitReversed[i] ];
imag128=localBuffer[mEffectEqualization->hFFT->BitReversed[i]+1];
} else {
int bitReversed=SmallReverseBits(i,mEffectEqualization->hFFT->pow2Bits);
real128=localBuffer[bitReversed];
imag128=localBuffer[bitReversed+1];
}
filterFuncR=_mm_set1_ps(mEffectEqualization->mFilterFuncR[i]);
filterFuncI=_mm_set1_ps(mEffectEqualization->mFilterFuncI[i]);
localFFTBuffer[2*i ] = _mm_sub_ps( _mm_mul_ps(real128, filterFuncR), _mm_mul_ps(imag128, filterFuncI));
localFFTBuffer[2*i+1] = _mm_add_ps( _mm_mul_ps(real128, filterFuncI), _mm_mul_ps(imag128, filterFuncR));
}
// Fs/2 component is purely real
filterFuncR=_mm_set1_ps(mEffectEqualization->mFilterFuncR[halfLength]);
localFFTBuffer[1] = _mm_mul_ps(localBuffer[1], filterFuncR);
// Inverse FFT and normalization
InverseRealFFTf4x(scratchBuffer, mEffectEqualization->hFFT);
ReorderToTime4x(mEffectEqualization->hFFT, scratchBuffer, buffer);
}
#endif

View File

@@ -0,0 +1,146 @@
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
/**********************************************************************
Audacity: A Digital Audio Editor
Equalization48x.h
Intrinsics (SSE/AVX) and Threaded Equalization
***********************************************************************/
#ifndef __AUDACITY_EFFECT_EQUALIZATION48X__
#define __AUDACITY_EFFECT_EQUALIZATION48X__
// bitwise function selection
// options are
#define MATH_FUNCTION_ORIGINAL 0 // 0 original path
#define MATH_FUNCTION_BITREVERSE_TABLE 1 // 1 SSE BitReverse Table
#define MATH_FUNCTION_SIN_COS_TABLE 2 // 2 SSE SinCos Table
// 3 SSE with SinCos and BitReverse buffer
#define MATH_FUNCTION_SSE 4 // 4 SSE no SinCos and no BitReverse buffer
#define MATH_FUNCTION_THREADED 8 // 8 SSE threaded no SinCos and no BitReverse buffer
// 9 SSE threaded BitReverse Table
// 10 SSE threaded SinCos Table
// 11 SSE threaded with SinCos and BitReverse buffer
//#define MATH_FUNCTION_AVX 16
// added by Andrew Hallendorff intrinsics processing
enum EQBufferStatus
{
BufferEmpty=0,
BufferReady,
BufferBusy,
BufferDone
};
class BufferInfo {
public:
BufferInfo() { mBufferLength=0; mBufferStatus=BufferEmpty; };
float* mBufferSouce[4];
float* mBufferDest[4];
int mBufferLength;
sampleCount mFftWindowSize;
sampleCount mFftFilterSize;
float* mScratchBuffer;
EQBufferStatus mBufferStatus;
};
typedef struct {
int x64;
int MMX;
int SSE;
int SSE2;
int SSE3;
int SSSE3;
int SSE41;
int SSE42;
int SSE4a;
int AVX;
int XOP;
int FMA3;
int FMA4;
} MathCaps;
class EffectEqualization;
class EffectEqualization48x;
static int EQWorkerCounter=0;
class EQWorker : public wxThread {
public:
EQWorker():wxThread(wxTHREAD_JOINABLE) {
mBufferInfoList=NULL;
mBufferInfoCount=0;
mMutex=NULL;
mEffectEqualization48x=NULL;
mExitLoop=false;
mThreadID=EQWorkerCounter++;
}
void SetData( BufferInfo* bufferInfoList, int bufferInfoCount, wxMutex *mutex, EffectEqualization48x *effectEqualization48x) {
mBufferInfoList=bufferInfoList;
mBufferInfoCount=bufferInfoCount;
mMutex=mutex;
mEffectEqualization48x=effectEqualization48x;
}
void ExitLoop() { // this will cause the thread to drop from the loops
mExitLoop=true;
}
virtual void* Entry();
BufferInfo* mBufferInfoList;
int mBufferInfoCount, mThreadID;
wxMutex *mMutex;
EffectEqualization48x *mEffectEqualization48x;
bool mExitLoop;
};
class EffectEqualization48x {
public:
EffectEqualization48x();
virtual ~EffectEqualization48x();
static MathCaps *GetMathCaps();
static void SetMathPath(int mathPath);
static int GetMathPath();
static void AddMathPathOption(int mathPath);
static void RemoveMathPathOption(int mathPath);
bool Process(EffectEqualization* effectEqualization);
bool Benchmark(EffectEqualization* effectEqualization);
private:
bool TrackCompare();
bool DeltaTrack(WaveTrack * t, WaveTrack * t2, sampleCount start, sampleCount len);
bool AllocateBuffersWorkers(bool threaded);
bool FreeBuffersWorkers();
bool ProcessBuffer(fft_type *sourceBuffer, fft_type *destBuffer, sampleCount bufferLength);
bool ProcessBuffer4x(BufferInfo *bufferInfo);
bool ProcessOne4x(int count, WaveTrack * t, sampleCount start, sampleCount len);
bool ProcessOne4xThreaded(int count, WaveTrack * t, sampleCount start, sampleCount len);
bool ProcessTail(WaveTrack * t, WaveTrack * output, sampleCount start, sampleCount len);
void Filter4x(sampleCount len, float *buffer, float *scratchBuffer);
EffectEqualization* mEffectEqualization;
int mThreadCount;
sampleCount mFilterSize;
sampleCount mBlockSize;
sampleCount mWindowSize;
int mWorkerDataCount;
int mBlocksPerBuffer;
int mScratchBufferSize;
int mSubBufferSize;
float *mBigBuffer;
BufferInfo* mBufferInfo;
wxMutex mDataMutex;
EQWorker* mEQWorkers;
bool mThreaded;
bool mBenching;
friend EQWorker;
};
#endif
#endif

View File

@@ -123,6 +123,17 @@ void EffectsPrefs::PopulateOrExchange(ShuttleGui & S)
} }
S.EndStatic(); S.EndStatic();
#endif #endif
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
S.StartStatic(_("Instruction Set"));
{
S.TieCheckBox(_("&Use SSE/SSE2/.../AVX"),
wxT("/SSE/GUI"),
true);
}
S.EndStatic();
#endif
} }
bool EffectsPrefs::Apply() bool EffectsPrefs::Apply()

View File

@@ -634,6 +634,14 @@
RelativePath="..\..\..\src\RealFFTf.h" RelativePath="..\..\..\src\RealFFTf.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\RealFFTf48x.cpp"
>
</File>
<File
RelativePath="..\..\..\src\RealFFTf48x.h"
>
</File>
<File <File
RelativePath="..\..\..\src\Resample.cpp" RelativePath="..\..\..\src\Resample.cpp"
> >
@@ -748,6 +756,14 @@
RelativePath="..\..\..\src\SplashDialog.h" RelativePath="..\..\..\src\SplashDialog.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\SseMathFuncs.cpp"
>
</File>
<File
RelativePath="..\..\..\src\SseMathFuncs.h"
>
</File>
<File <File
RelativePath="..\..\..\src\Tags.cpp" RelativePath="..\..\..\src\Tags.cpp"
> >
@@ -996,6 +1012,14 @@
RelativePath="..\..\..\src\effects\Equalization.h" RelativePath="..\..\..\src\effects\Equalization.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\effects\Equalization48x.cpp"
>
</File>
<File
RelativePath="..\..\..\src\effects\Equalization48x.h"
>
</File>
<File <File
RelativePath="..\..\..\src\effects\Fade.cpp" RelativePath="..\..\..\src\effects\Fade.cpp"
> >