mirror of
https://github.com/cookiengineer/audacity
synced 2025-06-16 16:10:06 +02:00
This brings the builtin, LV2, and VAMP effects in line with the Audio Units, LADSPA, and VST effects. All effects now share a common UI. This gives all effects (though not implemented for all): user and factory preset capability; preset import/export capability; shared or private configuration options. Builtin effects can now be migrated to RTP, depending on the algorithm. LV2 effects now support graphical interfaces if the plugin supplies one. The Nyquist prompt has been enhanced to provide some features of the Nyquist Workbench. It may not look like it, but this was a LOT of work, so trust me, there WILL be problems and everything effect-related should be suspect. Keep a sharp eye (or two) open.
1327 lines
55 KiB
C++
1327 lines
55 KiB
C++
/**********************************************************************
|
|
|
|
Audacity: A Digital Audio Editor
|
|
|
|
Equalization48x.cpp
|
|
|
|
Andrew Hallendorff
|
|
|
|
*******************************************************************//**
|
|
|
|
\file Equalization48x.cpp
|
|
\brief Fast SSE based implementation of equalization.
|
|
|
|
*//****************************************************************/
|
|
|
|
#include "../Audacity.h"
|
|
#include "../Project.h"
|
|
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
|
|
#include "Equalization.h"
|
|
#include "../WaveTrack.h"
|
|
#include "float_cast.h"
|
|
#include <vector>
|
|
|
|
#include <wx/dcmemory.h>
|
|
#include <wx/event.h>
|
|
#include <wx/string.h>
|
|
|
|
#if wxUSE_TOOLTIPS
|
|
#include <wx/tooltip.h>
|
|
#endif
|
|
#include <wx/utils.h>
|
|
|
|
#include <math.h>
|
|
|
|
#include <wx/arrimpl.cpp>
|
|
|
|
#include "Equalization48x.h"
|
|
#include "../RealFFTf.h"
|
|
#include "../RealFFTf48x.h"
|
|
|
|
#ifndef USE_SSE2
|
|
#define USE_SSE2
|
|
#endif
|
|
|
|
#include <stdlib.h>
|
|
#include <malloc.h>
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include <emmintrin.h>
|
|
|
|
#ifdef _WIN32
|
|
|
|
// Windows
|
|
#include <intrin.h>
|
|
#define cpuid __cpuid
|
|
|
|
#else
|
|
|
|
// GCC Inline Assembly
|
|
// GCC inline-assembly stand-in for the MSVC __cpuid intrinsic used on Windows.
// Executes CPUID with EAX = InfoType and stores the resulting EAX, EBX, ECX
// and EDX registers in CPUInfo[0..3], in that order.
// ECX is not loaded before the instruction runs.
void cpuid(int CPUInfo[4],int InfoType){
   int regEax, regEbx, regEcx, regEdx;

   __asm__ __volatile__ (
      "cpuid"
      : "=a" (regEax), "=b" (regEbx), "=c" (regEcx), "=d" (regEdx)
      : "a" (InfoType)
   );

   CPUInfo[0] = regEax;
   CPUInfo[1] = regEbx;
   CPUInfo[2] = regEcx;
   CPUInfo[3] = regEdx;
}
|
|
|
|
#endif
|
|
|
|
bool sMathCapsInitialized = false;
|
|
|
|
MathCaps sMathCaps;
|
|
|
|
// dirty switcher
|
|
int sMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED;
|
|
|
|
// Replaces the whole math-path bitmask with mathPath.
void EffectEqualization48x::SetMathPath(int mathPath)
{
   sMathPath = mathPath;
}
|
|
|
|
// Returns the current math-path bitmask.
int EffectEqualization48x::GetMathPath()
{
   return sMathPath;
}
|
|
|
|
// Switches on every bit of mathPath in the math-path bitmask.
void EffectEqualization48x::AddMathPathOption(int mathPath)
{
   sMathPath = sMathPath | mathPath;
}
|
|
|
|
// Switches off every bit of mathPath in the math-path bitmask.
void EffectEqualization48x::RemoveMathPathOption(int mathPath)
{
   sMathPath = sMathPath & ~mathPath;
}
|
|
|
|
// Returns true when bit number `bit` of the CPUID output register `reg` is set.
static bool CpuidBitSet(int reg, int bit)
{
   return (reg & ((int)1 << bit)) != 0;
}

// Returns the cached CPU capability flags, probing the CPU with cpuid() on
// the first call only.
//
// Side effect of the first call: the global math path is reset. With SSE
// available it becomes MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED; without SSE
// the SSE flag is removed so the SSE code path is never selected on a CPU
// that does not report it.
//
// NOTE(review): the AVX flag reflects the CPUID bit only; whether the
// operating system has enabled AVX state is not checked here.
MathCaps *EffectEqualization48x::GetMathCaps()
{
   if(sMathCapsInitialized)
      return &sMathCaps;

   sMathCapsInitialized=true;

   // Start from "nothing supported"; the probes below switch features on.
   sMathCaps.x64 = false;
   sMathCaps.MMX = false;
   sMathCaps.SSE = false;
   sMathCaps.SSE2 = false;
   sMathCaps.SSE3 = false;
   sMathCaps.SSSE3 = false;
   sMathCaps.SSE41 = false;
   sMathCaps.SSE42 = false;
   sMathCaps.SSE4a = false;
   sMathCaps.AVX = false;
   sMathCaps.XOP = false;
   sMathCaps.FMA3 = false;
   sMathCaps.FMA4 = false;

   int info[4];

   // Leaf 0: EAX holds the highest supported standard leaf.
   cpuid(info, 0);
   const int nIds = info[0];

   // Leaf 0x80000000: EAX holds the highest supported extended leaf. It has
   // the top bit set, so it is kept unsigned for the range check below.
   cpuid(info, 0x80000000);
   const unsigned int nExIds = (unsigned int)info[0];

   // Detect Instruction Set
   if (nIds >= 1){
      cpuid(info,0x00000001);
      // info[3] is EDX, info[2] is ECX.
      sMathCaps.MMX   = CpuidBitSet(info[3], 23);
      sMathCaps.SSE   = CpuidBitSet(info[3], 25);
      sMathCaps.SSE2  = CpuidBitSet(info[3], 26);
      sMathCaps.SSE3  = CpuidBitSet(info[2],  0);

      sMathCaps.SSSE3 = CpuidBitSet(info[2],  9);
      sMathCaps.SSE41 = CpuidBitSet(info[2], 19);
      sMathCaps.SSE42 = CpuidBitSet(info[2], 20);

      sMathCaps.AVX   = CpuidBitSet(info[2], 28);
      sMathCaps.FMA3  = CpuidBitSet(info[2], 12);
   }

   if (nExIds >= 0x80000001u){
      cpuid(info,0x80000001);
      sMathCaps.x64   = CpuidBitSet(info[3], 29);
      sMathCaps.SSE4a = CpuidBitSet(info[2],  6);
      sMathCaps.FMA4  = CpuidBitSet(info[2], 16);
      sMathCaps.XOP   = CpuidBitSet(info[2], 11);
   }

   if(sMathCaps.SSE)
      sMathPath=MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED; // we are starting on.
   else
      sMathPath&=~MATH_FUNCTION_SSE; // never leave the SSE path selected without SSE

   return &sMathCaps;
}
|
|
|
|
// Allocates `size` bytes suitable for 16-byte aligned SSE loads and stores.
// Returns NULL when the allocation fails. The block must be released with
// free_simd(), never with plain free(), because the Windows branch uses the
// _aligned_malloc/_aligned_free pair.
//
// Both functions test the same platform macros so that the allocator and
// the deallocator can never disagree. _WIN32 is the macro the rest of this
// file uses; WIN32 is kept for builds that only define that one.
void * malloc_simd(const size_t size)
{
#if defined(_WIN32) || defined(WIN32) // Windows
   return _aligned_malloc(size, 16);
#elif defined(__MACH__) // Mac OS X
   // Relies on the platform malloc() alignment being sufficient, exactly as
   // the previous implementation did.
   return malloc(size);
#else // Linux and other POSIX systems
   void *mem = NULL;
   if(posix_memalign(&mem, 16, size) != 0)
      return NULL;
   return mem;
#endif
}

// Releases a block obtained from malloc_simd(). Passing NULL is allowed.
void free_simd(void* mem)
{
#if defined(_WIN32) || defined(WIN32) // Windows
   _aligned_free(mem);
#else
   free(mem);
#endif
}
|
|
|
|
// Creates an idle processor: no buffers, no worker threads, every size set
// to zero. Only mBlocksPerBuffer starts with a non-zero value (20).
// Buffers and workers are created later by AllocateBuffersWorkers().
EffectEqualization48x::EffectEqualization48x()
   : mThreadCount(0)
   , mFilterSize(0)
   , mWindowSize(0)
   , mBlockSize(0)
   , mWorkerDataCount(0)
   , mBlocksPerBuffer(20)
   , mScratchBufferSize(0)
   , mSubBufferSize(0)
   , mBigBuffer(NULL)
   , mBufferInfo(NULL)
   , mEQWorkers(0)
   , mThreaded(false)
   , mBenching(false)
   , mBufferCount(0)
{
}
|
|
|
|
// Nothing is released here; buffers and workers are torn down explicitly by
// FreeBuffersWorkers().
EffectEqualization48x::~EffectEqualization48x() {}
|
|
|
|
bool EffectEqualization48x::AllocateBuffersWorkers(int nThreads)
|
|
{
|
|
if(mBigBuffer)
|
|
FreeBuffersWorkers();
|
|
mFilterSize=(mEffectEqualization->mM-1)&(~15); // 4000 !!! Filter MUST BE QUAD WORD ALIGNED !!!!
|
|
mWindowSize=mEffectEqualization->windowSize;
|
|
mBlockSize=mWindowSize-mFilterSize; // 12,384
|
|
mThreaded = (nThreads > 0 );
|
|
if(mThreaded)
|
|
{
|
|
mThreadCount=wxThread::GetCPUCount();
|
|
mWorkerDataCount=mThreadCount+2; // 2 extra slots (maybe double later)
|
|
} else {
|
|
mWorkerDataCount=1;
|
|
mThreadCount=0;
|
|
}
|
|
#ifdef __AVX_ENABLED
|
|
mBufferCount=sMathPath&MATH_FUNCTION_AVX?8:4;
|
|
#else
|
|
mBufferCount=4;
|
|
#endif
|
|
// we're skewing the data by one block to allow for 1/4 block intersections.
|
|
// this will remove the disparity in data at the intersections of the runs
|
|
|
|
// The nice magic allocation
|
|
// megabyte - 3 windows - 4 overlaping buffers - filter
|
|
// 2^20 = 1,048,576 - 3 * 2^14 (16,384) - ((4 * 20) - 3) * 12,384 - 4000
|
|
// 1,048,576 - 49,152 - 953,568 - 4000 = 41,856 (leftover)
|
|
|
|
mScratchBufferSize=mWindowSize*3*sizeof(float)*mBufferCount; // 3 window size blocks of instruction size
|
|
mSubBufferSize=mBlockSize*(mBufferCount*(mBlocksPerBuffer-1)); // we are going to do a full block overlap
|
|
mBigBuffer=(float *)malloc_simd(sizeof(float)*(mSubBufferSize+mFilterSize+mScratchBufferSize)*mWorkerDataCount); // we run over by filtersize
|
|
// fill the bufferInfo
|
|
mBufferInfo = new BufferInfo[mWorkerDataCount];
|
|
for(int i=0;i<mWorkerDataCount;i++) {
|
|
mBufferInfo[i].mFftWindowSize=mWindowSize;
|
|
mBufferInfo[i].mFftFilterSize=mFilterSize;
|
|
mBufferInfo[i].mBufferLength=mBlockSize*mBlocksPerBuffer;
|
|
mBufferInfo[i].mContiguousBufferSize=mSubBufferSize;
|
|
mBufferInfo[i].mScratchBuffer=&mBigBuffer[(mSubBufferSize+mScratchBufferSize)*i+mSubBufferSize];
|
|
for(int j=0;j<mBufferCount;j++)
|
|
mBufferInfo[i].mBufferDest[j]=mBufferInfo[i].mBufferSouce[j]=&mBigBuffer[j*(mBufferInfo[i].mBufferLength-mBlockSize)+(mSubBufferSize+mScratchBufferSize)*i];
|
|
}
|
|
if(mThreadCount) {
|
|
// start the workers
|
|
mDataMutex.IsOk();
|
|
mEQWorkers=new EQWorker[mThreadCount];
|
|
for(int i=0;i<mThreadCount;i++) {
|
|
mEQWorkers[i].SetData( mBufferInfo, mWorkerDataCount, &mDataMutex, this);
|
|
mEQWorkers[i].Create();
|
|
mEQWorkers[i].Run();
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool EffectEqualization48x::FreeBuffersWorkers()
|
|
{
|
|
if(mThreaded) {
|
|
for(int i=0;i<mThreadCount;i++) { // tell all the workers to exit
|
|
mEQWorkers[i].ExitLoop();
|
|
}
|
|
for(int i=0;i<mThreadCount;i++) {
|
|
mEQWorkers[i].Wait();
|
|
}
|
|
delete[] mEQWorkers; // kill the workers ( go directly to jail)
|
|
mEQWorkers= NULL;
|
|
mThreadCount=0;
|
|
mWorkerDataCount=0;
|
|
}
|
|
delete [] mBufferInfo;
|
|
mBufferInfo = NULL;
|
|
free_simd(mBigBuffer);
|
|
mBigBuffer=NULL;
|
|
return true;
|
|
}
|
|
|
|
|
|
#pragma warning(push)
|
|
// Disable the unreachable code warning in MSVC, for this function.
|
|
#pragma warning(disable: 4702)
|
|
bool EffectEqualization48x::RunFunctionSelect(int flags, int count, WaveTrack * track, sampleCount start, sampleCount len)
|
|
{
|
|
// deal with tables here
|
|
flags&=~(MATH_FUNCTION_BITREVERSE_TABLE|MATH_FUNCTION_SIN_COS_TABLE); // clear out the table flags
|
|
switch (flags)
|
|
{
|
|
case MATH_FUNCTION_SSE:
|
|
return ProcessOne4x(count, track, start, len);
|
|
break;
|
|
case MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED:
|
|
return ProcessOne1x4xThreaded(count, track, start, len);
|
|
break;
|
|
case MATH_FUNCTION_THREADED:
|
|
case MATH_FUNCTION_THREADED|MATH_FUNCTION_SEGMENTED_CODE:
|
|
return ProcessOne1x4xThreaded(count, track, start, len, 1);
|
|
break;
|
|
case MATH_FUNCTION_SEGMENTED_CODE:
|
|
return ProcessOne1x(count, track, start, len);
|
|
break;
|
|
default:
|
|
return !mEffectEqualization->ProcessOne(count, track, start, len);
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
#pragma warning(pop)
|
|
|
|
// Runs the equalization for `effectEqualization` over every selected wave
// track of its output track list, using the globally selected math path.
//
// For each track the effect's [mT0, mT1] range is clamped to the track's own
// start and end; tracks with an empty intersection are skipped.
//
// Returns true when every track was processed, false when
// RunFunctionSelect() reported an early stop. The same flag tells
// ReplaceProcessedTracks() whether to keep the results.
bool EffectEqualization48x::Process(EffectEqualization* effectEqualization)
{
   mEffectEqualization=effectEqualization;
   // return TrackCompare(); // used for debugging data
   mEffectEqualization->CopyInputTracks(); // Set up mOutputTracks.

   TableUsage(sMathPath);
   if(sMathPath) // !!! Filter MUST BE QUAD WORD ALIGNED !!!!
      mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1;
   AllocateBuffersWorkers(sMathPath&MATH_FUNCTION_THREADED);

   bool bBreakLoop = false;
   SelectedTrackListOfKindIterator iter(Track::Wave, mEffectEqualization->mOutputTracks);
   int count = 0;
   for(WaveTrack *track = (WaveTrack *) iter.First();
       track;
       track = (WaveTrack *) iter.Next(), count++) {
      const double trackStart = track->GetStartTime();
      const double trackEnd = track->GetEndTime();

      // Clamp the selection to the track.
      double t0 = mEffectEqualization->mT0;
      if(t0 < trackStart)
         t0 = trackStart;
      double t1 = mEffectEqualization->mT1;
      if(t1 > trackEnd)
         t1 = trackEnd;

      if(!(t1 > t0))
         continue; // nothing of this track is selected

      const sampleCount start = track->TimeToLongSamples(t0);
      const sampleCount end = track->TimeToLongSamples(t1);
      const sampleCount len = (sampleCount)(end - start);

      bBreakLoop=RunFunctionSelect(sMathPath, count, track, start, len);
      if( bBreakLoop )
         break;
   }

   FreeBuffersWorkers();

   mEffectEqualization->ReplaceProcessedTracks(!bBreakLoop);
   return !bBreakLoop;
}
|
|
|
|
bool EffectEqualization48x::TrackCompare()
|
|
{
|
|
mEffectEqualization->CopyInputTracks(); // Set up mOutputTracks.
|
|
bool bBreakLoop = false;
|
|
|
|
TableUsage(sMathPath);
|
|
if(sMathPath) // !!! Filter MUST BE QUAD WORD ALIGNED !!!!
|
|
mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1;
|
|
AllocateBuffersWorkers(sMathPath&MATH_FUNCTION_THREADED);
|
|
// Reset map
|
|
wxArrayPtrVoid SecondIMap;
|
|
wxArrayPtrVoid SecondOMap;
|
|
SecondIMap.Clear();
|
|
SecondOMap.Clear();
|
|
|
|
TrackList *SecondOutputTracks = new TrackList();
|
|
|
|
//iterate over tracks of type trackType (All types if Track::All)
|
|
TrackListOfKindIterator aIt(mEffectEqualization->mOutputTracksType, mEffectEqualization->mTracks);
|
|
|
|
for (Track *aTrack = aIt.First(); aTrack; aTrack = aIt.Next()) {
|
|
|
|
// Include selected tracks, plus sync-lock selected tracks for Track::All.
|
|
if (aTrack->GetSelected() ||
|
|
(mEffectEqualization->mOutputTracksType == Track::All && aTrack->IsSyncLockSelected()))
|
|
{
|
|
Track *o = aTrack->Duplicate();
|
|
SecondOutputTracks->Add(o);
|
|
SecondIMap.Add(aTrack);
|
|
SecondIMap.Add(o);
|
|
}
|
|
}
|
|
|
|
for(int i=0;i<2;i++) {
|
|
SelectedTrackListOfKindIterator iter(Track::Wave, i?mEffectEqualization->mOutputTracks:SecondOutputTracks);
|
|
i?sMathPath=sMathPath:sMathPath=0;
|
|
WaveTrack *track = (WaveTrack *) iter.First();
|
|
int count = 0;
|
|
while (track) {
|
|
double trackStart = track->GetStartTime();
|
|
double trackEnd = track->GetEndTime();
|
|
double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
|
|
double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;
|
|
|
|
if (t1 > t0) {
|
|
sampleCount start = track->TimeToLongSamples(t0);
|
|
sampleCount end = track->TimeToLongSamples(t1);
|
|
sampleCount len = (sampleCount)(end - start);
|
|
bBreakLoop=RunFunctionSelect(sMathPath, count, track, start, len);
|
|
if( bBreakLoop )
|
|
break;
|
|
}
|
|
track = (WaveTrack *) iter.Next();
|
|
count++;
|
|
}
|
|
}
|
|
SelectedTrackListOfKindIterator iter(Track::Wave, mEffectEqualization->mOutputTracks);
|
|
SelectedTrackListOfKindIterator iter2(Track::Wave, SecondOutputTracks);
|
|
WaveTrack *track = (WaveTrack *) iter.First();
|
|
WaveTrack *track2 = (WaveTrack *) iter2.First();
|
|
while (track) {
|
|
double trackStart = track->GetStartTime();
|
|
double trackEnd = track->GetEndTime();
|
|
double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
|
|
double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;
|
|
|
|
if (t1 > t0) {
|
|
sampleCount start = track->TimeToLongSamples(t0);
|
|
sampleCount end = track->TimeToLongSamples(t1);
|
|
sampleCount len = (sampleCount)(end - start);
|
|
DeltaTrack(track, track2, start, len);
|
|
}
|
|
track = (WaveTrack *) iter.Next();
|
|
track2 = (WaveTrack *) iter2.Next();
|
|
}
|
|
delete SecondOutputTracks;
|
|
FreeBuffersWorkers();
|
|
mEffectEqualization->ReplaceProcessedTracks(!bBreakLoop);
|
|
return bBreakLoop;
|
|
}
|
|
|
|
// Replaces the range [start, start+len) of track `t` with the sample-wise
// difference t - t2.
//
// The difference is built in a temporary wave track, read in chunks of at
// most t->GetMaxBlockSize() samples, and then pasted into `t` by
// ProcessTail(). Always returns true.
bool EffectEqualization48x::DeltaTrack(WaveTrack * t, WaveTrack * t2, sampleCount start, sampleCount len)
{
   const sampleCount trackBlockSize = t->GetMaxBlockSize();

   float *difference = new float[trackBlockSize]; // holds t, then t - t2
   float *reference = new float[trackBlockSize];  // holds t2

   AudacityProject *p = GetActiveProject();
   WaveTrack *output=p->GetTrackFactory()->NewWaveTrack(floatSample, t->GetRate());

   sampleCount position = start;
   for(sampleCount remaining = len; remaining; ) {
      sampleCount chunk = trackBlockSize;
      if(trackBlockSize > remaining)
         chunk = remaining;

      t->Get((samplePtr)difference, floatSample, position, chunk);
      t2->Get((samplePtr)reference, floatSample, position, chunk);
      for(int i=0;i<chunk;i++)
         difference[i]-=reference[i];
      output->Append((samplePtr)difference, floatSample, chunk);

      position+=chunk;
      remaining-=chunk;
   }

   delete[] difference;
   delete[] reference;

   output->Flush();
   ProcessTail(t, output, start, len);
   delete output;
   return true;
}
|
|
|
|
// Times five processing paths on the selected wave tracks and shows the
// results in a message box. The passes are, in order: SSE threaded, SSE,
// segmented, segmented threaded, and the original (path 0).
//
// The two SSE passes are skipped (and reported as zero time) when the CPU
// does not report SSE. The capability is read through GetMathCaps() so the
// CPU probe is guaranteed to have run; reading the cached flags directly
// would skip the SSE passes whenever nothing had triggered the probe yet.
//
// The processed audio is always discarded (ReplaceProcessedTracks(false))
// and the function always returns false.
bool EffectEqualization48x::Benchmark(EffectEqualization* effectEqualization)
{
   mEffectEqualization=effectEqualization;
   mEffectEqualization->CopyInputTracks(); // Set up mOutputTracks.
   bool bBreakLoop = false;

   const bool haveSSE = GetMathCaps()->SSE;

   TableUsage(sMathPath);
   if(sMathPath) // !!! Filter MUST BE QUAD WORD ALIGNED !!!!
      mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1;
   AllocateBuffersWorkers(MATH_FUNCTION_THREADED);
   SelectedTrackListOfKindIterator iter(Track::Wave, mEffectEqualization->mOutputTracks);

   enum { kPassCount = 5 };

   // Math path for each pass; a negative entry means "skip this pass".
   int mathPaths[kPassCount];
   mathPaths[0]=haveSSE ? (MATH_FUNCTION_SSE|MATH_FUNCTION_THREADED) : -1;
   mathPaths[1]=haveSSE ? MATH_FUNCTION_SSE : -1;
   mathPaths[2]=MATH_FUNCTION_SEGMENTED_CODE;
   mathPaths[3]=MATH_FUNCTION_THREADED|MATH_FUNCTION_SEGMENTED_CODE;
   mathPaths[4]=0;

   long times[kPassCount] = { 0,0,0,0,0 };
   wxStopWatch timer;
   mBenching=true;
   for(int i=0;i<kPassCount && !bBreakLoop;i++) {
      const int localMathPath=mathPaths[i];
      if(localMathPath<0)
         continue;

      timer.Start();
      int count = 0;
      for(WaveTrack *track = (WaveTrack *) iter.First();
          track;
          track = (WaveTrack *) iter.Next(), count++) {
         double trackStart = track->GetStartTime();
         double trackEnd = track->GetEndTime();
         double t0 = mEffectEqualization->mT0 < trackStart? trackStart: mEffectEqualization->mT0;
         double t1 = mEffectEqualization->mT1 > trackEnd? trackEnd: mEffectEqualization->mT1;

         if (t1 > t0) {
            sampleCount start = track->TimeToLongSamples(t0);
            sampleCount end = track->TimeToLongSamples(t1);
            sampleCount len = (sampleCount)(end - start);
            bBreakLoop=RunFunctionSelect( localMathPath, count, track, start, len);
            if( bBreakLoop )
               break;
         }
      }
      times[i]=timer.Time();
   }
   FreeBuffersWorkers();
   mBenching=false;

   // A benchmark never keeps its output, whether or not it was stopped.
   bBreakLoop=false;
   mEffectEqualization->ReplaceProcessedTracks(bBreakLoop);

   wxTimeSpan tsSSEThreaded(0, 0, 0, times[0]);
   wxTimeSpan tsSSE(0, 0, 0, times[1]);
   wxTimeSpan tsDefaultEnhanced(0, 0, 0, times[2]);
   wxTimeSpan tsDefaultThreaded(0, 0, 0, times[3]);
   wxTimeSpan tsDefault(0, 0, 0, times[4]);

   wxMessageBox(wxString::Format(_("Benchmark times:\nOriginal: %s\nDefault Segmented: %s\nDefault Threaded: %s\nSSE: %s\nSSE Threaded: %s\n"),tsDefault.Format(wxT("%M:%S.%l")).c_str(),
      tsDefaultEnhanced.Format(wxT("%M:%S.%l")).c_str(), tsDefaultThreaded.Format(wxT("%M:%S.%l")).c_str(),tsSSE.Format(wxT("%M:%S.%l")).c_str(),tsSSEThreaded.Format(wxT("%M:%S.%l")).c_str()));
   return bBreakLoop;
}
|
|
|
|
// Pastes the processed audio in `output` back into track `t`.
//
// t      - the track being processed; its clips inside the selection are
//          replaced.
// output - the processed audio; it starts at time zero and covers the whole
//          selection as one run.
// start  - sample offset of the selection in `t`.
// len    - length of the selection in samples.
//
// Only the parts of the selection that intersect an existing clip of `t`
// are replaced, so the original clip layout (including gaps between clips)
// is kept. Always returns true.
bool EffectEqualization48x::ProcessTail(WaveTrack * t, WaveTrack * output, sampleCount start, sampleCount len)
{
   //   double offsetT0 = t->LongSamplesToTime((sampleCount)offset);
   const double lenT = t->LongSamplesToTime(len);
   // 'start' is the sample offset in 't', the passed in track
   // 'startT' is the equivalent time value
   // 'output' starts at zero
   const double startT = t->LongSamplesToTime(start);
   const double endT = startT + lenT;

   //output has one waveclip for the total length, even though
   //t might have whitespace separating multiple clips
   //we want to maintain the original clip structure, so
   //only paste the intersections of the new clip.

   //Find the bits of clips that need replacing
   std::vector<std::pair<double, double> > clipStartEndTimes;
   std::vector<std::pair<double, double> > clipRealStartEndTimes; //the above may be truncated due to a clip being partially selected
   for (WaveClipList::compatibility_iterator it=t->GetClipIterator(); it; it=it->GetNext())
   {
      WaveClip *clip = it->GetData();
      double clipStartT = clip->GetStartTime();
      double clipEndT = clip->GetEndTime();

      if( clipEndT <= startT )
         continue;   // clip is not within selection
      if( clipStartT >= endT )
         continue;   // clip is not within selection

      //save the actual clip start/end so that we can rejoin them after we paste.
      clipRealStartEndTimes.push_back(std::pair<double,double>(clipStartT,clipEndT));

      if( clipStartT < startT )  // does selection cover the whole clip?
         clipStartT = startT; // don't copy all the new clip
      if( clipEndT > endT )  // does selection cover the whole clip?
         clipEndT = endT; // don't copy all the new clip

      //save them
      clipStartEndTimes.push_back(std::pair<double,double>(clipStartT,clipEndT));
   }

   //now go thru and replace the old clips with new
   for(unsigned int i=0;i<clipStartEndTimes.size();i++)
   {
      const std::pair<double, double> &selected = clipStartEndTimes[i];
      const std::pair<double, double> &real = clipRealStartEndTimes[i];

      // Start from NULL so the check below is well defined even if Copy()
      // leaves the pointer untouched.
      Track *toClipOutput = NULL;

      //remove the old audio and get the new
      t->Clear(selected.first,selected.second);
      //         output->Copy(clipStartEndTimes[i].first-startT+offsetT0,clipStartEndTimes[i].second-startT+offsetT0, &toClipOutput);
      output->Copy(selected.first-startT,selected.second-startT, &toClipOutput);
      if(toClipOutput)
      {
         //put the processed audio in
         bool bResult = t->Paste(selected.first, toClipOutput);
         wxASSERT(bResult); // TO DO: Actually handle this.

         //if the clip was only partially selected, the Paste will have created a split line.  Join is needed to take care of this
         //This is not true when the selection is fully contained within one clip (second half of conditional)
         const bool partiallySelected =
            real.first != selected.first || real.second != selected.second;
         const bool selectionInsideClip =
            real.first <= startT && real.second >= endT;
         if( partiallySelected && !selectionInsideClip )
            t->Join(real.first,real.second);

         delete toClipOutput;
      }
   }
   return true;
}
|
|
|
|
// Filters one contiguous run of `bufferLength` samples from `sourceBuffer`
// into `destBuffer` with the scalar (1x) path.
//
// The scratch area is taken to start mSubBufferSize floats after
// `sourceBuffer`, so the caller must pass a buffer that has that layout
// (the callers in this file pass mBigBuffer).
//
// An empty request returns true without doing anything: a zero contiguous
// size would otherwise make ProcessBuffer1x() take its four-buffer path and
// read BufferInfo fields that are never filled in here.
bool EffectEqualization48x::ProcessBuffer(fft_type *sourceBuffer, fft_type *destBuffer, sampleCount bufferLength)
{
   if(bufferLength<=0)
      return true;

   BufferInfo bufferInfo;
   bufferInfo.mContiguousBufferSize=bufferLength;
   bufferInfo.mBufferSouce[0]=sourceBuffer;
   bufferInfo.mBufferDest[0]=destBuffer;
   bufferInfo.mScratchBuffer=&sourceBuffer[mSubBufferSize];
   return ProcessBuffer1x(&bufferInfo);
}
|
|
|
|
// Scalar overlap-add filtering of the buffers described by `bufferInfo`.
//
// When mContiguousBufferSize is non-zero a single buffer of that length is
// processed; otherwise four buffers of mBufferLength samples each are.
//
// Each block of mBlockSize samples is copied into one of two alternating
// work windows, zero-padded by mFilterSize samples, filtered with
// Filter1x(), and written out. From the second block on, the first
// mFilterSize output samples are the sum of the new block and the tail the
// previous block left in the other work window. The very first block drops
// its first mFilterSize>>1 samples instead. Always returns true.
bool EffectEqualization48x::ProcessBuffer1x(BufferInfo *bufferInfo)
{
   const bool contiguous=bufferInfo->mContiguousBufferSize!=0;
   const int bufferCount=contiguous?1:4;

   for(int bufferIndex=0;bufferIndex<bufferCount;bufferIndex++)
   {
      int bufferLength;
      if(contiguous)
         bufferLength=bufferInfo->mContiguousBufferSize;
      else
         bufferLength=bufferInfo->mBufferLength;

      // Number of blocks, counting a trailing partial block as a whole one.
      sampleCount blockCount=bufferLength/mBlockSize;
      const sampleCount lastBlockSize=bufferLength%mBlockSize;
      if(lastBlockSize)
         blockCount++;

      float *workBuffer=bufferInfo->mScratchBuffer;    // two alternating work windows
      float *scratchBuffer=&workBuffer[mWindowSize*2]; // FFT scratch, placed after them
      float *sourceBuffer=bufferInfo->mBufferSouce[bufferIndex];
      float *destBuffer=bufferInfo->mBufferDest[bufferIndex];

      for(int runx=0;runx<blockCount;runx++)
      {
         float *window=&workBuffer[mWindowSize*(runx&1)];
         float *windowTail=&window[mBlockSize];

         // Load one block and zero the padding behind it.
         const float *in=sourceBuffer;
         for(float *out=window;out<windowTail;)
            *out++ = *in++;
         sourceBuffer+=mBlockSize;
         for(float *pad=windowTail;pad<windowTail+mFilterSize;)
            *pad++ = 0;

         //         mEffectEqualization->Filter(mWindowSize, currentBuffer);
         Filter1x(mWindowSize, window, scratchBuffer);

         float *readPos=window;
         float *writeEnd=window+mBlockSize;
         // NOTE(review): runx is always below blockCount inside this loop, so
         // this condition never holds as written; kept unchanged.
         if(runx==blockCount)
            writeEnd=window+(lastBlockSize+mFilterSize);

         if(runx) {
            // Overlap-add with the tail left in the other work window.
            float *lastOverrun=&workBuffer[mWindowSize*((runx+1)&1)+mBlockSize];
            for(int j=0;j<mFilterSize;j++)
               *destBuffer++ = *readPos++ + *lastOverrun++;
         } else {
            readPos+=mFilterSize>>1; // first run: skip the first half filter length
         }

         while(readPos<writeEnd)
            *destBuffer++ = *readPos++;
      }
   }
   return true;
}
|
|
|
|
bool EffectEqualization48x::ProcessOne1x(int count, WaveTrack * t,
|
|
sampleCount start, sampleCount len)
|
|
{
|
|
//sampleCount blockCount=len/mBlockSize;
|
|
|
|
sampleCount trackBlockSize = t->GetMaxBlockSize();
|
|
|
|
AudacityProject *p = GetActiveProject();
|
|
WaveTrack *output=p->GetTrackFactory()->NewWaveTrack(floatSample, t->GetRate());
|
|
|
|
mEffectEqualization->TrackProgress(count, 0.0);
|
|
int subBufferSize=mBufferCount==8?(mSubBufferSize>>1):mSubBufferSize; // half the buffers if avx is active
|
|
int bigRuns=len/(subBufferSize-mBlockSize);
|
|
int trackBlocksPerBig=subBufferSize/trackBlockSize;
|
|
int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize;
|
|
int singleProcessLength;
|
|
if(!bigRuns)
|
|
singleProcessLength=len;
|
|
else
|
|
singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize));
|
|
sampleCount currentSample=start;
|
|
bool bBreakLoop = false;
|
|
for(int bigRun=0;bigRun<bigRuns;bigRun++)
|
|
{
|
|
// fill the buffer
|
|
for(int i=0;i<trackBlocksPerBig;i++) {
|
|
t->Get((samplePtr)&mBigBuffer[i*trackBlockSize], floatSample, currentSample, trackBlockSize);
|
|
currentSample+=trackBlockSize;
|
|
}
|
|
if(trackLeftovers) {
|
|
t->Get((samplePtr)&mBigBuffer[trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers);
|
|
currentSample+=trackLeftovers;
|
|
}
|
|
currentSample-=mBlockSize+(mFilterSize>>1);
|
|
|
|
ProcessBuffer1x(mBufferInfo);
|
|
bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigRun)/(double)bigRuns);
|
|
if( bBreakLoop )
|
|
break;
|
|
output->Append((samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)], floatSample, subBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1)));
|
|
}
|
|
if(singleProcessLength && !bBreakLoop) {
|
|
t->Get((samplePtr)mBigBuffer, floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
ProcessBuffer(mBigBuffer, mBigBuffer, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
output->Append((samplePtr)&mBigBuffer[bigRuns?mBlockSize:0], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
}
|
|
output->Flush();
|
|
if(!bBreakLoop)
|
|
ProcessTail(t, output, start, len);
|
|
delete output;
|
|
return bBreakLoop;
|
|
}
|
|
|
|
// Filters one window in place: forward FFT of `buffer`, multiplication of
// every bin by the effect's filter function (mFilterFuncR/mFilterFuncI),
// inverse FFT, and reordering back to time order into `buffer`.
//
// len           - window length in samples.
// buffer        - input window; receives the filtered result.
// scratchBuffer - work area that holds the filtered spectrum.
//
// The FFT output is in bit-reversed order. The position of bin i is taken
// from the FFT handle's BitReversed table when bit 0 of the global math path
// is set, and computed with SmallRB() otherwise.
void EffectEqualization48x::Filter1x(sampleCount len,
                                     float *buffer, float *scratchBuffer)
{
   // Apply FFT
   RealFFTf1x(buffer, mEffectEqualization->hFFT);

   const int halfLength=(len/2);
   const bool useBitReverseTable=sMathPath&1;

   // Apply filter
   // DC component is purely real
   float filterFuncR=mEffectEqualization->mFilterFuncR[0];
   scratchBuffer[0]=buffer[0]*filterFuncR;

   for(int bin=1; bin<halfLength; bin++)
   {
      // Where this bin's real part sits in the bit-reversed FFT output.
      int position;
      if(useBitReverseTable)
         position=mEffectEqualization->hFFT->BitReversed[bin];
      else
         position=SmallRB(bin,mEffectEqualization->hFFT->pow2Bits);

      const float real=buffer[position];
      const float imag=buffer[position+1];
      filterFuncR=mEffectEqualization->mFilterFuncR[bin];
      const float filterFuncI=mEffectEqualization->mFilterFuncI[bin];

      // Complex multiply: (real + i*imag) * (filterFuncR + i*filterFuncI)
      scratchBuffer[2*bin  ] = real*filterFuncR - imag*filterFuncI;
      scratchBuffer[2*bin+1] = real*filterFuncI + imag*filterFuncR;
   }

   // Fs/2 component is purely real
   filterFuncR=mEffectEqualization->mFilterFuncR[halfLength];
   scratchBuffer[1] = buffer[1] * filterFuncR;

   // Inverse FFT and normalization
   InverseRealFFTf1x(scratchBuffer, mEffectEqualization->hFFT);
   ReorderToTime1x(mEffectEqualization->hFFT, scratchBuffer, buffer);
}
|
|
|
|
// Turns four consecutive swizzled rows (rows[0..3], each holding one sample
// of each of the four streams) back into one vector of four consecutive
// samples per stream, and stores them at writeBlocks[0..3][slot].
// All four rows are read before anything is written.
static inline void StoreUnswizzledRows4x(const __m128 *rows, __m128 **writeBlocks, int slot)
{
   const __m128 tmp0 = _mm_shuffle_ps(rows[1], rows[0], _MM_SHUFFLE(0,1,0,1));
   const __m128 tmp1 = _mm_shuffle_ps(rows[1], rows[0], _MM_SHUFFLE(2,3,2,3));
   const __m128 tmp2 = _mm_shuffle_ps(rows[3], rows[2], _MM_SHUFFLE(0,1,0,1));
   const __m128 tmp3 = _mm_shuffle_ps(rows[3], rows[2], _MM_SHUFFLE(2,3,2,3));
   writeBlocks[0][slot] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
   writeBlocks[1][slot] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
   writeBlocks[2][slot] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
   writeBlocks[3][slot] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
}

// SSE overlap-add filtering of the four buffers described by `bufferInfo`,
// four streams at a time.
//
// For every block the four source streams are interleaved ("swizzled") so
// that each __m128 holds the same sample position of all four streams, the
// block is zero-padded and filtered with Filter4x(), and the result is
// de-interleaved into the four destination streams. From the second block
// on, the first mFilterSize rows are summed with the tail the previous
// block left in the other work window. On the first block only stream 0 is
// written.
//
// Returns false, without touching any data, when mBufferLength is not a
// multiple of mBlockSize; true otherwise.
bool EffectEqualization48x::ProcessBuffer4x(BufferInfo *bufferInfo)
{
   // length must be a factor of window size for 4x processing.
   if(bufferInfo->mBufferLength%mBlockSize)
      return false;

   const sampleCount blockCount=bufferInfo->mBufferLength/mBlockSize;

   __m128 *readBlocks[4]; // some temps so we dont destroy the vars in the struct
   __m128 *writeBlocks[4];
   for(int stream=0;stream<4;stream++) {
      readBlocks[stream]=(__m128 *)bufferInfo->mBufferSouce[stream];
      writeBlocks[stream]=(__m128 *)bufferInfo->mBufferDest[stream];
   }

   __m128 *swizzledBuffer128=(__m128 *)bufferInfo->mScratchBuffer; // two alternating work windows
   __m128 *scratchBuffer=&swizzledBuffer128[mWindowSize*2];        // FFT scratch, placed after them
   const __m128 zero=_mm_setzero_ps();

   for(int run4x=0;run4x<blockCount;run4x++)
   {
      // swizzle the data to the swizzle buffer
      __m128 *currentSwizzledBlock=&swizzledBuffer128[mWindowSize*(run4x&1)];
      for(int i=0,j=0;j<mBlockSize;i++,j+=4) {
         const __m128 lo01 = _mm_shuffle_ps(readBlocks[0][i], readBlocks[1][i], _MM_SHUFFLE(1,0,1,0));
         const __m128 hi01 = _mm_shuffle_ps(readBlocks[0][i], readBlocks[1][i], _MM_SHUFFLE(3,2,3,2));
         const __m128 lo23 = _mm_shuffle_ps(readBlocks[2][i], readBlocks[3][i], _MM_SHUFFLE(1,0,1,0));
         const __m128 hi23 = _mm_shuffle_ps(readBlocks[2][i], readBlocks[3][i], _MM_SHUFFLE(3,2,3,2));
         currentSwizzledBlock[j]   = _mm_shuffle_ps(lo01, lo23, _MM_SHUFFLE(2,0,2,0));
         currentSwizzledBlock[j+1] = _mm_shuffle_ps(lo01, lo23, _MM_SHUFFLE(3,1,3,1));
         currentSwizzledBlock[j+2] = _mm_shuffle_ps(hi01, hi23, _MM_SHUFFLE(2,0,2,0));
         currentSwizzledBlock[j+3] = _mm_shuffle_ps(hi01, hi23, _MM_SHUFFLE(3,1,3,1));
      }

      // Zero the padding behind the block.
      __m128 *thisOverrun128=&currentSwizzledBlock[mBlockSize];
      for(int i=0;i<mFilterSize;i++)
         thisOverrun128[i]=zero;

      Filter4x(mWindowSize, (float *)currentSwizzledBlock, (float *)scratchBuffer);

      const int writeEnd=mBlockSize;
      if(run4x) {
         // maybe later swizzle add and write in one
         __m128 *lastOverrun128=&swizzledBuffer128[mWindowSize*((run4x+1)&1)+mBlockSize];

         // add and swizzle data + filter
         for(int i=0,j=0;j<mFilterSize;i++,j+=4) {
            __m128 summed[4];
            summed[0] = _mm_add_ps(currentSwizzledBlock[j],   lastOverrun128[j]);
            summed[1] = _mm_add_ps(currentSwizzledBlock[j+1], lastOverrun128[j+1]);
            summed[2] = _mm_add_ps(currentSwizzledBlock[j+2], lastOverrun128[j+2]);
            summed[3] = _mm_add_ps(currentSwizzledBlock[j+3], lastOverrun128[j+3]);
            StoreUnswizzledRows4x(summed, writeBlocks, i);
         }

         // swizzle it back.
         for(int i=mFilterSize>>2,j=mFilterSize;j<writeEnd;i++,j+=4)
            StoreUnswizzledRows4x(&currentSwizzledBlock[j], writeBlocks, i);
      } else {
         // swizzle it back. We overlap one block so we only write the first block on the first run
         for(int i=0,j=0;j<writeEnd;i++,j+=4) {
            const __m128 tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+1], currentSwizzledBlock[j], _MM_SHUFFLE(0,1,0,1));
            const __m128 tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+2], _MM_SHUFFLE(0,1,0,1));
            writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
         }
      }

      for(int stream=0;stream<4;stream++) { // shift each block
         readBlocks[stream]+=mBlockSize>>2; // these are 128b pointers, each window is 1/4 blockSize for those
         writeBlocks[stream]+=mBlockSize>>2;
      }
   }
   return true;
}
|
|
|
|
bool EffectEqualization48x::ProcessOne4x(int count, WaveTrack * t,
|
|
sampleCount start, sampleCount len)
|
|
{
|
|
int subBufferSize=mBufferCount==8?(mSubBufferSize>>1):mSubBufferSize; // half the buffers if avx is active
|
|
|
|
if(len<subBufferSize) // it's not worth 4x processing do a regular process
|
|
return ProcessOne1x(count, t, start, len);
|
|
|
|
sampleCount trackBlockSize = t->GetMaxBlockSize();
|
|
|
|
AudacityProject *p = GetActiveProject();
|
|
WaveTrack *output=p->GetTrackFactory()->NewWaveTrack(floatSample, t->GetRate());
|
|
|
|
mEffectEqualization->TrackProgress(count, 0.0);
|
|
int bigRuns=len/(subBufferSize-mBlockSize);
|
|
int trackBlocksPerBig=subBufferSize/trackBlockSize;
|
|
int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize;
|
|
int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize));
|
|
sampleCount currentSample=start;
|
|
|
|
bool bBreakLoop = false;
|
|
for(int bigRun=0;bigRun<bigRuns;bigRun++)
|
|
{
|
|
// fill the buffer
|
|
for(int i=0;i<trackBlocksPerBig;i++) {
|
|
t->Get((samplePtr)&mBigBuffer[i*trackBlockSize], floatSample, currentSample, trackBlockSize);
|
|
currentSample+=trackBlockSize;
|
|
}
|
|
if(trackLeftovers) {
|
|
t->Get((samplePtr)&mBigBuffer[trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers);
|
|
currentSample+=trackLeftovers;
|
|
}
|
|
currentSample-=mBlockSize+(mFilterSize>>1);
|
|
|
|
ProcessBuffer4x(mBufferInfo);
|
|
bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigRun)/(double)bigRuns);
|
|
if( bBreakLoop )
|
|
break;
|
|
output->Append((samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)], floatSample, subBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1)));
|
|
}
|
|
if(singleProcessLength && !bBreakLoop) {
|
|
t->Get((samplePtr)mBigBuffer, floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
ProcessBuffer(mBigBuffer, mBigBuffer, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
output->Append((samplePtr)&mBigBuffer[bigRuns?mBlockSize:0], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
// output->Append((samplePtr)&mBigBuffer[bigRuns?mBlockSize:0], floatSample, singleProcessLength);
|
|
}
|
|
output->Flush();
|
|
if(!bBreakLoop)
|
|
ProcessTail(t, output, start, len);
|
|
delete output;
|
|
return bBreakLoop;
|
|
}
|
|
void *EQWorker::Entry()
|
|
{
|
|
while(!mExitLoop) {
|
|
mMutex->Lock();
|
|
bool bufferAquired=false;
|
|
for(int i=0;i<mBufferInfoCount;i++)
|
|
if(mBufferInfoList[i].mBufferStatus==BufferReady) { // we found an unlocked ready buffer
|
|
bufferAquired=true;
|
|
mBufferInfoList[i].mBufferStatus=BufferBusy; // we own it now
|
|
mMutex->Unlock();
|
|
switch (mProcessingType)
|
|
{
|
|
case 1:
|
|
mEffectEqualization48x->ProcessBuffer1x(&mBufferInfoList[i]);
|
|
break;
|
|
case 4:
|
|
mEffectEqualization48x->ProcessBuffer4x(&mBufferInfoList[i]);
|
|
break;
|
|
}
|
|
mBufferInfoList[i].mBufferStatus=BufferDone; // we're done
|
|
break;
|
|
}
|
|
if(!bufferAquired)
|
|
mMutex->Unlock();
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
bool EffectEqualization48x::ProcessOne1x4xThreaded(int count, WaveTrack * t,
|
|
sampleCount start, sampleCount len, int processingType)
|
|
{
|
|
int subBufferSize=mBufferCount==8?(mSubBufferSize>>1):mSubBufferSize; // half the buffers if avx is active
|
|
|
|
sampleCount blockCount=len/mBlockSize;
|
|
|
|
if(blockCount<16) // it's not worth 4x processing do a regular process
|
|
return ProcessOne4x(count, t, start, len);
|
|
if(mThreadCount<=0 || blockCount<256) // dont do it without cores or big data
|
|
return ProcessOne4x(count, t, start, len);
|
|
|
|
for(int i=0;i<mThreadCount;i++)
|
|
mEQWorkers[i].mProcessingType=processingType;
|
|
|
|
AudacityProject *p = GetActiveProject();
|
|
WaveTrack *output=p->GetTrackFactory()->NewWaveTrack(floatSample, t->GetRate());
|
|
|
|
sampleCount trackBlockSize = t->GetMaxBlockSize();
|
|
mEffectEqualization->TrackProgress(count, 0.0);
|
|
int bigRuns=len/(subBufferSize-mBlockSize);
|
|
int trackBlocksPerBig=subBufferSize/trackBlockSize;
|
|
int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize;
|
|
int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize));
|
|
sampleCount currentSample=start;
|
|
|
|
int bigBlocksRead=mWorkerDataCount, bigBlocksWritten=0;
|
|
|
|
// fill the first workerDataCount buffers we checked above and there is at least this data
|
|
int maxPreFill=bigRuns<mWorkerDataCount?bigRuns:mWorkerDataCount;
|
|
for(int i=0;i<maxPreFill;i++)
|
|
{
|
|
// fill the buffer
|
|
for(int j=0;j<trackBlocksPerBig;j++) {
|
|
t->Get((samplePtr)&mBufferInfo[i].mBufferSouce[0][j*trackBlockSize], floatSample, currentSample, trackBlockSize);
|
|
currentSample+=trackBlockSize;
|
|
}
|
|
if(trackLeftovers) {
|
|
t->Get((samplePtr)&mBufferInfo[i].mBufferSouce[0][trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers);
|
|
currentSample+=trackLeftovers;
|
|
}
|
|
currentSample-=mBlockSize+(mFilterSize>>1);
|
|
mBufferInfo[i].mBufferStatus=BufferReady; // free for grabbin
|
|
}
|
|
int currentIndex=0;
|
|
bool bBreakLoop = false;
|
|
while(bigBlocksWritten<bigRuns && !bBreakLoop) {
|
|
bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigBlocksWritten)/(double)bigRuns);
|
|
if( bBreakLoop )
|
|
break;
|
|
mDataMutex.Lock(); // Get in line for data
|
|
// process as many blocks as we can
|
|
while((mBufferInfo[currentIndex].mBufferStatus==BufferDone) && (bigBlocksWritten<bigRuns)) { // data is ours
|
|
output->Append((samplePtr)&mBufferInfo[currentIndex].mBufferDest[0][(bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)], floatSample, subBufferSize-((bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)));
|
|
bigBlocksWritten++;
|
|
if(bigBlocksRead<bigRuns) {
|
|
// fill the buffer
|
|
for(int j=0;j<trackBlocksPerBig;j++) {
|
|
t->Get((samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][j*trackBlockSize], floatSample, currentSample, trackBlockSize);
|
|
currentSample+=trackBlockSize;
|
|
}
|
|
if(trackLeftovers) {
|
|
t->Get((samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers);
|
|
currentSample+=trackLeftovers;
|
|
}
|
|
currentSample-=mBlockSize+(mFilterSize>>1);
|
|
mBufferInfo[currentIndex].mBufferStatus=BufferReady; // free for grabbin
|
|
bigBlocksRead++;
|
|
} else mBufferInfo[currentIndex].mBufferStatus=BufferEmpty; // this is completely unecessary
|
|
currentIndex=(currentIndex+1)%mWorkerDataCount;
|
|
}
|
|
mDataMutex.Unlock(); // Get back in line for data
|
|
}
|
|
if(singleProcessLength && !bBreakLoop) {
|
|
t->Get((samplePtr)mBigBuffer, floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
ProcessBuffer(mBigBuffer, mBigBuffer, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
output->Append((samplePtr)&mBigBuffer[mBlockSize], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
}
|
|
output->Flush();
|
|
if(!bBreakLoop)
|
|
ProcessTail(t, output, start, len);
|
|
delete output;
|
|
return bBreakLoop;
|
|
}
|
|
|
|
// Applies the equalization filter to four interleaved channels at once.
//
// buffer is transformed in place with RealFFTf4x; each frequency bin is then
// multiplied by the complex filter response (mFilterFuncR / mFilterFuncI of
// mEffectEqualization) and written to scratchBuffer. scratchBuffer is
// inverse-transformed and ReorderToTime4x writes the result back to buffer.
// Both pointers are accessed as arrays of __m128. len is the transform
// length; bins 1 .. len/2-1 are treated as complex.
void EffectEqualization48x::Filter4x(sampleCount len,
                                     float *buffer, float *scratchBuffer)
{
   // Forward FFT, in place.
   RealFFTf4x(buffer, mEffectEqualization->hFFT);

   __m128 *spectrum=(__m128 *)buffer;
   __m128 *filtered=(__m128 *)scratchBuffer;

   // DC component is purely real.
   filtered[0]=_mm_mul_ps(spectrum[0], _mm_set1_ps(mEffectEqualization->mFilterFuncR[0]));

   int halfLength=(len/2);

   // Bit 0 of sMathPath selects table lookup over computing the bit reversal.
   bool useBitReverseTable=sMathPath&1;

   for(int bin=1; bin<halfLength; bin++)
   {
      // The FFT output is in bit-reversed order: locate this bin's real part;
      // the imaginary part follows it.
      int srcIndex;
      if(useBitReverseTable)
         srcIndex=mEffectEqualization->hFFT->BitReversed[bin];
      else
         srcIndex=SmallRB(bin,mEffectEqualization->hFFT->pow2Bits);

      __m128 re=spectrum[srcIndex];
      __m128 im=spectrum[srcIndex+1];
      __m128 gainRe=_mm_set1_ps(mEffectEqualization->mFilterFuncR[bin]);
      __m128 gainIm=_mm_set1_ps(mEffectEqualization->mFilterFuncI[bin]);

      // Complex multiply: (re + i*im) * (gainRe + i*gainIm).
      filtered[2*bin  ] = _mm_sub_ps( _mm_mul_ps(re, gainRe), _mm_mul_ps(im, gainIm));
      filtered[2*bin+1] = _mm_add_ps( _mm_mul_ps(re, gainIm), _mm_mul_ps(im, gainRe));
   }

   // Fs/2 component is purely real.
   filtered[1] = _mm_mul_ps(spectrum[1], _mm_set1_ps(mEffectEqualization->mFilterFuncR[halfLength]));

   // Inverse FFT and normalization, then back to time order in buffer.
   InverseRealFFTf4x(scratchBuffer, mEffectEqualization->hFFT);
   ReorderToTime4x(mEffectEqualization->hFFT, scratchBuffer, buffer);
}
|
|
|
|
#ifdef __AVX_ENABLED
|
|
|
|
// Note: although the AVX (8x) code below has been written, it has not been tested.
|
|
|
|
// Equalizes one BufferInfo with the 8-wide (AVX) kernel.
//
// The eight source sub-buffers are processed one block (mBlockSize samples)
// per iteration:
//   1. four samples at a time are transposed ("swizzled") so that sample k of
//      sub-buffers 0..3 lands in element j and sample k of sub-buffers 4..7
//      in element j+1 of the swizzle buffer;
//   2. the part of the window after the block is cleared and Filter8x is run
//      on the window;
//   3. the result is transposed back into the destination sub-buffers. From
//      the second block on, the first doubleFilter elements are first summed
//      with the overrun area of the previous window (the two windows
//      alternate inside mScratchBuffer).
//
// Returns false without touching anything when mBufferLength is not a
// multiple of mBlockSize or when the 8-buffer layout is not active
// (mBufferCount != 8); true otherwise.
bool EffectEqualization48x::ProcessBuffer8x(BufferInfo *bufferInfo)
{
   // length must be a multiple of the block size for 8x processing.
   if(bufferInfo->mBufferLength%mBlockSize || mBufferCount!=8)
      return false;

   sampleCount blockCount=bufferInfo->mBufferLength/mBlockSize;

   __m128 *readBlocks[8]; // some temps so we don't destroy the vars in the struct
   __m128 *writeBlocks[8];
   for(int i=0;i<8;i++) {
      readBlocks[i]=(__m128 *)bufferInfo->mBufferSouce[i];
      writeBlocks[i]=(__m128 *)bufferInfo->mBufferDest[i];
   }

   // Scratch layout: two swizzled windows (doubleWindow elements each)
   // followed by the FFT scratch area.
   __m128 *swizzledBuffer128=(__m128 *)bufferInfo->mScratchBuffer;
   __m128 *scratchBuffer=&swizzledBuffer128[mWindowSize*4];

   // Sizes in __m128 units: 8 channels per sample = 2 vectors per sample.
   int doubleFilter=mFilterSize<<1;
   int doubleWindow=mWindowSize<<1;
   int doubleBlock=mBlockSize<<1;

   const __m128 zero=_mm_setzero_ps();

   for(int run4x=0;run4x<blockCount;run4x++)
   {
      // swizzle the data to the swizzle buffer (windows alternate per run)
      __m128 *currentSwizzledBlock=&swizzledBuffer128[doubleWindow*(run4x&1)];
      for(int i=0,j=0;j<doubleBlock;i++,j+=8) {
         // sub-buffers 0..3 -> even elements
         __m128 tmp0 = _mm_shuffle_ps(readBlocks[0][i], readBlocks[1][i], _MM_SHUFFLE(1,0,1,0));
         __m128 tmp1 = _mm_shuffle_ps(readBlocks[0][i], readBlocks[1][i], _MM_SHUFFLE(3,2,3,2));
         __m128 tmp2 = _mm_shuffle_ps(readBlocks[2][i], readBlocks[3][i], _MM_SHUFFLE(1,0,1,0));
         __m128 tmp3 = _mm_shuffle_ps(readBlocks[2][i], readBlocks[3][i], _MM_SHUFFLE(3,2,3,2));
         currentSwizzledBlock[j]   = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(2,0,2,0));
         currentSwizzledBlock[j+2] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(3,1,3,1));
         currentSwizzledBlock[j+4] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(2,0,2,0));
         currentSwizzledBlock[j+6] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(3,1,3,1));
         // sub-buffers 4..7 -> odd elements
         tmp0 = _mm_shuffle_ps(readBlocks[4][i], readBlocks[5][i], _MM_SHUFFLE(1,0,1,0));
         tmp1 = _mm_shuffle_ps(readBlocks[4][i], readBlocks[5][i], _MM_SHUFFLE(3,2,3,2));
         tmp2 = _mm_shuffle_ps(readBlocks[6][i], readBlocks[7][i], _MM_SHUFFLE(1,0,1,0));
         tmp3 = _mm_shuffle_ps(readBlocks[6][i], readBlocks[7][i], _MM_SHUFFLE(3,2,3,2));
         currentSwizzledBlock[j+1] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(2,0,2,0));
         currentSwizzledBlock[j+3] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(3,1,3,1));
         currentSwizzledBlock[j+5] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(2,0,2,0));
         currentSwizzledBlock[j+7] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(3,1,3,1));
      }

      // Clear the area after the block before filtering the window.
      __m128 *thisOverrun128=&currentSwizzledBlock[doubleBlock];
      for(int i=0;i<doubleFilter;i++)
         thisOverrun128[i]=zero;

      Filter8x(mWindowSize, (float *)currentSwizzledBlock, (float *)scratchBuffer);

      int writeStart=0, writeToStart=0; // note readStart is where the read data is written
      int writeEnd=doubleBlock;
      if(run4x) {
         // maybe later swizzle add and write in one
         // Overrun area of the previous window (the other half of the swizzle buffer).
         __m128 *lastOverrun128=&swizzledBuffer128[doubleWindow*((run4x+1)&1)+doubleBlock];

         // add and swizzle data + filter
         for(int i=0,j=0;j<doubleFilter;i++,j+=8) {
            __m128 tmps0 = _mm_add_ps(currentSwizzledBlock[j],   lastOverrun128[j]);
            __m128 tmps1 = _mm_add_ps(currentSwizzledBlock[j+2], lastOverrun128[j+2]);
            __m128 tmps2 = _mm_add_ps(currentSwizzledBlock[j+4], lastOverrun128[j+4]);
            __m128 tmps3 = _mm_add_ps(currentSwizzledBlock[j+6], lastOverrun128[j+6]);
            __m128 tmp0 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(0,1,0,1));
            __m128 tmp1 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(2,3,2,3));
            __m128 tmp2 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(0,1,0,1));
            __m128 tmp3 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(2,3,2,3));
            writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
            writeBlocks[1][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
            writeBlocks[2][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
            writeBlocks[3][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
            tmps0 = _mm_add_ps(currentSwizzledBlock[j+1], lastOverrun128[j+1]);
            tmps1 = _mm_add_ps(currentSwizzledBlock[j+3], lastOverrun128[j+3]);
            tmps2 = _mm_add_ps(currentSwizzledBlock[j+5], lastOverrun128[j+5]);
            tmps3 = _mm_add_ps(currentSwizzledBlock[j+7], lastOverrun128[j+7]);
            tmp0 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(0,1,0,1));
            tmp1 = _mm_shuffle_ps(tmps1, tmps0, _MM_SHUFFLE(2,3,2,3));
            tmp2 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(0,1,0,1));
            tmp3 = _mm_shuffle_ps(tmps3, tmps2, _MM_SHUFFLE(2,3,2,3));
            writeBlocks[4][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
            writeBlocks[5][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
            writeBlocks[6][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
            writeBlocks[7][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
         }
         writeStart=doubleFilter;
         writeToStart=mFilterSize>>2;
         // swizzle it back.
         for(int i=writeToStart,j=writeStart;j<writeEnd;i++,j+=8) {
            __m128 tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+2], currentSwizzledBlock[j],   _MM_SHUFFLE(0,1,0,1));
            __m128 tmp1 = _mm_shuffle_ps(currentSwizzledBlock[j+2], currentSwizzledBlock[j],   _MM_SHUFFLE(2,3,2,3));
            __m128 tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+6], currentSwizzledBlock[j+4], _MM_SHUFFLE(0,1,0,1));
            __m128 tmp3 = _mm_shuffle_ps(currentSwizzledBlock[j+6], currentSwizzledBlock[j+4], _MM_SHUFFLE(2,3,2,3));
            writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
            writeBlocks[1][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
            writeBlocks[2][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
            writeBlocks[3][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
            tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+1], _MM_SHUFFLE(0,1,0,1));
            tmp1 = _mm_shuffle_ps(currentSwizzledBlock[j+3], currentSwizzledBlock[j+1], _MM_SHUFFLE(2,3,2,3));
            tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+7], currentSwizzledBlock[j+5], _MM_SHUFFLE(0,1,0,1));
            tmp3 = _mm_shuffle_ps(currentSwizzledBlock[j+7], currentSwizzledBlock[j+5], _MM_SHUFFLE(2,3,2,3));
            writeBlocks[4][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
            writeBlocks[5][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(0,2,0,2));
            writeBlocks[6][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(1,3,1,3));
            writeBlocks[7][i] = _mm_shuffle_ps(tmp1, tmp3, _MM_SHUFFLE(0,2,0,2));
         }
      } else {
         // swizzle it back. We overlap one block so we only write the first block on the first run
         writeStart=0;
         writeToStart=0;
         for(int i=writeToStart,j=writeStart;j<writeEnd;i++,j+=8) {
            __m128 tmp0 = _mm_shuffle_ps(currentSwizzledBlock[j+2], currentSwizzledBlock[j],   _MM_SHUFFLE(0,1,0,1));
            __m128 tmp2 = _mm_shuffle_ps(currentSwizzledBlock[j+6], currentSwizzledBlock[j+4], _MM_SHUFFLE(0,1,0,1));
            writeBlocks[0][i] = _mm_shuffle_ps(tmp0, tmp2, _MM_SHUFFLE(1,3,1,3));
         }
      }
      for(int i=0;i<8;i++) { // shift each block
         readBlocks[i]+=mBlockSize>>2; // these are 128b pointers, each block is 1/4 blockSize for those
         writeBlocks[i]+=mBlockSize>>2;
      }
   }
   return true;
}
|
|
|
|
bool EffectEqualization48x::ProcessOne8x(int count, WaveTrack * t,
|
|
sampleCount start, sampleCount len)
|
|
{
|
|
sampleCount blockCount=len/mBlockSize;
|
|
|
|
if(blockCount<32) // it's not worth 8x processing do a regular process
|
|
return ProcessOne4x(count, t, start, len);
|
|
|
|
sampleCount trackBlockSize = t->GetMaxBlockSize();
|
|
|
|
AudacityProject *p = GetActiveProject();
|
|
WaveTrack *output=p->GetTrackFactory()->NewWaveTrack(floatSample, t->GetRate());
|
|
|
|
mEffectEqualization->TrackProgress(count, 0.0);
|
|
int bigRuns=len/(mSubBufferSize-mBlockSize);
|
|
int trackBlocksPerBig=mSubBufferSize/trackBlockSize;
|
|
int trackLeftovers=mSubBufferSize-trackBlocksPerBig*trackBlockSize;
|
|
int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(mSubBufferSize-mBlockSize));
|
|
sampleCount currentSample=start;
|
|
|
|
bool bBreakLoop = false;
|
|
for(int bigRun=0;bigRun<bigRuns;bigRun++)
|
|
{
|
|
// fill the buffer
|
|
for(int i=0;i<trackBlocksPerBig;i++) {
|
|
t->Get((samplePtr)&mBigBuffer[i*trackBlockSize], floatSample, currentSample, trackBlockSize);
|
|
currentSample+=trackBlockSize;
|
|
}
|
|
if(trackLeftovers) {
|
|
t->Get((samplePtr)&mBigBuffer[trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers);
|
|
currentSample+=trackLeftovers;
|
|
}
|
|
currentSample-=mBlockSize+(mFilterSize>>1);
|
|
|
|
ProcessBuffer4x(mBufferInfo);
|
|
if (bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigRun)/(double)bigRuns))
|
|
{
|
|
break;
|
|
}
|
|
output->Append((samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)], floatSample, mSubBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1)));
|
|
}
|
|
if(singleProcessLength && !bBreakLoop) {
|
|
t->Get((samplePtr)mBigBuffer, floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
ProcessBuffer(mBigBuffer, mBigBuffer, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
output->Append((samplePtr)&mBigBuffer[mBlockSize], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
}
|
|
output->Flush();
|
|
if(!bBreakLoop)
|
|
ProcessTail(t, output, start, len);
|
|
delete output;
|
|
return bBreakLoop;
|
|
}
|
|
|
|
bool EffectEqualization48x::ProcessOne8xThreaded(int count, WaveTrack * t,
|
|
sampleCount start, sampleCount len)
|
|
{
|
|
sampleCount blockCount=len/mBlockSize;
|
|
|
|
if(blockCount<16) // it's not worth 4x processing do a regular process
|
|
return ProcessOne4x(count, t, start, len);
|
|
if(mThreadCount<=0 || blockCount<256) // dont do it without cores or big data
|
|
return ProcessOne4x(count, t, start, len);
|
|
|
|
AudacityProject *p = GetActiveProject();
|
|
WaveTrack *output=p->GetTrackFactory()->NewWaveTrack(floatSample, t->GetRate());
|
|
|
|
sampleCount trackBlockSize = t->GetMaxBlockSize();
|
|
mEffectEqualization->TrackProgress(count, 0.0);
|
|
int bigRuns=len/(mSubBufferSize-mBlockSize);
|
|
int trackBlocksPerBig=mSubBufferSize/trackBlockSize;
|
|
int trackLeftovers=mSubBufferSize-trackBlocksPerBig*trackBlockSize;
|
|
int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(mSubBufferSize-mBlockSize));
|
|
sampleCount currentSample=start;
|
|
|
|
int bigBlocksRead=mWorkerDataCount, bigBlocksWritten=0;
|
|
|
|
// fill the first workerDataCount buffers we checked above and there is at least this data
|
|
for(int i=0;i<mWorkerDataCount;i++)
|
|
{
|
|
// fill the buffer
|
|
for(int j=0;j<trackBlocksPerBig;j++) {
|
|
t->Get((samplePtr)&mBufferInfo[i].mBufferSouce[0][j*trackBlockSize], floatSample, currentSample, trackBlockSize);
|
|
currentSample+=trackBlockSize;
|
|
}
|
|
if(trackLeftovers) {
|
|
t->Get((samplePtr)&mBufferInfo[i].mBufferSouce[0][trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers);
|
|
currentSample+=trackLeftovers;
|
|
}
|
|
currentSample-=mBlockSize+(mFilterSize>>1);
|
|
mBufferInfo[i].mBufferStatus=BufferReady; // free for grabbin
|
|
}
|
|
int currentIndex=0;
|
|
bool bBreakLoop = false;
|
|
while(bigBlocksWritten<bigRuns) {
|
|
if (bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigBlocksWritten)/(double)bigRuns))
|
|
{
|
|
break;
|
|
}
|
|
mDataMutex.Lock(); // Get in line for data
|
|
// process as many blocks as we can
|
|
while((mBufferInfo[currentIndex].mBufferStatus==BufferDone) && (bigBlocksWritten<bigRuns)) { // data is ours
|
|
output->Append((samplePtr)&mBufferInfo[currentIndex].mBufferDest[0][(bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)], floatSample, mSubBufferSize-((bigBlocksWritten?mBlockSize:0)+(mFilterSize>>1)));
|
|
bigBlocksWritten++;
|
|
if(bigBlocksRead<bigRuns) {
|
|
// fill the buffer
|
|
for(int j=0;j<trackBlocksPerBig;j++) {
|
|
t->Get((samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][j*trackBlockSize], floatSample, currentSample, trackBlockSize);
|
|
currentSample+=trackBlockSize;
|
|
}
|
|
if(trackLeftovers) {
|
|
t->Get((samplePtr)&mBufferInfo[currentIndex].mBufferSouce[0][trackBlocksPerBig*trackBlockSize], floatSample, currentSample, trackLeftovers);
|
|
currentSample+=trackLeftovers;
|
|
}
|
|
currentSample-=mBlockSize+(mFilterSize>>1);
|
|
mBufferInfo[currentIndex].mBufferStatus=BufferReady; // free for grabbin
|
|
bigBlocksRead++;
|
|
} else mBufferInfo[currentIndex].mBufferStatus=BufferEmpty; // this is completely unecessary
|
|
currentIndex=(currentIndex+1)%mWorkerDataCount;
|
|
}
|
|
mDataMutex.Unlock(); // Get back in line for data
|
|
}
|
|
if(singleProcessLength && !bBreakLoop) {
|
|
t->Get((samplePtr)mBigBuffer, floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
ProcessBuffer(mBigBuffer, mBigBuffer, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
output->Append((samplePtr)&mBigBuffer[mBlockSize], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
|
|
}
|
|
output->Flush();
|
|
if(!bBreakLoop)
|
|
ProcessTail(t, output, start, len);
|
|
delete output;
|
|
return bBreakLoop;
|
|
}
|
|
|
|
|
|
|
|
|
|
// Applies the equalization filter to eight interleaved channels at once.
//
// buffer is transformed in place with RealFFTf8x; each frequency bin is then
// multiplied by the complex filter response (mFilterFuncR / mFilterFuncI of
// mEffectEqualization) and written to scratchBuffer. scratchBuffer is
// inverse-transformed and ReorderToTime8x writes the result back to buffer.
// Both pointers are accessed as arrays of __m256. len is the transform
// length; bins 1 .. len/2-1 are treated as complex.
void EffectEqualization48x::Filter8x(sampleCount len,
                                     float *buffer, float *scratchBuffer)
{
   // Forward FFT, in place.
   RealFFTf8x(buffer, mEffectEqualization->hFFT);

   __m256 *spectrum=(__m256 *)buffer;
   __m256 *filtered=(__m256 *)scratchBuffer;

   // DC component is purely real.
   filtered[0]=_mm256_mul_ps(spectrum[0], _mm256_set1_ps(mEffectEqualization->mFilterFuncR[0]));

   int halfLength=(len/2);

   // Bit 0 of sMathPath selects table lookup over computing the bit reversal.
   bool useBitReverseTable=sMathPath&1;

   for(int bin=1; bin<halfLength; bin++)
   {
      // The FFT output is in bit-reversed order: locate this bin's real part;
      // the imaginary part follows it.
      int srcIndex;
      if(useBitReverseTable)
         srcIndex=mEffectEqualization->hFFT->BitReversed[bin];
      else
         srcIndex=SmallRB(bin,mEffectEqualization->hFFT->pow2Bits);

      __m256 re=spectrum[srcIndex];
      __m256 im=spectrum[srcIndex+1];
      __m256 gainRe=_mm256_set1_ps(mEffectEqualization->mFilterFuncR[bin]);
      __m256 gainIm=_mm256_set1_ps(mEffectEqualization->mFilterFuncI[bin]);

      // Complex multiply: (re + i*im) * (gainRe + i*gainIm).
      filtered[2*bin  ] = _mm256_sub_ps( _mm256_mul_ps(re, gainRe), _mm256_mul_ps(im, gainIm));
      filtered[2*bin+1] = _mm256_add_ps( _mm256_mul_ps(re, gainIm), _mm256_mul_ps(im, gainRe));
   }

   // Fs/2 component is purely real.
   filtered[1] = _mm256_mul_ps(spectrum[1], _mm256_set1_ps(mEffectEqualization->mFilterFuncR[halfLength]));

   // Inverse FFT and normalization, then back to time order in buffer.
   InverseRealFFTf8x(scratchBuffer, mEffectEqualization->hFFT);
   ReorderToTime8x(mEffectEqualization->hFFT, scratchBuffer, buffer);
}
|
|
|
|
#endif
|
|
|
|
#endif
|