1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-05-03 09:09:47 +02:00

Incomplete fixes for compilation of EXPERIMENTAL_EQ_SSE_THREADED

This commit is contained in:
Paul Licameli 2017-02-24 22:20:25 -05:00
parent f061b0b853
commit dbb2f04def
5 changed files with 45 additions and 38 deletions

View File

@@ -36,6 +36,7 @@
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "Audacity.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>

View File

@@ -51,6 +51,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "Audacity.h"
#include "Experimental.h"
#ifdef EXPERIMENTAL_EQ_SSE_THREADED
@@ -376,7 +377,7 @@ void RealFFTf1xSinCosBRTable(fft_type *buffer,HFFT h)
*(A++) = *(B++) + 2 * v2;
}
A = B;
B + =ButterfliesPerGroup * 2;
B += ButterfliesPerGroup * 2;
sptr += 2;
}
ButterfliesPerGroup >>= 1;

View File

@@ -93,7 +93,7 @@ public:
};
int SmallRB(int bits, int numberBits);
int (*SmallVRB[])(int bits);
extern int (*SmallVRB[])(int bits);
#endif

View File

@@ -44,7 +44,11 @@
#endif
#include <stdlib.h>
#ifdef __WXMSW__
#include <malloc.h>
#endif
#include <stdio.h>
#include <math.h>
#include <emmintrin.h>
@@ -182,10 +186,11 @@ bool EffectEqualization48x::AllocateBuffersWorkers(int nThreads)
mWindowSize=mEffectEqualization->windowSize;
wxASSERT(mFilterSize < mWindowSize);
mBlockSize=mWindowSize-mFilterSize; // 12,384
mThreaded = (nThreads > 0 );
auto threadCount = wxThread::GetCPUCount();
mThreaded = (nThreads > 0 && threadCount > 0);
if(mThreaded)
{
mThreadCount=wxThread::GetCPUCount();
{
mThreadCount = threadCount;
mWorkerDataCount=mThreadCount+2; // 2 extra slots (maybe double later)
} else {
mWorkerDataCount=1;
@@ -294,7 +299,7 @@ bool EffectEqualization48x::Process(EffectEqualization* effectEqualization)
if(sMathPath) // !!! Filter MUST BE QUAD WORD ALIGNED !!!!
mEffectEqualization->mM=(mEffectEqualization->mM&(~15))+1;
AllocateBuffersWorkers(sMathPath&MATH_FUNCTION_THREADED);
SelectedTrackListOfKindIterator iter(Track::Wave, mEffectEqualization->mOutputTracks);
SelectedTrackListOfKindIterator iter(Track::Wave, mEffectEqualization->mOutputTracks.get());
WaveTrack *track = (WaveTrack *) iter.First();
int count = 0;
while (track) {
@@ -418,8 +423,8 @@ bool EffectEqualization48x::DeltaTrack(WaveTrack * t, WaveTrack * t2, sampleCoun
auto originalLen = len;
auto currentSample = start;
while(len) {
auto curretLength = std::min(len, trackBlockSize);
while(len > 0) {
auto curretLength = limitSampleBufferSize(trackBlockSize, len);
t->Get((samplePtr)buffer1, floatSample, currentSample, curretLength);
t2->Get((samplePtr)buffer2, floatSample, currentSample, curretLength);
for(decltype(curretLength) i=0;i<curretLength;i++)
@@ -574,7 +579,7 @@ bool EffectEqualization48x::ProcessTail(WaveTrack * t, WaveTrack * output, sampl
return true;
}
bool EffectEqualization48x::ProcessBuffer(fft_type *sourceBuffer, fft_type *destBuffer, sampleCount bufferLength)
bool EffectEqualization48x::ProcessBuffer(fft_type *sourceBuffer, fft_type *destBuffer, size_t bufferLength)
{
BufferInfo bufferInfo;
bufferInfo.mContiguousBufferSize=bufferLength;
@@ -589,12 +594,12 @@ bool EffectEqualization48x::ProcessBuffer1x(BufferInfo *bufferInfo)
int bufferCount=bufferInfo->mContiguousBufferSize?1:4;
for(int bufferIndex=0;bufferIndex<bufferCount;bufferIndex++)
{
int bufferLength=bufferInfo->mBufferLength;
auto bufferLength=bufferInfo->mBufferLength;
if(bufferInfo->mContiguousBufferSize)
bufferLength=bufferInfo->mContiguousBufferSize;
sampleCount blockCount=bufferLength/mBlockSize;
sampleCount lastBlockSize=bufferLength%mBlockSize;
auto blockCount=bufferLength/mBlockSize;
auto lastBlockSize=bufferLength%mBlockSize;
if(lastBlockSize)
blockCount++;
@@ -602,7 +607,7 @@ bool EffectEqualization48x::ProcessBuffer1x(BufferInfo *bufferInfo)
float *scratchBuffer=&workBuffer[mWindowSize*2]; // all scratch buffers are at the end
float *sourceBuffer=bufferInfo->mBufferSouce[bufferIndex];
float *destBuffer=bufferInfo->mBufferDest[bufferIndex];
for(int runx=0;runx<blockCount;runx++)
for(size_t runx=0;runx<blockCount;runx++)
{
float *currentBuffer=&workBuffer[mWindowSize*(runx&1)];
for(int i=0;i<mBlockSize;i++)
@@ -641,12 +646,12 @@ bool EffectEqualization48x::ProcessOne1x(int count, WaveTrack * t,
mEffectEqualization->TrackProgress(count, 0.0);
int subBufferSize=mBufferCount==8?(mSubBufferSize>>1):mSubBufferSize; // half the buffers if avx is active
int bigRuns=len/(subBufferSize-mBlockSize);
auto bigRuns=len/(subBufferSize-mBlockSize);
int trackBlocksPerBig=subBufferSize/trackBlockSize;
int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize;
int singleProcessLength;
if(!bigRuns)
singleProcessLength=len;
size_t singleProcessLength;
if(bigRuns == 0)
singleProcessLength = len.as_size_t();
else
singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize));
auto currentSample=start;
@@ -665,7 +670,7 @@ bool EffectEqualization48x::ProcessOne1x(int count, WaveTrack * t,
currentSample-=mBlockSize+(mFilterSize>>1);
ProcessBuffer1x(mBufferInfo);
bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigRun)/(double)bigRuns);
bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigRun)/bigRuns.as_double());
if( bBreakLoop )
break;
output->Append((samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)], floatSample, subBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1)));
@@ -673,7 +678,7 @@ bool EffectEqualization48x::ProcessOne1x(int count, WaveTrack * t,
if(singleProcessLength && !bBreakLoop) {
t->Get((samplePtr)mBigBuffer, floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
ProcessBuffer(mBigBuffer, mBigBuffer, singleProcessLength+mBlockSize+(mFilterSize>>1));
output->Append((samplePtr)&mBigBuffer[bigRuns?mBlockSize:0], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
output->Append((samplePtr)&mBigBuffer[bigRuns > 0 ? mBlockSize : 0], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
}
output->Flush();
if(!bBreakLoop)
@@ -730,7 +735,7 @@ bool EffectEqualization48x::ProcessBuffer4x(BufferInfo *bufferInfo)
if(bufferInfo->mBufferLength%mBlockSize)
return false;
sampleCount blockCount=bufferInfo->mBufferLength/mBlockSize;
auto blockCount=bufferInfo->mBufferLength/mBlockSize;
__m128 *readBlocks[4]; // some temps so we dont destroy the vars in the struct
__m128 *writeBlocks[4];
@@ -742,7 +747,7 @@ bool EffectEqualization48x::ProcessBuffer4x(BufferInfo *bufferInfo)
__m128 *swizzledBuffer128=(__m128 *)bufferInfo->mScratchBuffer;
__m128 *scratchBuffer=&swizzledBuffer128[mWindowSize*2];
for(int run4x=0;run4x<blockCount;run4x++)
for(size_t run4x=0;run4x<blockCount;run4x++)
{
// swizzle the data to the swizzle buffer
__m128 *currentSwizzledBlock=&swizzledBuffer128[mWindowSize*(run4x&1)];
@@ -825,10 +830,10 @@ bool EffectEqualization48x::ProcessOne4x(int count, WaveTrack * t,
auto output = p->GetTrackFactory()->NewWaveTrack(floatSample, t->GetRate());
mEffectEqualization->TrackProgress(count, 0.0);
int bigRuns=len/(subBufferSize-mBlockSize);
auto bigRuns = len/(subBufferSize-mBlockSize);
int trackBlocksPerBig=subBufferSize/trackBlockSize;
int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize;
int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize));
size_t singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize));
auto currentSample=start;
bool bBreakLoop = false;
@@ -846,7 +851,7 @@ bool EffectEqualization48x::ProcessOne4x(int count, WaveTrack * t,
currentSample-=mBlockSize+(mFilterSize>>1);
ProcessBuffer4x(mBufferInfo);
bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigRun)/(double)bigRuns);
bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigRun)/bigRuns.as_double());
if( bBreakLoop )
break;
output->Append((samplePtr)&mBigBuffer[(bigRun?mBlockSize:0)+(mFilterSize>>1)], floatSample, subBufferSize-((bigRun?mBlockSize:0)+(mFilterSize>>1)));
@@ -854,7 +859,7 @@ bool EffectEqualization48x::ProcessOne4x(int count, WaveTrack * t,
if(singleProcessLength && !bBreakLoop) {
t->Get((samplePtr)mBigBuffer, floatSample, currentSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
ProcessBuffer(mBigBuffer, mBigBuffer, singleProcessLength+mBlockSize+(mFilterSize>>1));
output->Append((samplePtr)&mBigBuffer[bigRuns?mBlockSize:0], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
output->Append((samplePtr)&mBigBuffer[bigRuns > 0 ? mBlockSize : 0], floatSample, singleProcessLength+mBlockSize+(mFilterSize>>1));
// output->Append((samplePtr)&mBigBuffer[bigRuns?mBlockSize:0], floatSample, singleProcessLength);
}
output->Flush();
@@ -910,16 +915,16 @@ bool EffectEqualization48x::ProcessOne1x4xThreaded(int count, WaveTrack * t,
auto trackBlockSize = t->GetMaxBlockSize();
mEffectEqualization->TrackProgress(count, 0.0);
int bigRuns=len/(subBufferSize-mBlockSize);
auto bigRuns = len/(subBufferSize-mBlockSize);
int trackBlocksPerBig=subBufferSize/trackBlockSize;
int trackLeftovers=subBufferSize-trackBlocksPerBig*trackBlockSize;
int singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize));
size_t singleProcessLength=(mFilterSize>>1)*bigRuns + len%(bigRuns*(subBufferSize-mBlockSize));
auto currentSample=start;
int bigBlocksRead=mWorkerDataCount, bigBlocksWritten=0;
// fill the first workerDataCount buffers we checked above and there is at least this data
int maxPreFill=bigRuns<mWorkerDataCount?bigRuns:mWorkerDataCount;
auto maxPreFill = bigRuns < mWorkerDataCount ? bigRuns : mWorkerDataCount;
for(int i=0;i<maxPreFill;i++)
{
// fill the buffer
@@ -937,7 +942,7 @@ bool EffectEqualization48x::ProcessOne1x4xThreaded(int count, WaveTrack * t,
int currentIndex=0;
bool bBreakLoop = false;
while(bigBlocksWritten<bigRuns && !bBreakLoop) {
bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigBlocksWritten)/(double)bigRuns);
bBreakLoop=mEffectEqualization->TrackProgress(count, (double)(bigBlocksWritten)/bigRuns.as_double());
if( bBreakLoop )
break;
mDataMutex.Lock(); // Get in line for data
@@ -1028,7 +1033,7 @@ bool EffectEqualization48x::ProcessBuffer8x(BufferInfo *bufferInfo)
if(bufferInfo->mBufferLength%mBlockSize || mBufferCount!=8)
return false;
sampleCount blockCount=bufferInfo->mBufferLength/mBlockSize;
auto blockCount=bufferInfo->mBufferLength/mBlockSize;
__m128 *readBlocks[8]; // some temps so we dont destroy the vars in the struct
__m128 *writeBlocks[8];

View File

@@ -44,11 +44,11 @@ public:
BufferInfo() { mBufferLength=0; mBufferStatus=BufferEmpty; mContiguousBufferSize=0; };
float* mBufferSouce[__MAXBUFFERCOUNT];
float* mBufferDest[__MAXBUFFERCOUNT];
int mBufferLength;
size_t mBufferLength;
size_t mFftWindowSize;
size_t mFftFilterSize;
float* mScratchBuffer;
int mContiguousBufferSize;
size_t mContiguousBufferSize;
EQBufferStatus mBufferStatus;
};
@@ -127,7 +127,7 @@ private:
bool ProcessTail(WaveTrack * t, WaveTrack * output, sampleCount start, sampleCount len);
bool ProcessBuffer(fft_type *sourceBuffer, fft_type *destBuffer, sampleCount bufferLength);
bool ProcessBuffer(fft_type *sourceBuffer, fft_type *destBuffer, size_t bufferLength);
bool ProcessBuffer1x(BufferInfo *bufferInfo);
bool ProcessOne1x(int count, WaveTrack * t, sampleCount start, sampleCount len);
void Filter1x(size_t len, float *buffer, float *scratchBuffer);
@@ -145,15 +145,15 @@ private:
#endif
EffectEqualization* mEffectEqualization;
int mThreadCount;
size_t mThreadCount;
size_t mFilterSize;
size_t mBlockSize;
size_t mWindowSize;
int mBufferCount;
int mWorkerDataCount;
int mBlocksPerBuffer;
int mScratchBufferSize;
int mSubBufferSize;
size_t mWorkerDataCount;
size_t mBlocksPerBuffer;
size_t mScratchBufferSize;
size_t mSubBufferSize;
float *mBigBuffer;
BufferInfo* mBufferInfo;
wxMutex mDataMutex;