From ce92dce8562e2b407853f047ca4578b48ab66f73 Mon Sep 17 00:00:00 2001 From: Darrell Walisser Date: Sun, 28 Aug 2016 13:00:44 -0400 Subject: [PATCH 1/2] improved OpenMP SpecCache::Populate --- src/WaveClip.cpp | 78 +++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 44 deletions(-) diff --git a/src/WaveClip.cpp b/src/WaveClip.cpp index 47913e04d..f993a91a2 100644 --- a/src/WaveClip.cpp +++ b/src/WaveClip.cpp @@ -43,7 +43,6 @@ #include "Experimental.h" -//#undef _OPENMP #ifdef _OPENMP #include #else @@ -930,8 +929,18 @@ bool SpecCache::CalculateOneSpectrum int correctedX = (floor(0.5 + xx + timeCorrection * pixelsPerSecond / rate)); if (correctedX >= lowerBoundX && correctedX < upperBoundX) - result = true, - out[half * correctedX + bin] += power; + { + result = true; + + int index = half * correctedX + bin; +#ifdef _OPENMP + // This assignment can race if index reaches into another thread's bins. + // The probability of a race very low, so this carries little overhead, + // about 5% slower vs allowing it to race. + #pragma omp atomic +#endif + out[index] += power; + } } } } @@ -990,29 +999,6 @@ void SpecCache::Populate if (!autocorrelation) ComputeSpectrogramGainFactors(fftLen, rate, frequencyGain, gainFactors); -#ifdef _OPENMP - // todo: query # of threads or make it a setting - const int numThreads = 8; - omp_set_num_threads(numThreads); - - // We need certain per-thread data for thread safety - // Assumes WaveTrackCache is reentrant since it takes a const* to WaveTrack - struct { - WaveTrackCache* cache; - float* scratch; - } threadLocalStorage[numThreads]; - - // May as well use existing data for one of the threads - assert(numThreads > 0); - threadLocalStorage[0].cache = &waveTrackCache; - threadLocalStorage[0].scratch = &scratch[0]; - - for (int i = 1; i < numThreads; i++) { - threadLocalStorage[i].cache = new WaveTrackCache( waveTrackCache.GetTrack() ); - threadLocalStorage[i].scratch = new float[scratchSize]; - } -#endif - // Loop over the ranges before and after the copied portion and compute anew. // One of the ranges may be empty. for (int jj = 0; jj < 2; ++jj) { @@ -1020,24 +1006,36 @@ void SpecCache::Populate const int upperBoundX = jj == 0 ? copyBegin : numPixels; #ifdef _OPENMP - #pragma omp parallel for + // Storage for mutable per-thread data. + // private clause ensures one copy per thread + struct ThreadLocalStorage { + ThreadLocalStorage() { cache = nullptr; } + ~ThreadLocalStorage() { delete cache; } + + void init(WaveTrackCache &waveTrackCache, size_t scratchSize) { + if (!cache) { + cache = new WaveTrackCache(waveTrackCache.GetTrack()); + scratch.reserve(scratchSize); + } + } + WaveTrackCache* cache; + std::vector scratch; + } tls; + + #pragma omp parallel for private(tls) #endif for (auto xx = lowerBoundX; xx < upperBoundX; ++xx) { #ifdef _OPENMP - int threadNum = omp_get_thread_num(); - - assert(threadNum >=0 && threadNum < numThreads); - - WaveTrackCache* cache = threadLocalStorage[threadNum].cache; - float* buffer = threadLocalStorage[threadNum].scratch; + tls.init(waveTrackCache, scratchSize); + WaveTrackCache& cache = *tls.cache; + float* buffer = &tls.scratch[0]; #else - WaveTrackCache* cache = &waveTrackCache; + WaveTrackCache& cache = waveTrackCache; float* buffer = &scratch[0]; #endif - CalculateOneSpectrum( - settings, *cache, xx, numSamples, + settings, cache, xx, numSamples, offset, rate, pixelsPerSecond, lowerBoundX, upperBoundX, gainFactors, buffer, &freq[0]); @@ -1098,14 +1096,6 @@ void SpecCache::Populate } } } - -#ifdef _OPENMP - for (int i = 1; i < numThreads; i++) - { - delete[] threadLocalStorage[i].scratch; - delete threadLocalStorage[i].cache; - } -#endif } bool WaveClip::GetSpectrogram(WaveTrackCache &waveTrackCache, From 180da5a7696281832e84e88d898d1c12f97b6f5c Mon Sep 17 00:00:00 2001 From: Darrell Walisser Date: Wed, 31 Aug 2016 11:22:29 -0400 Subject: [PATCH 2/2] change reserve to resize, clarify what omp atomic is used --- src/WaveClip.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/WaveClip.cpp b/src/WaveClip.cpp index f993a91a2..ccd1c1771 100644 --- a/src/WaveClip.cpp +++ b/src/WaveClip.cpp @@ -937,7 +937,7 @@ bool SpecCache::CalculateOneSpectrum // This assignment can race if index reaches into another thread's bins. // The probability of a race very low, so this carries little overhead, // about 5% slower vs allowing it to race. - #pragma omp atomic + #pragma omp atomic update #endif out[index] += power; } @@ -1015,7 +1015,7 @@ void SpecCache::Populate void init(WaveTrackCache &waveTrackCache, size_t scratchSize) { if (!cache) { cache = new WaveTrackCache(waveTrackCache.GetTrack()); - scratch.reserve(scratchSize); + scratch.resize(scratchSize); } } WaveTrackCache* cache;