mirror of
https://github.com/cookiengineer/audacity
synced 2025-05-04 17:49:45 +02:00
105 lines
3.5 KiB
C++
105 lines
3.5 KiB
C++
/***************************************************************************
|
|
|
|
Audacity: A Digital Audio Editor
|
|
|
|
VoiceKey.h: a class implementing a voice key
|
|
|
|
(c) 2002-2005 Shane T. Mueller
|
|
Distributed under the terms of the GPL Version 2 or later.
|
|
|
|
***************************************************************************/
|
|
#ifndef __AUDACITY_VOICEKEY__
|
|
#define __AUDACITY_VOICEKEY__
|
|
|
|
|
|
#ifndef M_PI
|
|
#define M_PI 3.14159265358979323846 /* pi */
|
|
#endif
|
|
|
|
#include "audacity/Types.h"
|
|
|
|
class WaveTrack;
|
|
|
|
// Identifiers for the statistics a voice key can be based on.
// Every non-zero enumerator is a distinct power of two, so the values do not
// overlap bitwise (presumably so several can be OR-ed into one mask -- confirm
// against the callers).
enum VoiceKeyTypes
{
   VKT_NONE                   = 0,
   VKT_ENERGY                 = 1,
   VKT_SIGN_CHANGES_LOW       = 2,
   VKT_SIGN_CHANGES_HIGH      = 4,
   VKT_DIRECTION_CHANGES_LOW  = 8,
   VKT_DIRECTION_CHANGES_HIGH = 16
};
|
|
|
|
class VoiceKey {
|
|
|
|
public:
|
|
VoiceKey();
|
|
~VoiceKey();
|
|
sampleCount OnForward (const WaveTrack & t, sampleCount start, sampleCount len);
|
|
sampleCount OnBackward (const WaveTrack & t, sampleCount start, sampleCount len);
|
|
sampleCount OffForward (const WaveTrack & t, sampleCount start, sampleCount len);
|
|
sampleCount OffBackward (const WaveTrack & t, sampleCount start, sampleCount len);
|
|
|
|
void CalibrateNoise(const WaveTrack & t, sampleCount start, sampleCount len);
|
|
void AdjustThreshold(double t);
|
|
|
|
|
|
bool AboveThreshold(const WaveTrack & t, sampleCount start,sampleCount len);
|
|
|
|
void SetKeyType(bool erg, bool scLow, bool scHigh,
|
|
bool dcLow, bool dcHigh);
|
|
|
|
private:
|
|
|
|
double mWindowSize; //Size of analysis window, in milliseconds
|
|
|
|
double mThresholdAdjustment; //User-accessible sensitivity calibration variable
|
|
|
|
double mEnergyMean;
|
|
double mEnergySD;
|
|
double mSignChangesMean;
|
|
double mSignChangesSD;
|
|
double mDirectionChangesMean;
|
|
double mDirectionChangesSD;
|
|
|
|
double mThresholdEnergy; // Threshold[*] is equal to [*]Mean + [*]SD * ThresholdAdjustment
|
|
double mThresholdSignChangesLower;
|
|
double mThresholdSignChangesUpper;
|
|
double mThresholdDirectionChangesLower;
|
|
double mThresholdDirectionChangesUpper;
|
|
|
|
//These determine which statistics should be used.
|
|
bool mUseEnergy;
|
|
bool mUseSignChangesLow;
|
|
bool mUseSignChangesHigh;
|
|
bool mUseDirectionChangesLow;
|
|
bool mUseDirectionChangesHigh;
|
|
|
|
|
|
double mSilentWindowSize; //Time in milliseconds of below-threshold windows required for silence
|
|
double mSignalWindowSize; //Time in milliseconds of above-threshold windows required for speech
|
|
|
|
double TestEnergy (const WaveTrack & t, sampleCount start,sampleCount len);
|
|
double TestSignChanges (
|
|
const WaveTrack & t, sampleCount start, sampleCount len);
|
|
double TestDirectionChanges(
|
|
const WaveTrack & t, sampleCount start, sampleCount len);
|
|
|
|
void TestEnergyUpdate (double & prevErg, int length, const float & drop, const float & add);
|
|
void TestSignChangesUpdate(double & currentsignchanges,int length, const float & a1,
|
|
const float & a2, const float & z1, const float & z2);
|
|
void TestDirectionChangesUpdate(double & currentdirectionchanges,int length,
|
|
int & atrend, const float & a1, const float & a2,
|
|
int & ztrend, const float & z1, const float & z2);
|
|
|
|
};
|
|
|
|
|
|
// Sign of an integer: -1 for negative input, +1 for everything else.
// Zero is treated as positive, so the result is never 0.
inline int sgn(int number)
{
   if (number < 0)
      return -1;
   return 1;
}
|
|
|
|
//This returns a logistic density based on a z-score
|
|
// a logistic distribution with scale s has variance (pi*s)^2/3
|
|
|
|
//inline float logistic(float z){ return fexp(-1 * z/(pi / sqrt(3)) / (1 + pow(fexp(-1 * z/(pi / sqrt(3))),2)));}
|
|
#endif
|