1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-08-10 17:11:17 +02:00

Some rewriting in currently unused VoiceKey...

... involving distinction of size_t and sampleCount
This commit is contained in:
Paul Licameli 2016-08-21 11:55:00 -04:00
parent fc584fd2cf
commit d82e87cac3

View File

@ -39,7 +39,8 @@ using std::endl;
VoiceKey::VoiceKey(){
VoiceKey::VoiceKey()
{
mWindowSize = 0.01; //size of analysis window in seconds
@ -66,7 +67,8 @@ VoiceKey::VoiceKey(){
};
VoiceKey::~VoiceKey(){
VoiceKey::~VoiceKey()
{
};
@ -83,7 +85,8 @@ VoiceKey::~VoiceKey(){
//Move forward to find an ON region.
sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount len) {
sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount len)
{
if((mWindowSize) >= len+10){
@ -95,7 +98,7 @@ sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount l
wxMessageBox(_("Selection is too small to use voice key."));
return start;
}
else{
else {
sampleCount lastsubthresholdsample; // keeps track of the sample number of the last sample to not exceed the threshold
@ -104,9 +107,8 @@ sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount l
unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
unsigned int SignalWindowSizeInt = (unsigned int)(rate * mSignalWindowSize); //This much signal is necessary to trip key
int samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
lastsubthresholdsample = start; //start this off at the selection start
unsigned int i; //iterates through waveblock
int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
int blocksize; //The final block may be smaller than WindowSizeInt, so use this
@ -116,13 +118,14 @@ sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount l
//go through one sample at a time.
//If there are fewer than 10 samples leftover, don't bother.
for(i = start; samplesleft >=10; i+=(WindowSizeInt-1) , samplesleft -= (WindowSizeInt -1)){
for(auto i = start; samplesleft >= 10;
i += (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
//Set blocksize so that it is the right size
if((unsigned int)samplesleft < WindowSizeInt){
blocksize = samplesleft;
}
else{
else {
blocksize = WindowSizeInt;
}
@ -130,7 +133,7 @@ sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount l
if(AboveThreshold(t,i,blocksize))
{
blockruns++; //Hit
} else{
} else {
blockruns=0; //Miss--start over
lastsubthresholdsample = i;
}
@ -142,17 +145,17 @@ sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount l
}
//Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
if(samplesleft > 10){
if(samplesleft > 10) {
//Calculate how many to scan through--we only have to go through (at most)
//the first window + 1 samples--but we need another window samples to draw from.
samplesleft = 2*WindowSizeInt+1;
auto remaining = 2*WindowSizeInt+1;
//To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
//Only go through the first SignalWindowSizeInt samples, and choose the first that trips the key.
float *buffer = new float[samplesleft];
t.Get((samplePtr)buffer, floatSample,lastsubthresholdsample,samplesleft);
float *buffer = new float[remaining];
t.Get((samplePtr)buffer, floatSample, lastsubthresholdsample, remaining);
@ -178,7 +181,8 @@ sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount l
//Now, go through the sound again, sample by sample.
for(i=0; i<SignalWindowSizeInt-WindowSizeInt;i++){
size_t i;
for(i = 0; i < SignalWindowSizeInt - WindowSizeInt; i++) {
int tests = 0;
int testThreshold = 0;
@ -227,9 +231,9 @@ sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount l
//When we get here, i+lastsubthresholdsample is the best guess for where the word starts
delete [] buffer;
return i+lastsubthresholdsample;
return i + lastsubthresholdsample;
}
else{
else {
//If we failed to find anything, return the start position
return start ;
}
@ -237,7 +241,8 @@ sampleCount VoiceKey::OnForward (WaveTrack & t, sampleCount start, sampleCount l
}
//Move backward from end to find an ON region.
sampleCount VoiceKey::OnBackward (WaveTrack & t, sampleCount end, sampleCount len) {
sampleCount VoiceKey::OnBackward (WaveTrack & t, sampleCount end, sampleCount len)
{
if((mWindowSize) >= len+10){
@ -245,7 +250,7 @@ sampleCount VoiceKey::OnBackward (WaveTrack & t, sampleCount end, sampleCount le
wxMessageBox(_("Selection is too small to use voice key."));
return end;
}
else{
else {
sampleCount lastsubthresholdsample; // keeps track of the sample number of the last sample to not exceed the threshold
@ -254,9 +259,8 @@ sampleCount VoiceKey::OnBackward (WaveTrack & t, sampleCount end, sampleCount le
unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
//unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
int samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
lastsubthresholdsample = end; //start this off at the end
unsigned int i; //iterates through waveblock
int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
int blocksize; //The final block may be smaller than WindowSizeInt, so use this
@ -265,13 +269,14 @@ sampleCount VoiceKey::OnBackward (WaveTrack & t, sampleCount end, sampleCount le
//of above-threshold blocks occur, we return to the last sub-threshold block and
//go through one sample at a time.
//If there are fewer than 10 samples leftover, don't bother.
for(i = end - WindowSizeInt; samplesleft >=10; i-=(WindowSizeInt-1) , samplesleft -= (WindowSizeInt -1)){
for(auto i = end - WindowSizeInt; samplesleft >= 10;
i -= (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
//Set blocksize so that it is the right size
if(samplesleft < (int)WindowSizeInt){
blocksize = samplesleft;
}
else{
else {
blocksize = WindowSizeInt;
}
@ -294,21 +299,21 @@ sampleCount VoiceKey::OnBackward (WaveTrack & t, sampleCount end, sampleCount le
}
//Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
if(samplesleft > 10){
if(samplesleft > 10) {
//Calculate how many to scan through--we only have to go through (at most)
//the first window + 1 samples--but we need another window samples to draw from.
samplesleft = 2*WindowSizeInt+1;
auto remaining = 2*WindowSizeInt+1;
//To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
//Only go through the first mSilentWindowSizeInt samples, and choose the first that trips the key.
float *buffer = new float[samplesleft];
t.Get((samplePtr)buffer, floatSample, lastsubthresholdsample-samplesleft,samplesleft);
float *buffer = new float[remaining];
t.Get((samplePtr)buffer, floatSample, lastsubthresholdsample-remaining, remaining);
//Initialize these trend markers atrend and ztrend. They keep track of the
//up/down trends at the start and end of the evaluation window.
int atrend = sgn(buffer[samplesleft - 2]-buffer[samplesleft - 1]);
int ztrend = sgn(buffer[samplesleft - WindowSizeInt-2]-buffer[samplesleft - WindowSizeInt-2]);
int atrend = sgn(buffer[remaining - 2]-buffer[remaining - 1]);
int ztrend = sgn(buffer[remaining - WindowSizeInt-2]-buffer[remaining - WindowSizeInt-2]);
double erg=0;
double sc = 0;
@ -323,7 +328,8 @@ sampleCount VoiceKey::OnBackward (WaveTrack & t, sampleCount end, sampleCount le
dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
//Now, go through the sound again, sample by sample.
for(i=samplesleft-1; i>WindowSizeInt; i--){
size_t i;
for(i = remaining - 1; i > WindowSizeInt; i--) {
int tests = 0;
int testThreshold = 0;
//Update the test statistics
@ -366,9 +372,9 @@ sampleCount VoiceKey::OnBackward (WaveTrack & t, sampleCount end, sampleCount le
//When we get here, i+lastsubthresholdsample is the best guess for where the word starts
delete [] buffer;
return lastsubthresholdsample - samplesleft + i;
return lastsubthresholdsample - remaining + i;
}
else{
else {
//If we failed to find anything, return the start position
return end ;
}
@ -377,14 +383,15 @@ sampleCount VoiceKey::OnBackward (WaveTrack & t, sampleCount end, sampleCount le
//Move forward from the start to find an OFF region.
sampleCount VoiceKey::OffForward (WaveTrack & t, sampleCount start, sampleCount len) {
sampleCount VoiceKey::OffForward (WaveTrack & t, sampleCount start, sampleCount len)
{
if((mWindowSize) >= len+10){
wxMessageBox(_("Selection is too small to use voice key."));
return start;
}
else{
else {
sampleCount lastsubthresholdsample; // keeps track of the sample number of the last sample to not exceed the threshold
@ -394,9 +401,8 @@ sampleCount VoiceKey::OffForward (WaveTrack & t, sampleCount start, sampleCount
unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
int samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
lastsubthresholdsample = start; //start this off at the selection start
unsigned int i; //iterates through waveblock
int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
int blocksize; //The final block may be smaller than WindowSizeInt, so use this
@ -404,13 +410,14 @@ sampleCount VoiceKey::OffForward (WaveTrack & t, sampleCount start, sampleCount
//of above-threshold blocks occur, we return to the last sub-threshold block and
//go through one sample at a time.
//If there are fewer than 10 samples leftover, don't bother.
for(i = start; samplesleft >=10; i+=(WindowSizeInt-1) , samplesleft -= (WindowSizeInt -1)){
for(auto i = start; samplesleft >= 10;
i += (WindowSizeInt - 1) , samplesleft -= (WindowSizeInt - 1)) {
//Set blocksize so that it is the right size
if(samplesleft < (int)WindowSizeInt){
blocksize = samplesleft;
}
else{
else {
blocksize = WindowSizeInt;
}
@ -431,17 +438,17 @@ sampleCount VoiceKey::OffForward (WaveTrack & t, sampleCount start, sampleCount
}
//Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
if(samplesleft > 10){
if(samplesleft > 10) {
//Calculate how many to scan through--we only have to go through (at most)
//the first window + 1 samples--but we need another window samples to draw from.
samplesleft = 2*WindowSizeInt+1;
auto remaining = 2*WindowSizeInt+1;
//To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
//Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key.
float *buffer = new float[samplesleft];
t.Get((samplePtr)buffer, floatSample, lastsubthresholdsample,samplesleft);
float *buffer = new float[remaining];
t.Get((samplePtr)buffer, floatSample, lastsubthresholdsample, remaining);
//Initialize these trend markers atrend and ztrend. They keep track of the
//up/down trends at the start and end of the evaluation window.
@ -462,7 +469,8 @@ sampleCount VoiceKey::OffForward (WaveTrack & t, sampleCount start, sampleCount
dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
//Now, go through the sound again, sample by sample.
for(i=0; i<SilentWindowSizeInt-WindowSizeInt;i++){
size_t i;
for(i = 0; i < SilentWindowSizeInt - WindowSizeInt; i++) {
int tests = 0;
int testThreshold = 0;
//Update the test statistics
@ -505,9 +513,9 @@ sampleCount VoiceKey::OffForward (WaveTrack & t, sampleCount start, sampleCount
//When we get here, i+lastsubthresholdsample is the best guess for where the word starts
delete [] buffer;
return i+lastsubthresholdsample;
return i + lastsubthresholdsample;
}
else{
else {
//If we failed to find anything, return the start position
return start ;
}
@ -516,7 +524,8 @@ sampleCount VoiceKey::OffForward (WaveTrack & t, sampleCount start, sampleCount
//Move backward from the end to find an OFF region
sampleCount VoiceKey::OffBackward (WaveTrack & t, sampleCount end, sampleCount len) {
sampleCount VoiceKey::OffBackward (WaveTrack & t, sampleCount end, sampleCount len)
{
if((mWindowSize) >= len+10){
@ -524,7 +533,7 @@ sampleCount VoiceKey::OffBackward (WaveTrack & t, sampleCount end, sampleCount l
wxMessageBox(_("Selection is too small to use voice key."));
return end;
}
else{
else {
sampleCount lastsubthresholdsample; // keeps track of the sample number of the last sample to not exceed the threshold
@ -533,9 +542,8 @@ sampleCount VoiceKey::OffBackward (WaveTrack & t, sampleCount end, sampleCount l
unsigned int WindowSizeInt = (unsigned int)(rate * mWindowSize); //Size of window to examine
//unsigned int SilentWindowSizeInt = (unsigned int)(rate * mSilentWindowSize); //This much signal is necessary to trip key
int samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
auto samplesleft = len - WindowSizeInt; //Indexes the number of samples remaining in the selection
lastsubthresholdsample = end; //start this off at the end
unsigned int i; //iterates through waveblock
int blockruns=0; //keeps track of the number of consecutive above-threshold blocks
int blocksize; //The final block may be smaller than WindowSizeInt, so use this
@ -543,13 +551,14 @@ sampleCount VoiceKey::OffBackward (WaveTrack & t, sampleCount end, sampleCount l
//of above-threshold blocks occur, we return to the last sub-threshold block and
//go through one sample at a time.
//If there are fewer than 10 samples leftover, don't bother.
for(i = end - WindowSizeInt; samplesleft >=10; i-=(WindowSizeInt-1) , samplesleft -= (WindowSizeInt -1)){
for(auto i = end - WindowSizeInt; samplesleft >= 10;
i -= (WindowSizeInt - 1), samplesleft -= (WindowSizeInt -1 )) {
//Set blocksize so that it is the right size
if(samplesleft < (int)WindowSizeInt){
blocksize = samplesleft;
}
else{
else {
blocksize = WindowSizeInt;
}
@ -572,21 +581,24 @@ sampleCount VoiceKey::OffBackward (WaveTrack & t, sampleCount end, sampleCount l
}
//Now, if we broke out early (samplesleft > 10), go back to the lastsubthresholdsample and look more carefully
if(samplesleft > 10){
if(samplesleft > 10) {
//Calculate how many to scan through--we only have to go through (at most)
//the first window + 1 samples--but we need another window samples to draw from.
samplesleft = 2*WindowSizeInt+1;
auto remaining = 2*WindowSizeInt+1;
//To speed things up, create a local buffer to store things in, to avoid the costly t.Get();
//Only go through the first SilentWindowSizeInt samples, and choose the first that trips the key.
float *buffer = new float[samplesleft];
t.Get((samplePtr)buffer, floatSample, lastsubthresholdsample-samplesleft,samplesleft);
float *buffer = new float[remaining];
t.Get((samplePtr)buffer, floatSample,
lastsubthresholdsample - remaining, remaining);
//Initialize these trend markers atrend and ztrend. They keep track of the
//up/down trends at the start and end of the evaluation window.
int atrend = sgn(buffer[samplesleft - 2]-buffer[samplesleft - 1]);
int ztrend = sgn(buffer[samplesleft - WindowSizeInt-2]-buffer[samplesleft - WindowSizeInt-2]);
//up/down trends at the start and end of the remaining window.
int atrend = sgn(buffer[remaining - 2] - buffer[remaining - 1]);
int ztrend =
sgn(buffer[remaining - WindowSizeInt - 2] -
buffer[remaining - WindowSizeInt - 2]);
double erg=0;
double sc=0;
@ -600,7 +612,8 @@ sampleCount VoiceKey::OffBackward (WaveTrack & t, sampleCount end, sampleCount l
dc = TestDirectionChanges(t,lastsubthresholdsample,WindowSizeInt);
//Now, go through the sound again, sample by sample.
for(i=samplesleft-1; i>WindowSizeInt; i--){
size_t i;
for(i = remaining - 1; i > WindowSizeInt; i--) {
int tests = 0;
int testThreshold = 0;
@ -646,9 +659,9 @@ sampleCount VoiceKey::OffBackward (WaveTrack & t, sampleCount end, sampleCount l
//When we get here, i+lastsubthresholdsample is the best guess for where the word starts
delete [] buffer;
return lastsubthresholdsample - samplesleft + i;
return lastsubthresholdsample - remaining + i;
}
else{
else {
//If we failed to find anything, return the start position
return end ;
}
@ -724,7 +737,8 @@ bool VoiceKey::AboveThreshold(WaveTrack & t, sampleCount start, sampleCount len)
//This adjusts the threshold. Larger values of t expand the noise region,
//making more things be classified as noise (and requiring a stronger signal).
void VoiceKey::AdjustThreshold(double t){
void VoiceKey::AdjustThreshold(double t)
{
mThresholdAdjustment = t;
mThresholdEnergy = mEnergyMean + mEnergySD * t;
@ -736,7 +750,8 @@ void VoiceKey::AdjustThreshold(double t){
//This 'calibrates' the voicekey to noise
void VoiceKey::CalibrateNoise(WaveTrack & t, sampleCount start, sampleCount len){
void VoiceKey::CalibrateNoise(WaveTrack & t, sampleCount start, sampleCount len)
{
//To calibrate the noise, we need to scan the sample block just like in the voicekey and
//calculate the mean and standard deviation of the test statistics.
//Then, we set the BaselineThreshold to be one
@ -779,13 +794,12 @@ void VoiceKey::CalibrateNoise(WaveTrack & t, sampleCount start, sampleCount len)
// int n = len - WindowSizeInt; //This is how many samples we have
int samplesleft = len - WindowSizeInt;
int i;
auto samplesleft = len - WindowSizeInt;
int blocksize;
int samples=0;
for(i = start; samplesleft >=10 ; i += (WindowSizeInt -1), samplesleft -= (WindowSizeInt -1) )
{
for(auto i = start; samplesleft >= 10;
i += (WindowSizeInt - 1), samplesleft -= (WindowSizeInt -1) ) {
//Take samples chunk-by-chunk.
//Normally, this should be in WindowSizeInt chunks, but at the end (if there are more than 10
//samples left) take a chunk that eats the rest of the samples.