1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-11-24 06:10:09 +01:00

Extensive changes to improve NoteTrack display and (some) editing, NoteTrack playback via MIDI, and Midi-to-Audio alignment.

This commit is contained in:
rbdannenberg
2010-09-18 21:02:36 +00:00
parent f6327602e8
commit a1f0e5ed5b
96 changed files with 5679 additions and 3566 deletions

View File

@@ -30,7 +30,6 @@ using namespace std;
// each row is one chroma vector,
// data is stored as an array of chroma vectors:
// vector 1, vector 2, ...
#define CHROM(row, column) AREF2((*chrom_energy), row, column)
float hz_to_step(float hz)
{
@@ -40,21 +39,19 @@ float hz_to_step(float hz)
/* GEN_MAGNITUDE
given the real and imaginary portions of a complex FFT function, compute
the magnitude of the fft bin.
given input of 2 arrays (inR and inI) of length n, takes the ith element
from each, squares them, sums them, takes the square root of the sum and
puts the output into the ith position in the array out.
NOTE: out should be length n
*/
void gen_Magnitude(float* inR,float* inI, int low, int hi, float* out)
void gen_Magnitude(float* inR, float* inI, int low, int hi, float* out)
{
int i;
for (i = low; i < hi; i++) {
float magVal = sqrt(inR[i] * inR[i] + inI[i] * inI[i]);
//printf(" %d: sqrt(%g^2+%g^2)=%g\n",i,inR[i],inI[i+1],magVal);
out[i]= magVal;
#ifdef SA_VERBOSE
if (i == 1000) printf("gen_Magnitude: %d %g\n", i, magVal);
if (i == 1000) fprintf(dbf, "gen_Magnitude: %d %g\n", i, magVal);
#endif
}
}
@@ -116,17 +113,12 @@ int min_Bin_Num(float* bins, int numBins){
applies the hamming function to each sample.
n specifies the length of in and out.
*/
void gen_Hamming(float* in, int n, float* out)
void gen_Hamming(float* h, int n)
{
int k = 0;
for(k = 0; k < n; k++) {
float internalValue = 2.0 * M_PI * k * (1.0 / (n - 1));
float cosValue = cos(internalValue);
float hammingValue = 0.54F + (-0.46F * cosValue);
#ifdef SA_VERBOSE
if (k == 1000) printf("Hamming %g\n", hammingValue);
#endif
out[k] = hammingValue * in[k];
int k;
for (k = 0; k < n; k++) {
float cos_value = (float) cos(2.0 * M_PI * k * (1.0 / n));
h[k] = 0.54F + (-0.46F * cos_value);
}
}
@@ -142,6 +134,36 @@ int nextPowerOf2(int n)
}
// normalize a chroma vector (from audio or midi) to have
// mean of 0 and std. dev. of 1
//
static void normalize(float *cv)
{
float avg = 0;
for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
avg += cv[i];
}
avg /= CHROMA_BIN_COUNT;
/* Normalize this frame to avg. 0 */
for (int i = 0; i < CHROMA_BIN_COUNT; i++)
cv[i] -= avg;
/* Calculate std. dev. for this frame */
float sum = 0;
for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
float x = cv[i];
sum += x * x;
}
float dev = sqrt(sum / CHROMA_BIN_COUNT);
if (dev == 0.0) dev = 1.0F; /* don't divide by zero */
/* Normalize this frame to std. dev. 1*/
for (int i = 0; i < CHROMA_BIN_COUNT; i++) cv[i] /= dev;
}
/* GEN_CHROMA_AUDIO -- compute chroma for an audio file
*/
/*
@@ -153,8 +175,8 @@ int nextPowerOf2(int n)
(aka the length of the 1st dimention of chrom_energy)
*/
int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
int lcutoff, float **chrom_energy, float *actual_frame_period,
int id, bool verbose)
int lcutoff, float **chrom_energy, double *actual_frame_period,
int id)
{
int i;
double sample_rate = reader.get_sample_rate();
@@ -165,9 +187,12 @@ int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
printf ("==============FILE %d====================\n", id);
reader.print_info();
}
// this seems like a poor way to set actual_frame_period_1 or _2 in
#if DEBUG_LOG
fprintf(dbf, "******** BEGIN AUDIO CHROMA COMPUTATION *********\n");
#endif
// this seems like a poor way to set actual_frame_period_0 or _1 in
// the Scorealign object, but I'm not sure what would be better:
*actual_frame_period = reader.actual_frame_period;
*actual_frame_period = float(reader.actual_frame_period);
for (i = 0; i < CHROMA_BIN_COUNT; i++) {
reg11[i] = -999;
@@ -230,7 +255,7 @@ int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
// sample_rate / full_data_size);
double freq = low_bin * sample_rate / full_data_size;
for (i = low_bin; i < high_bin; i++) {
float raw_bin = hz_to_step(freq);
float raw_bin = hz_to_step(float(freq));
int round_bin = (int) (raw_bin + 0.5F);
int mod_bin = round_bin % 12;
bin_map[i] = mod_bin;
@@ -238,24 +263,35 @@ int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
}
// printf("BIN_COUNT is !!!!!!!!!!!!! %d\n",CHROMA_BIN_COUNT);
// create Hamming window data
float *hamming = ALLOC(float, reader.samples_per_frame);
gen_Hamming(hamming, reader.samples_per_frame);
while (reader.read_window(full_data)) {
//fill out array with 0's till next power of 2
#ifdef SA_VERBOSE
printf("samples_per_frame %d sample %g\n", reader.samples_per_frame,
full_data[0]);
fprintf(dbf, "samples_per_frame %d sample %g\n",
reader.samples_per_frame, full_data[0]);
#endif
for (i = reader.samples_per_frame; i < full_data_size; i++)
full_data[i] = 0;
#ifdef AS_VERBOSE
printf("preFFT: full_data[1000] %g\n", full_data[1000]);
#ifdef SA_VERBOSE
fprintf(dbf, "preFFT: full_data[1000] %g\n", full_data[1000]);
#endif
//the data from the wave file, each point mult by a hamming value
gen_Hamming(full_data, full_data_size, full_data);
// compute the RMS, then apply the Hamming window to the data
float rms = 0.0f;
for (i = 0; i < reader.samples_per_frame; i++) {
float x = full_data[i];
rms += x * x;
full_data[i] = x * hamming[i];
}
rms = sqrt(rms / reader.samples_per_frame);
#ifdef SA_VERBOSE
printf("preFFT: hammingData[1000] %g\n", full_data[1000]);
fprintf(dbf, "preFFT: hammingData[1000] %g\n",
full_data[1000]);
#endif
FFT3(full_data_size, 0, full_data, NULL, fft_dataR, fft_dataI); //fft3
@@ -322,19 +358,42 @@ int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
//put chrom energy into the returned array
#ifdef SA_VERBOSE
printf("cv_index %d\n", cv_index);
fprintf(dbf, "cv_index %d\n", cv_index);
#endif
assert(cv_index < reader.frame_count);
for (i = 0; i < CHROMA_BIN_COUNT; i++)
CHROM(cv_index, i) = binEnergy[i] / binCount[i];
float *cv = AREF1(*chrom_energy, cv_index);
for (i = 0; i < CHROMA_BIN_COUNT; i++) {
cv[i] = binEnergy[i] / binCount[i];
}
if (rms < silence_threshold) {
// "silence" flag
cv[CHROMA_BIN_COUNT] = 1.0f;
} else {
cv[CHROMA_BIN_COUNT] = 0.0f;
// normalize the non-silent frames
normalize(cv);
}
#if DEBUG_LOG
fprintf(dbf, "%d@%g) ", cv_index, cv_index * reader.actual_frame_period);
for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
fprintf(dbf, "%d:%g ", i, cv[i]);
}
fprintf(dbf, " sil?:%g\n\n", cv[CHROMA_BIN_COUNT]);
#endif
cv_index++;
if (progress && cv_index % 10 == 0 &&
!progress->set_feature_progress(
float(cv_index * reader.actual_frame_period))) {
break;
}
} // end of while ((readcount = read_mono_floats...
free(hamming);
free(fft_dataI);
free(fft_dataR);
free(full_data);
if (verbose)
printf("\nGenerated Chroma. file%d_frames is %i\n", id, file1_frames);
printf("\nGenerated Chroma. file%d_frames is %i\n", id, file0_frames);
return cv_index;
}
@@ -362,7 +421,7 @@ typedef Event_list *Event_list_ptr;
The chroma energy is placed in the float *chrom_energy.
this 2D is an array of pointers.
The function returns the number of frames
(aka the length of the 1st dimention of chrom_energy)
(aka the length of the 1st dimension of chrom_energy)
*
*
Notes: keep a list of notes that are sounding.
@@ -374,25 +433,33 @@ typedef Event_list *Event_list_ptr;
How many frames?
*/
int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
float **chrom_energy, float *actual_frame_period,
int id, bool verbose)
int Scorealign::gen_chroma_midi(Alg_seq &seq, float dur, int nnotes,
int hcutoff, int lcutoff,
float **chrom_energy, double *actual_frame_period,
int id)
{
// silence_threshold is compared to the *average* of chroma bins.
// Rather than divide the sum by CHROMA_BIN_COUNT to compute the
// average, just compute the sum and compare to silence_threshold * 12
float threshold = (float) (silence_threshold * CHROMA_BIN_COUNT);
if (verbose) {
printf ("==============FILE %d====================\n", id);
SA_V(seq.write(cout, true));
}
/*=============================================================*/
#if DEBUG_LOG
fprintf(dbf, "******** BEGIN MIDI CHROMA COMPUTATION *********\n");
#endif /*=============================================================*/
*actual_frame_period = (frame_period) ; // since we don't quantize to samples
*actual_frame_period = frame_period; // since we don't quantize to samples
/*=============================================================*/
seq.convert_to_seconds();
/* find duration */
float dur = 0.0F;
int nnotes = 0;
nnotes= find_midi_duration(seq, &dur);
///* find duration */
//float dur = 0.0F;
//int nnotes = 0;
//nnotes = find_midi_duration(seq, &dur);
/*================================================================*/
@@ -417,13 +484,15 @@ int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
/*====================================================*/
float frame_begin = max((cv_index * (frame_period)) -
window_size/2 , 0.0F); //chooses zero if negative
float frame_begin = (float) max(cv_index * frame_period -
window_size / 2.0, 0.0);
//chooses zero if negative
float frame_end= frame_begin +(window_size/2);
float frame_end = (float) (cv_index * frame_period + window_size / 2.0);
/*============================================================*/
float *cv = AREF1(*chrom_energy, cv_index);
/* zero the vector */
for (int i = 0; i < CHROMA_BIN_COUNT; i++) CHROM(cv_index, i) = 0;
for (int i = 0; i < CHROMA_BIN_COUNT + 1; i++) cv[i] = 0;
/* add new notes that are in the frame */
while (event && event->time < frame_end) {
if (event->is_note()) {
@@ -442,6 +511,7 @@ int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
}
if (*ptr) ptr = &((*ptr)->next);
}
float sum = 0.0;
for (Event_list_ptr item = list; item; item = item->next) {
/* compute duration of overlap */
float overlap =
@@ -450,18 +520,34 @@ int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
float velocity = item->note->loud;
float weight = overlap * velocity;
#if DEBUG_LOG
fprintf(dbf, "%3d pitch %g key %d overlap %g velocity %g\n",
cv_index, item->note->pitch, item->note->get_identifier(),
overlap, velocity);
fprintf(dbf, "%3d pitch %g starting %g key %d overlap %g velocity %g\n",
cv_index, item->note->pitch, item->note->time,
item->note->get_identifier(), overlap, velocity);
#endif
CHROM(cv_index, (int)item->note->pitch % 12) += weight;
cv[(int) item->note->pitch % 12] += weight;
sum += weight;
}
if (sum < threshold) {
cv[CHROMA_BIN_COUNT] = 1.0;
} else {
normalize(cv);
}
#if DEBUG_LOG
fprintf(dbf, "%d@%g) ", cv_index, frame_begin);
for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
fprintf(dbf, "%d:%g ", i, CHROM(cv_index, i));
fprintf(dbf, "%d:%g ", i, cv[i]);
}
fprintf(dbf, "\n\n");
fprintf(dbf, " sil?:%g\n\n", cv[CHROMA_BIN_COUNT]);
#endif
if (cv_index % 10 == 0 && progress &&
!progress->set_feature_progress(
float(cv_index * *actual_frame_period))) {
break;
}
}
while (list) {
Event_list_ptr temp = list;
@@ -470,6 +556,6 @@ int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
}
iterator.end();
if (verbose)
printf("\nGenerated Chroma. file%d_frames is %i\n", id, file1_frames);
printf("\nGenerated Chroma. file%d_frames is %i\n", id, file0_frames);
return frame_count;
}