mirror of
https://github.com/cookiengineer/audacity
synced 2026-03-06 14:35:32 +01:00
Move library tree where it belongs
This commit is contained in:
475
lib-src/libscorealign/gen_chroma.cpp
Normal file
475
lib-src/libscorealign/gen_chroma.cpp
Normal file
@@ -0,0 +1,475 @@
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "malloc.h"
|
||||
#endif
|
||||
#include "stdlib.h" // for OSX compatibility, malloc.h -> stdlib.h
|
||||
#include "stdio.h"
|
||||
#include "assert.h"
|
||||
#include "string.h"
|
||||
#include "math.h"
|
||||
#include <fstream>
|
||||
#include "allegro.h"
|
||||
#include "fft3/FFT3.h"
|
||||
#include "audioreader.h"
|
||||
#include "scorealign.h"
|
||||
#include "gen_chroma.h"
|
||||
#include "comp_chroma.h"
|
||||
#include "mfmidi.h"
|
||||
#include "sautils.h"
|
||||
#ifdef SA_VERBOSE
|
||||
#include <iostream> // cout
|
||||
#endif
|
||||
using namespace std;
|
||||
|
||||
//if 1, causes printing internally
|
||||
#define PRINT_BIN_ENERGY 1
|
||||
|
||||
#define p1 0.0577622650466621
|
||||
#define p2 2.1011784386926213
|
||||
|
||||
// each row is one chroma vector,
|
||||
// data is stored as an array of chroma vectors:
|
||||
// vector 1, vector 2, ...
|
||||
#define CHROM(row, column) AREF2((*chrom_energy), row, column)
|
||||
|
||||
float hz_to_step(float hz)
|
||||
{
|
||||
return float((log(hz) - p2) / p1);
|
||||
}
|
||||
|
||||
/* GEN_MAGNITUDE
|
||||
given the real and imaginary portions of a complex FFT function, compute
|
||||
the magnitude of the fft bin.
|
||||
given input of 2 arrays (inR and inI) of length n, takes the ith element
|
||||
from each, squares them, sums them, takes the square root of the sum and
|
||||
puts the output into the ith position in the array out.
|
||||
|
||||
NOTE: out should be length n
|
||||
*/
|
||||
void gen_Magnitude(float* inR,float* inI, int low, int hi, float* out)
|
||||
{
|
||||
int i;
|
||||
for (i = low; i < hi; i++) {
|
||||
float magVal = sqrt(inR[i] * inR[i] + inI[i] * inI[i]);
|
||||
//printf(" %d: sqrt(%g^2+%g^2)=%g\n",i,inR[i],inI[i+1],magVal);
|
||||
out[i]= magVal;
|
||||
#ifdef SA_VERBOSE
|
||||
if (i == 1000) printf("gen_Magnitude: %d %g\n", i, magVal);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* PRINT_BINS
|
||||
This function is intended for debugging purposes.
|
||||
pass in an array representing the "mid point"
|
||||
of each bin, and the number of bins. The
|
||||
function will print out:
|
||||
i value
|
||||
index falue
|
||||
low range of the bin
|
||||
middle of the bin
|
||||
high range of the bin
|
||||
*/
|
||||
void print_Bins(float* bins, int numBins){
|
||||
printf("BINS: \n");
|
||||
int i;
|
||||
for (i=0; i<numBins; i++) {
|
||||
int index = i % numBins;
|
||||
int indexNext = (index + 1) % numBins;
|
||||
int indexPrev = (index - 1) % numBins;
|
||||
|
||||
float maxValue =(bins[index]+bins[indexNext])/2;
|
||||
float minValue=(bins[index]+bins[indexPrev])/2;
|
||||
|
||||
if(index == 1)
|
||||
maxValue =bins[index]+(bins[index]-((bins[index]+bins[indexPrev])/2));
|
||||
if(index == 2)
|
||||
minValue =bins[index]-(((bins[index]+bins[indexNext])/2)-bins[index]);
|
||||
|
||||
printf("%d (%d) %g||%g||%g\n",i,index,minValue,bins[i],maxValue);
|
||||
}
|
||||
}
|
||||
|
||||
/* MIN_BIN_NUM
|
||||
Returns the index in the array of bins
|
||||
of the "smallest" bin. aka, the bin
|
||||
whose midpoint is the smallest.
|
||||
*/
|
||||
int min_Bin_Num(float* bins, int numBins){
|
||||
|
||||
int i;
|
||||
int minIndex=0;
|
||||
float minValue=bins[0];
|
||||
for (i = 0; i < numBins; i++) {
|
||||
if (minValue > bins[i]) {
|
||||
minValue = bins[i];
|
||||
minIndex = i;
|
||||
}
|
||||
}
|
||||
return minIndex;
|
||||
}
|
||||
|
||||
|
||||
/* GEN_HAMMING
|
||||
given data from reading in a section of a sound file
|
||||
applies the hamming function to each sample.
|
||||
n specifies the length of in and out.
|
||||
*/
|
||||
void gen_Hamming(float* in, int n, float* out)
|
||||
{
|
||||
int k = 0;
|
||||
for(k = 0; k < n; k++) {
|
||||
float internalValue = 2.0 * M_PI * k * (1.0 / (n - 1));
|
||||
float cosValue = cos(internalValue);
|
||||
float hammingValue = 0.54F + (-0.46F * cosValue);
|
||||
#ifdef SA_VERBOSE
|
||||
if (k == 1000) printf("Hamming %g\n", hammingValue);
|
||||
#endif
|
||||
out[k] = hammingValue * in[k];
|
||||
}
|
||||
}
|
||||
|
||||
/* NEXTPOWEROF2
|
||||
given an int n, finds the next power of 2 larger than
|
||||
or equal to n.
|
||||
*/
|
||||
int nextPowerOf2(int n)
|
||||
{
|
||||
int result = 1;
|
||||
while (result < n) result = (result << 1);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
/* GEN_CHROMA_AUDIO -- compute chroma for an audio file
|
||||
*/
|
||||
/*
|
||||
generates the chroma energy for a given sequence
|
||||
with a low cutoff and high cutoff.
|
||||
The chroma energy is placed in the float *chrom_energy.
|
||||
this 2D is an array of pointers.
|
||||
The function returns the number of frames
|
||||
(aka the length of the 1st dimention of chrom_energy)
|
||||
*/
|
||||
int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
|
||||
int lcutoff, float **chrom_energy, float *actual_frame_period,
|
||||
int id, bool verbose)
|
||||
{
|
||||
int i;
|
||||
double sample_rate = reader.get_sample_rate();
|
||||
float reg11[CHROMA_BIN_COUNT]; // temp storage1;
|
||||
float reg12[CHROMA_BIN_COUNT]; // temp storage2;
|
||||
|
||||
if (verbose) {
|
||||
printf ("==============FILE %d====================\n", id);
|
||||
reader.print_info();
|
||||
}
|
||||
// this seems like a poor way to set actual_frame_period_1 or _2 in
|
||||
// the Scorealign object, but I'm not sure what would be better:
|
||||
*actual_frame_period = reader.actual_frame_period;
|
||||
|
||||
for (i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
reg11[i] = -999;
|
||||
}
|
||||
for (i = 0; i < CHROMA_BIN_COUNT; i++){
|
||||
reg12[i] = 0;
|
||||
}
|
||||
|
||||
/*=============================================================*/
|
||||
|
||||
// allocate some buffers for use in the loop
|
||||
int full_data_size = nextPowerOf2(reader.samples_per_frame);
|
||||
if (verbose) {
|
||||
printf(" samples per frame is %d \n", reader.samples_per_frame);
|
||||
printf(" total chroma frames %d\n", reader.frame_count);
|
||||
// printf(" Window size %g second \n", reader.window_size);
|
||||
printf(" hopsize in samples %d \n", reader.hop_samples);
|
||||
printf(" fft size %d\n", full_data_size);
|
||||
}
|
||||
|
||||
float *full_data = ALLOC(float, full_data_size);
|
||||
float *fft_dataR = ALLOC(float, full_data_size);
|
||||
float *fft_dataI = ALLOC(float, full_data_size);
|
||||
//set to zero
|
||||
memset(full_data, 0, full_data_size * sizeof(float));
|
||||
memset(fft_dataR, 0, full_data_size * sizeof(float));
|
||||
memset(fft_dataI, 0, full_data_size * sizeof(float));
|
||||
//check to see if memory has been allocated
|
||||
assert(full_data != NULL);
|
||||
assert(fft_dataR != NULL);
|
||||
assert(fft_dataI != NULL);
|
||||
|
||||
int *bin_map = ALLOC(int, full_data_size);
|
||||
|
||||
//set up the chrom_energy array;
|
||||
*chrom_energy = ALLOC(float, reader.frame_count * (CHROMA_BIN_COUNT + 1));
|
||||
int cv_index = 0;
|
||||
|
||||
// set up mapping from spectral bins to chroma bins
|
||||
// ordinarily, we would add 0.5 to round to nearest bin, but we also
|
||||
// want to subtract 0.5 because the bin has a width of +/- 0.5. These
|
||||
// two cancel out, so we can just round down and get the right answer.
|
||||
int num_bins_to_use = (int) (hcutoff * full_data_size / sample_rate);
|
||||
// But then we want to add 1 because the loops will only go to
|
||||
// high_bin - 1:
|
||||
int high_bin = min(num_bins_to_use + 1, full_data_size);
|
||||
//printf("center freq of high bin is %g\n", (high_bin - 1) * sample_rate /
|
||||
// full_data_size);
|
||||
//printf("high freq of high bin is %g\n",
|
||||
// (high_bin - 1 + 0.5) * sample_rate / full_data_size);
|
||||
// If we add 0.5, we'll round to nearest bin center frequency, but
|
||||
// bin covers a frequency range that goes 0.5 bin width lower, so we
|
||||
// add 1 before rounding.
|
||||
int low_bin = (int) (lcutoff * full_data_size / sample_rate);
|
||||
//printf("center freq of low bin is %g\n", low_bin * sample_rate /
|
||||
// full_data_size);
|
||||
//printf("low freq of low bin is %g\n", (low_bin - 0.5) * sample_rate /
|
||||
// full_data_size);
|
||||
//printf("frequency spacing of bins is %g\n",
|
||||
// sample_rate / full_data_size);
|
||||
double freq = low_bin * sample_rate / full_data_size;
|
||||
for (i = low_bin; i < high_bin; i++) {
|
||||
float raw_bin = hz_to_step(freq);
|
||||
int round_bin = (int) (raw_bin + 0.5F);
|
||||
int mod_bin = round_bin % 12;
|
||||
bin_map[i] = mod_bin;
|
||||
freq += sample_rate / full_data_size;
|
||||
}
|
||||
// printf("BIN_COUNT is !!!!!!!!!!!!! %d\n",CHROMA_BIN_COUNT);
|
||||
|
||||
while (reader.read_window(full_data)) {
|
||||
//fill out array with 0's till next power of 2
|
||||
#ifdef SA_VERBOSE
|
||||
printf("samples_per_frame %d sample %g\n", reader.samples_per_frame,
|
||||
full_data[0]);
|
||||
#endif
|
||||
for (i = reader.samples_per_frame; i < full_data_size; i++)
|
||||
full_data[i] = 0;
|
||||
|
||||
#ifdef AS_VERBOSE
|
||||
printf("preFFT: full_data[1000] %g\n", full_data[1000]);
|
||||
#endif
|
||||
|
||||
//the data from the wave file, each point mult by a hamming value
|
||||
gen_Hamming(full_data, full_data_size, full_data);
|
||||
|
||||
#ifdef SA_VERBOSE
|
||||
printf("preFFT: hammingData[1000] %g\n", full_data[1000]);
|
||||
#endif
|
||||
FFT3(full_data_size, 0, full_data, NULL, fft_dataR, fft_dataI); //fft3
|
||||
|
||||
//given the fft, compute the energy of each point
|
||||
gen_Magnitude(fft_dataR, fft_dataI, low_bin, high_bin, full_data);
|
||||
|
||||
/*-------------------------------------
|
||||
GENERATE BINS AND PUT
|
||||
THE CORRECT ENERGY IN
|
||||
EACH BIN, CORRESPONDING
|
||||
TO THE CORRECT PITCH
|
||||
-------------------------------------*/
|
||||
|
||||
float binEnergy[CHROMA_BIN_COUNT];
|
||||
int binCount[CHROMA_BIN_COUNT];
|
||||
|
||||
for (i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
binCount[i] = 0;
|
||||
binEnergy[i] = 0.0;
|
||||
}
|
||||
|
||||
for (i = low_bin; i < high_bin; i++) {
|
||||
int mod_bin = bin_map[i];
|
||||
binEnergy[mod_bin] += full_data[i];
|
||||
binCount[mod_bin]++;
|
||||
}
|
||||
|
||||
/*-------------------------------------
|
||||
END OF BIN GENERATION
|
||||
-------------------------------------*/
|
||||
/* THE FOLLOWING LOOKS LIKE SOME OLD CODE TO COMPUTE
|
||||
* CHROMA FLUX, BUT IT IS NOT IN USE NOW
|
||||
|
||||
if (PRINT_BIN_ENERGY) {
|
||||
float mao1;
|
||||
float sum=0.;
|
||||
|
||||
for (i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
reg12[i]=binEnergy[i] / binCount[i];
|
||||
}
|
||||
|
||||
if (reg11[0]==-999){
|
||||
printf("Chroma Flux \n\n");
|
||||
} else {
|
||||
for (i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
}
|
||||
for (int k = 0; k < CHROMA_BIN_COUNT; k++) {
|
||||
float x = reg11[k];
|
||||
float y = reg12[k];
|
||||
float diff = x - y;
|
||||
sum += diff * diff;
|
||||
}
|
||||
mao1 = sqrt(sum);
|
||||
sequence++;
|
||||
sum = 0.;
|
||||
mao1 = 0.;
|
||||
}
|
||||
for (i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
reg11[i]=reg12[i];
|
||||
}
|
||||
//fclose(Pointer);
|
||||
}
|
||||
*/
|
||||
//put chrom energy into the returned array
|
||||
|
||||
#ifdef SA_VERBOSE
|
||||
printf("cv_index %d\n", cv_index);
|
||||
#endif
|
||||
assert(cv_index < reader.frame_count);
|
||||
for (i = 0; i < CHROMA_BIN_COUNT; i++)
|
||||
CHROM(cv_index, i) = binEnergy[i] / binCount[i];
|
||||
cv_index++;
|
||||
} // end of while ((readcount = read_mono_floats...
|
||||
|
||||
free(fft_dataI);
|
||||
free(fft_dataR);
|
||||
free(full_data);
|
||||
if (verbose)
|
||||
printf("\nGenerated Chroma. file%d_frames is %i\n", id, file1_frames);
|
||||
return cv_index;
|
||||
}
|
||||
|
||||
|
||||
class Event_list {
|
||||
public:
|
||||
Alg_note_ptr note;
|
||||
Event_list *next;
|
||||
|
||||
Event_list(Alg_event_ptr event_, Event_list *next_) {
|
||||
note = (Alg_note_ptr) event_;
|
||||
next = next_;
|
||||
}
|
||||
|
||||
~Event_list() {
|
||||
}
|
||||
};
|
||||
typedef Event_list *Event_list_ptr;
|
||||
|
||||
|
||||
/* gen_chroma_midi -- generate chroma vectors for midi file */
|
||||
/*
|
||||
generates the chroma energy for a given sequence
|
||||
with a low cutoff and high cutoff.
|
||||
The chroma energy is placed in the float *chrom_energy.
|
||||
this 2D is an array of pointers.
|
||||
The function returns the number of frames
|
||||
(aka the length of the 1st dimention of chrom_energy)
|
||||
*
|
||||
*
|
||||
Notes: keep a list of notes that are sounding.
|
||||
For each frame,
|
||||
zero the vector
|
||||
while next note starts before end of frame, insert note in list
|
||||
for each note in list, compute weight and add to vector. Remove
|
||||
if note ends before frame start time.
|
||||
How many frames?
|
||||
*/
|
||||
|
||||
int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
|
||||
float **chrom_energy, float *actual_frame_period,
|
||||
int id, bool verbose)
|
||||
{
|
||||
if (verbose) {
|
||||
printf ("==============FILE %d====================\n", id);
|
||||
SA_V(seq.write(cout, true));
|
||||
}
|
||||
/*=============================================================*/
|
||||
|
||||
*actual_frame_period = (frame_period) ; // since we don't quantize to samples
|
||||
|
||||
/*=============================================================*/
|
||||
|
||||
seq.convert_to_seconds();
|
||||
/* find duration */
|
||||
float dur = 0.0F;
|
||||
int nnotes = 0;
|
||||
nnotes= find_midi_duration(seq, &dur);
|
||||
|
||||
/*================================================================*/
|
||||
|
||||
int frame_count= (int)ceil(((float)dur/ frame_period + 1));
|
||||
|
||||
/*================================================================*/
|
||||
|
||||
if (verbose) {
|
||||
printf(" note count = %d\n", nnotes);
|
||||
printf(" duration in sec = %f\n", dur);
|
||||
printf(" chroma frames %d\n", frame_count);
|
||||
}
|
||||
|
||||
//set up the chrom_energy array;
|
||||
(*chrom_energy) = ALLOC(float, frame_count * (CHROMA_BIN_COUNT + 1));
|
||||
Event_list_ptr list = NULL;
|
||||
Alg_iterator iterator(&seq, false);
|
||||
iterator.begin();
|
||||
Alg_event_ptr event = iterator.next();
|
||||
int cv_index;
|
||||
for (cv_index = 0; cv_index < frame_count; cv_index++) {
|
||||
|
||||
/*====================================================*/
|
||||
|
||||
float frame_begin = max((cv_index * (frame_period)) -
|
||||
window_size/2 , 0.0F); //chooses zero if negative
|
||||
|
||||
float frame_end= frame_begin +(window_size/2);
|
||||
/*============================================================*/
|
||||
/* zero the vector */
|
||||
for (int i = 0; i < CHROMA_BIN_COUNT; i++) CHROM(cv_index, i) = 0;
|
||||
/* add new notes that are in the frame */
|
||||
while (event && event->time < frame_end) {
|
||||
if (event->is_note()) {
|
||||
list = new Event_list(event, list);
|
||||
}
|
||||
event = iterator.next();
|
||||
}
|
||||
/* remove notes that are no longer sounding */
|
||||
Event_list_ptr *ptr = &list;
|
||||
while (*ptr) {
|
||||
while ((*ptr) &&
|
||||
(*ptr)->note->time + (*ptr)->note->dur < frame_begin) {
|
||||
Event_list_ptr temp = *ptr;
|
||||
*ptr = (*ptr)->next;
|
||||
delete temp;
|
||||
}
|
||||
if (*ptr) ptr = &((*ptr)->next);
|
||||
}
|
||||
for (Event_list_ptr item = list; item; item = item->next) {
|
||||
/* compute duration of overlap */
|
||||
float overlap =
|
||||
min(frame_end, (float) (item->note->time + item->note->dur)) -
|
||||
max(frame_begin, (float) item->note->time);
|
||||
float velocity = item->note->loud;
|
||||
float weight = overlap * velocity;
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "%3d pitch %g key %d overlap %g velocity %g\n",
|
||||
cv_index, item->note->pitch, item->note->get_identifier(),
|
||||
overlap, velocity);
|
||||
#endif
|
||||
CHROM(cv_index, (int)item->note->pitch % 12) += weight;
|
||||
}
|
||||
#if DEBUG_LOG
|
||||
for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
fprintf(dbf, "%d:%g ", i, CHROM(cv_index, i));
|
||||
}
|
||||
fprintf(dbf, "\n\n");
|
||||
#endif
|
||||
}
|
||||
while (list) {
|
||||
Event_list_ptr temp = list;
|
||||
list = list->next;
|
||||
delete temp;
|
||||
}
|
||||
iterator.end();
|
||||
if (verbose)
|
||||
printf("\nGenerated Chroma. file%d_frames is %i\n", id, file1_frames);
|
||||
return frame_count;
|
||||
}
|
||||
Reference in New Issue
Block a user