mirror of
https://github.com/cookiengineer/audacity
synced 2025-10-10 16:43:33 +02:00
Extensive changes to improve NoteTrack display and (some) editing, NoteTrack playback via MIDI, and Midi-to-Audio alignment.
This commit is contained in:
@@ -15,7 +15,12 @@ are estimated directly from pitch data without synthesis. A similarity matrix
|
||||
is constructed and dynamic programming finds the lowest-cost path through the
|
||||
matrix.
|
||||
|
||||
(some more details should be added here about handling boundaries)
|
||||
The alignment can optionally skip the initial silence and final silence
|
||||
frames in both files. The "best" path matches from the beginning times
|
||||
(with or without silence) to the end of either sequence but not
|
||||
necessarily to the end of both. In other words, the match will match
|
||||
all of the first file to an initial segment of the second, or it will
|
||||
match all of the second to an initial segment of the first.
|
||||
|
||||
Output includes a map from one version to the other. If one file is MIDI,
|
||||
output also includes (1) an estimated transcript in ASCII format with time,
|
||||
@@ -32,10 +37,15 @@ For Windows, open score-align.vcproj (probably out of date now -- please
|
||||
|
||||
Command line parameters:
|
||||
|
||||
scorealign [-<flags> [<period><windowsize><path> <smooth><trans> <midi>]]
|
||||
scorealign [-<flags> [<period> <windowsize> <path> <smooth>
|
||||
<trans> <midi> <beatmap> <image>]]
|
||||
<file1> [<file2>]
|
||||
specifying only <file1> simply transcribes MIDI in <file1> to
|
||||
transcription.txt. Otherwise, align <file1> and <file2>.
|
||||
Flags are all listed together, e.g. -hwrstm, followed by filenames
|
||||
and arguments corresponding to the flags in the order the flags are
|
||||
given. Do not try something like "-h 0.1 -w 0.25". Instead, use
|
||||
"-hw 0.1 0.25". The flags are:
|
||||
-h 0.25 indicates a frame period of 0.25 seconds
|
||||
-w 0.25 indicates a window size of 0.25 seconds.
|
||||
-r indicates filename to write raw alignment path to (default path.data)
|
||||
@@ -44,6 +54,8 @@ scorealign [-<flags> [<period><windowsize><path> <smooth><trans> <midi>]]
|
||||
(default is transcription.txt)
|
||||
-m is filename to write the time aligned midi file (default is midi.mid)
|
||||
-b is filename to write the time aligned beat times (default is beatmap.txt)
|
||||
-i is filename to write an image of the distance matrix
|
||||
(default is distance.pnm)
|
||||
-o 2.0 indicates a smoothing window of 2.0s
|
||||
-p 3.0 means pre-smooth with a 3s window
|
||||
-x 6.0 indicates 6s line segment approximation
|
||||
@@ -80,9 +92,9 @@ linear regression values. Next, a hill-climbing search is performed to
|
||||
minimize the total distance along the path. This is like dynamic programming
|
||||
except that each line spans many frames, so the resulting path is forced to
|
||||
be fairly straight. Linear interpolation is used to estimate chroma distance
|
||||
since the lines do always pass through integer frame locations. This approach
|
||||
is probably good when the audio is known to have a steady tempo or be
|
||||
performed with tempo changes that match those in the midi file.
|
||||
since the lines do not always pass through integer frame locations. This
|
||||
approach is probably good when the audio is known to have a steady tempo or
|
||||
be performed with tempo changes that match those in the midi file.
|
||||
|
||||
Some notes on the software architecture of scorealign:
|
||||
|
||||
|
29
lib-src/libscorealign/ScoreAlignParams.h
Normal file
29
lib-src/libscorealign/ScoreAlignParams.h
Normal file
@@ -0,0 +1,29 @@
|
||||
/**********************************************************************
|
||||
|
||||
Audacity: A Digital Audio Editor
|
||||
|
||||
ScoreAlignParams.h
|
||||
|
||||
**********************************************************************/
|
||||
|
||||
#ifndef __AUDACITY_SCORE_ALIGN_PARAMS__
|
||||
#define __AUDACITY_SCORE_ALIGN_PARAMS__
|
||||
|
||||
// Plain data bundle exchanged with the score-alignment code. The fields
// before the "information returned" marker appear to be settings supplied
// by the caller; the fields after it are filled in by the alignment.
struct ScoreAlignParams {
|
||||
// presumably the analysis frame period in seconds (cf. scorealign -h) -- TODO confirm
double mFramePeriod;
|
||||
// presumably the analysis window size in seconds (cf. scorealign -w) -- TODO confirm
double mWindowSize;
|
||||
// presumably the level below which a frame counts as silent -- TODO confirm
double mSilenceThreshold;
|
||||
// NOTE(review): declared double although the name reads like an on/off
// option -- confirm how callers encode it
double mForceFinalAlignment;
|
||||
// NOTE(review): declared double although the name reads like an on/off
// option -- confirm how callers encode it
double mIgnoreSilence;
|
||||
// presumably the pre-smoothing window (cf. scorealign -p) -- TODO confirm
double mPresmoothTime;
|
||||
// presumably the line-segment length (cf. scorealign -x) -- TODO confirm
double mLineTime;
|
||||
// presumably the smoothing window (cf. scorealign -o) -- TODO confirm
double mSmoothTime;
|
||||
// information returned from score alignment:
|
||||
int mStatus; // wxID_OK or not?
|
||||
// NOTE(review): the unit of the four start/end values below (seconds vs.
// frames) is not stated in this header -- confirm against the code that
// fills them in
double mAudioStart;
|
||||
double mAudioEnd;
|
||||
double mMidiStart;
|
||||
double mMidiEnd;
|
||||
};
|
||||
|
||||
#endif
|
@@ -6,6 +6,7 @@
|
||||
#include "stdlib.h"
|
||||
#include "audioreader.h"
|
||||
#include "allegro.h"
|
||||
#include "scorealign.h"
|
||||
#include "scorealign-glue.h"
|
||||
#include "audiomixerreader.h"
|
||||
|
||||
@@ -26,7 +27,7 @@ Audio_mixer_reader::Audio_mixer_reader(void *mixer_,
|
||||
index = 0;
|
||||
channels = chans;
|
||||
sample_rate = srate;
|
||||
total_frames = end_time * srate + 0.5 /* for rounding */;
|
||||
total_frames = (long) (end_time * srate + 0.5 /* for rounding */);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -1,64 +1,15 @@
|
||||
|
||||
#include <math.h>
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include "allegro.h"
|
||||
#include "audioreader.h"
|
||||
#include "scorealign.h"
|
||||
#include "gen_chroma.h"
|
||||
#include "comp_chroma.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/* NORM_CHROMA
|
||||
*
|
||||
* This function normalizes the chroma for each frame of the
|
||||
* chrom_energy to mean 0 and std. dev. 1. But if this is a
|
||||
* "silent frame", set the 13th element to 1.
|
||||
*/
|
||||
void norm_chroma( int len, float *chrom_energy ) {
|
||||
|
||||
float avg = 0;
|
||||
float dev = 0;
|
||||
float sum = 0;
|
||||
|
||||
for( int i = 0; i < len; i++ ) {
|
||||
|
||||
/* Calculate avg for this frame */
|
||||
sum = 0;
|
||||
for ( int j = 0; j < 12; j++ )
|
||||
sum += AREF2(chrom_energy, i, j);
|
||||
avg = sum / 12.0;
|
||||
|
||||
/* Silence detection: */
|
||||
float silence = 0.0F;
|
||||
if (avg < SILENCE_THRESHOLD) { /* assume silent */
|
||||
silence = 1.0F;
|
||||
}
|
||||
AREF2(chrom_energy, i, 12) = silence;
|
||||
|
||||
// printf("avg at %g: %g\n", i * 0.25, avg);
|
||||
|
||||
/* Normalize this frame to avg. 0 */
|
||||
for ( int j = 0; j < 12; j++ )
|
||||
AREF2(chrom_energy, i, j) -= avg;
|
||||
|
||||
/* Calculate std. dev. for this frame */
|
||||
sum = 0;
|
||||
for ( int j = 0; j < 12; j++ ) {
|
||||
float x = AREF2(chrom_energy, i, j);
|
||||
sum += x * x;
|
||||
}
|
||||
dev = sqrt( sum / 12.0 );
|
||||
if (dev == 0.0) dev = 1.0F; /* don't divide by zero */
|
||||
|
||||
/* Normalize this frame to std. dev. 1*/
|
||||
for ( int j = 0; j < 12; j++ )
|
||||
AREF2(chrom_energy, i, j) /= dev;
|
||||
}
|
||||
}
|
||||
|
||||
/* Returns the minimum of two values */
|
||||
double min2( double x, double y ) {
|
||||
return (x < y ? x : y);
|
||||
}
|
||||
#define SILENCE_DISTANCE 16.0
|
||||
|
||||
/* GEN_DIST
|
||||
*
|
||||
@@ -66,27 +17,23 @@ double min2( double x, double y ) {
|
||||
* and j in two chroma vectors for use with dynamic time warping of
|
||||
* the chroma vectors.
|
||||
*/
|
||||
float gen_dist( int i, int j, float *chrom_energy1,
|
||||
float *chrom_energy2 ) {
|
||||
|
||||
float sum = 0;
|
||||
float MAX = 12.0;
|
||||
|
||||
if (AREF2(chrom_energy1, i, CHROMA_BIN_COUNT) !=
|
||||
AREF2(chrom_energy2, j, CHROMA_BIN_COUNT)) {
|
||||
//printf("gd%g ", SILENCE_DISTANCE); // print result
|
||||
return SILENCE_DISTANCE;
|
||||
}
|
||||
/* Determine the distance between these vectors
|
||||
chroma1[i] and chroma2[j] to return */
|
||||
for (int k = 0; k < 12; k++) {
|
||||
float x = AREF2(chrom_energy1, i, k);
|
||||
float y = AREF2(chrom_energy2, j, k);
|
||||
float diff = x - y;
|
||||
|
||||
sum += diff*diff ;
|
||||
}
|
||||
sum = min2( sqrt( sum ), MAX );
|
||||
//printf("gd%g ", sum); // print the result
|
||||
return sum;
|
||||
// Distance between chroma vector i of chrom_energy0 and chroma vector j of
// chrom_energy1. If the two vectors disagree in the extra element stored at
// index CHROMA_BIN_COUNT (the silence flag), SILENCE_DISTANCE is returned;
// otherwise the result is the Euclidean distance over the first
// CHROMA_BIN_COUNT elements, limited to at most MAX.
float Scorealign::gen_dist(int i, int j)
|
||||
{
|
||||
const float MAX = 12.0;
|
||||
// NOTE(review): only the upper bounds are asserted; negative indices are
// not checked here
assert(i < file0_frames);
|
||||
assert(j < file1_frames);
|
||||
// cv0/cv1 point at the start of the selected rows
float *cv0 = AREF1(chrom_energy0, i);
|
||||
float *cv1 = AREF1(chrom_energy1, j);
|
||||
if (cv0[CHROMA_BIN_COUNT] != cv1[CHROMA_BIN_COUNT]) {
|
||||
// silent frames are a (large) constant distance from non-silent frames
|
||||
return SILENCE_DISTANCE;
|
||||
}
|
||||
/* calculate the Euclidean distance between these vectors */
|
||||
float sum = 0;
|
||||
// the silence flag itself (index CHROMA_BIN_COUNT) is excluded from the sum
for (int k = 0; k < CHROMA_BIN_COUNT; k++) {
|
||||
float diff = cv0[k] - cv1[k];
|
||||
sum += diff * diff ;
|
||||
}
|
||||
// place a ceiling (MAX) on distance
|
||||
return min(sqrt(sum), MAX);
|
||||
}
|
||||
|
@@ -1,24 +1,7 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
//#include <stdio.h>
|
||||
//#include <stdlib.h>
|
||||
//#include <string.h>
|
||||
//#include <ctype.h>
|
||||
//#include <math.h>
|
||||
|
||||
#define SILENCE_THRESHOLD 0.001
|
||||
#define SILENCE_DISTANCE 16.0
|
||||
|
||||
/* NORM_CHROMA
|
||||
*
|
||||
* This function normalizes the chroma for each frame of the
|
||||
* chrom_energy to mean 0 and std. dev. 1.
|
||||
*/
|
||||
void norm_chroma( int len, float *chrom_energy );
|
||||
|
||||
/* GEN_DIST
|
||||
*
|
||||
* This function generates the Euclidean distance for points i
|
||||
* and j in two chroma vectors for use with dynamic time warping of
|
||||
* the chroma vectors.
|
||||
*/
|
||||
float gen_dist(int i, int j, float *chrom_energy1,
|
||||
float *chrom_energy2 );
|
||||
|
@@ -8,6 +8,7 @@
|
||||
*/
|
||||
|
||||
#include "assert.h"
|
||||
#include <math.h>
|
||||
#include "comp_chroma.h"
|
||||
#include "sautils.h"
|
||||
// the following are needed to get Scorealign
|
||||
@@ -48,9 +49,15 @@ void save_path(char *filename);
|
||||
|
||||
class Curvefit : public Hillclimb {
|
||||
public:
|
||||
Curvefit(Scorealign *sa_, bool verbose_) { sa = sa_; verbose = verbose_; }
|
||||
// Remember the Scorealign object and the verbosity flag. The four work
// arrays start out NULL; they are allocated later in setup(), and the
// destructor frees only the non-NULL ones.
Curvefit(Scorealign *sa_, bool verbose_) {
|
||||
sa = sa_;
|
||||
verbose = verbose_;
|
||||
// no arrays exist until setup() runs
p1_cache = p2_cache = d_cache = x = NULL;
|
||||
}
|
||||
~Curvefit();
|
||||
virtual double evaluate();
|
||||
void setup(int n);
|
||||
void set_step_size(double ss);
|
||||
double *get_x() { return x; }
|
||||
private:
|
||||
Scorealign *sa;
|
||||
@@ -101,35 +108,41 @@ void Curvefit::setup(int segments)
|
||||
// number of parameters is greater than segments because the left
|
||||
// col of segment i is parameter i, so the right col of
|
||||
// the last segment == parameter[segments].
|
||||
n = segments + 1;
|
||||
parameters = ALLOC(double, n);
|
||||
Hillclimb::setup(segments + 1);
|
||||
p1_cache = ALLOC(double, n);
|
||||
p2_cache = ALLOC(double, n);
|
||||
d_cache = ALLOC(double, n);
|
||||
x = ALLOC(double, n);
|
||||
step_size = ALLOC(double, n);
|
||||
min_param = ALLOC(double, n);
|
||||
max_param = ALLOC(double, n);
|
||||
int i;
|
||||
// ideal frames per segment
|
||||
float seg_length = ((float) (sa->file1_frames - 1)) / segments;
|
||||
float seg_length = ((float) (sa->last_x - sa->first_x)) / segments;
|
||||
for (i = 0; i < n; i++) { // initialize cache keys to garbage
|
||||
p1_cache[i] = p2_cache[i] = -999999.99;
|
||||
// initialize x values
|
||||
x[i] = ROUND(i * seg_length);
|
||||
x[i] = ROUND(sa->first_x + i * seg_length);
|
||||
// now initialize parameters based on pathx/pathy/time_map
|
||||
// time_map has y values for each x
|
||||
parameters[i] = sa->time_map[(int) x[i]];
|
||||
assert(parameters[i] >= 0);
|
||||
if (verbose)
|
||||
printf("initial x[%d] = %g, parameters[%d] = %g\n",
|
||||
i, x[i], i, parameters[i]);
|
||||
step_size[i] = 0.5;
|
||||
min_param[i] = 0;
|
||||
max_param[i] = sa->file2_frames - 1;
|
||||
max_param[i] = sa->last_y;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Release the work arrays that setup() allocates (p1_cache, p2_cache,
// d_cache and x). Each pointer is tested first, so an object whose setup()
// was never called (all four still NULL from the constructor) frees nothing.
// FREE is a project macro; presumably the counterpart of ALLOC -- confirm.
Curvefit::~Curvefit()
|
||||
{
|
||||
if (p1_cache) FREE(p1_cache);
|
||||
if (p2_cache) FREE(p2_cache);
|
||||
if (d_cache) FREE(d_cache);
|
||||
if (x) FREE(x);
|
||||
}
|
||||
|
||||
|
||||
// distance_rc -- look up or compute distance between chroma vectors
|
||||
// at row, col in similarity matrix
|
||||
//
|
||||
@@ -142,7 +155,7 @@ void Curvefit::setup(int segments)
|
||||
// Since distance can be computed relatively quickly, a better plan
|
||||
// would be to cache values along the path. Here's a brief design
|
||||
// (for the future, assuming this routine is actually a hot spot):
|
||||
// Allocate a matrix that is, say, 20 x file1_frames to contain distances
|
||||
// Allocate a matrix that is, say, 20 x file0_frames to contain distances
|
||||
// that are +/- 10 frames from the path. Initialize cells to -1.
|
||||
// Allocate an array of integer offsets of size file1_frames.
|
||||
// Fill in the integer offsets with the column number (pathy) value of
|
||||
@@ -157,7 +170,10 @@ void Curvefit::setup(int segments)
|
||||
//
|
||||
double Curvefit::distance_rc(int row, int col)
|
||||
{
|
||||
return gen_dist(row, col, sa->chrom_energy1, sa->chrom_energy2);
|
||||
double dist = sa->gen_dist(row, col);
|
||||
if (dist > 20) // DEBUGGING
|
||||
printf("internal error");
|
||||
return dist;
|
||||
}
|
||||
|
||||
|
||||
@@ -190,6 +206,7 @@ double Curvefit::compute_dist(int i)
|
||||
double dx = x2 - x1, dy = y2 - y1;
|
||||
double sum = 0;
|
||||
int n;
|
||||
assert(x1 >= 0 && x2 >= 0 && y1 >= 0 && y2 >= 0);
|
||||
if (dx > dy) { // evauate at each x
|
||||
n = (int) dx;
|
||||
for (int x = (int) x1; x < x2; x++) {
|
||||
@@ -204,14 +221,52 @@ double Curvefit::compute_dist(int i)
|
||||
}
|
||||
}
|
||||
// normalize using line length: sum/n is average distance. Multiply
|
||||
// avg. distance (cost per unit length) by length to get total cost:
|
||||
// avg. distance (cost per unit length) by length to get total cost.
|
||||
// Note: this gives an advantage to direct diagonal paths without bends
|
||||
// because longer path lengths result in higher total cost. This also
|
||||
// gives higher weight to longer segments, although all segments are
|
||||
// about the same length.
|
||||
double rslt = sqrt(dx*dx + dy*dy) * sum / n;
|
||||
// printf("compute_dist %d: x1 %g y1 %g x2 %g y2 %g sum %g rslt %g\n",
|
||||
// i, x1, y1, x2, y2, sum, rslt);
|
||||
if (rslt < 0 || rslt > 20 * n) { // DEBUGGING
|
||||
printf("internal error");
|
||||
}
|
||||
return rslt;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Set the step size of every one of the n parameters to the same value ss.
// curve_fitting() uses this between its two optimize() calls to switch to a
// smaller step.
void Curvefit::set_step_size(double ss)
|
||||
{
|
||||
for (int i = 0; i < n; i++) {
|
||||
step_size[i] = ss;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static long curvefit_iterations;
|
||||
|
||||
// This is a callback from Hillclimb::optimize to report progress
|
||||
// We can't know percentage completion because we don't know how
|
||||
// many iterations it will take to converge, so we just report
|
||||
// iterations. The SAProgress class assumes some number based
|
||||
// on experience.
|
||||
//
|
||||
// Normally, the iterations parameter is a good indicator of work
|
||||
// expended so far, but since we call Hillclimb::optimize twice
|
||||
// (second time with a finer grid to search), ignore iterations
|
||||
// and use curvefit_iterations, a global counter, instead. This
|
||||
// assumes that curvefit_progress is called once for each iteration.
|
||||
//
|
||||
void curvefit_progress(void *cookie, int iterations, double best)
|
||||
{
|
||||
// cookie is the Scorealign pointer that curve_fitting() passes to optimize()
Scorealign *sa = (Scorealign *) cookie;
|
||||
// nothing to report when no progress object is attached
if (sa->progress) {
|
||||
// iterations and best are not used: the file-level counter
// curvefit_iterations is incremented and reported instead
sa->progress->set_smoothing_progress(++curvefit_iterations);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void curve_fitting(Scorealign *sa, bool verbose)
|
||||
{
|
||||
if (verbose)
|
||||
@@ -220,12 +275,17 @@ void curve_fitting(Scorealign *sa, bool verbose)
|
||||
Curvefit curvefit(sa, verbose);
|
||||
double *parameters;
|
||||
double *x;
|
||||
curvefit_iterations = 0;
|
||||
// how many segments? About total time / line_time:
|
||||
int segments =
|
||||
(int) (0.5 + (sa->actual_frame_period_1 * sa->file1_frames) /
|
||||
(int) (0.5 + (sa->actual_frame_period_0 * (sa->last_x - sa->first_x)) /
|
||||
sa->line_time);
|
||||
curvefit.setup(segments);
|
||||
curvefit.optimize();
|
||||
curvefit.optimize(&curvefit_progress, sa);
|
||||
// further optimization with smaller step sizes:
|
||||
// this step size will interpolate 0.25s frames down to 10ms
|
||||
curvefit.set_step_size(0.04);
|
||||
curvefit.optimize(&curvefit_progress, sa);
|
||||
parameters = curvefit.get_parameters();
|
||||
x = curvefit.get_x();
|
||||
// now, rewrite pathx and pathy according to segments
|
||||
|
@@ -111,7 +111,7 @@ void FFT3(int NumSamples,
|
||||
int i, j, k, n;
|
||||
int BlockSize, BlockEnd;
|
||||
|
||||
float angle_numerator = 2.0 * M_PI;
|
||||
float angle_numerator = float(2.0 * M_PI);
|
||||
float tr, ti; /* temp real, temp imaginary */
|
||||
|
||||
if (!IsPowerOfTwo(NumSamples)) {
|
||||
@@ -224,7 +224,7 @@ void RealFFT3(int NumSamples, float *RealIn, float *RealOut, float *ImagOut)
|
||||
int Half = NumSamples / 2;
|
||||
int i;
|
||||
|
||||
float theta = M_PI / Half;
|
||||
float theta = float(M_PI / Half);
|
||||
|
||||
float *tmpReal = (float *) alloca(sizeof(float) * Half);
|
||||
float *tmpImag = (float *) alloca(sizeof(float) * Half);
|
||||
@@ -289,7 +289,7 @@ void PowerSpectrum3(int NumSamples, float *In, float *Out)
|
||||
int Half = NumSamples / 2;
|
||||
int i;
|
||||
|
||||
float theta = M_PI / Half;
|
||||
float theta = float(M_PI / Half);
|
||||
|
||||
float *tmpReal = (float *) alloca(sizeof(float) * Half);;
|
||||
float *tmpImag = (float *) alloca(sizeof(float) * Half);
|
||||
|
@@ -30,7 +30,6 @@ using namespace std;
|
||||
// each row is one chroma vector,
|
||||
// data is stored as an array of chroma vectors:
|
||||
// vector 1, vector 2, ...
|
||||
#define CHROM(row, column) AREF2((*chrom_energy), row, column)
|
||||
|
||||
float hz_to_step(float hz)
|
||||
{
|
||||
@@ -40,21 +39,19 @@ float hz_to_step(float hz)
|
||||
/* GEN_MAGNITUDE
|
||||
given the real and imaginary portions of a complex FFT function, compute
|
||||
the magnitude of the fft bin.
|
||||
given input of 2 arrays (inR and inI) of length n, takes the ith element
|
||||
from each, squares them, sums them, takes the square root of the sum and
|
||||
puts the output into the ith position in the array out.
|
||||
|
||||
NOTE: out should be length n
|
||||
*/
|
||||
void gen_Magnitude(float* inR,float* inI, int low, int hi, float* out)
|
||||
void gen_Magnitude(float* inR, float* inI, int low, int hi, float* out)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = low; i < hi; i++) {
|
||||
float magVal = sqrt(inR[i] * inR[i] + inI[i] * inI[i]);
|
||||
//printf(" %d: sqrt(%g^2+%g^2)=%g\n",i,inR[i],inI[i+1],magVal);
|
||||
out[i]= magVal;
|
||||
#ifdef SA_VERBOSE
|
||||
if (i == 1000) printf("gen_Magnitude: %d %g\n", i, magVal);
|
||||
if (i == 1000) fprintf(dbf, "gen_Magnitude: %d %g\n", i, magVal);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -116,17 +113,12 @@ int min_Bin_Num(float* bins, int numBins){
|
||||
applies the hamming function to each sample.
|
||||
n specifies the length of h (the array that receives the window values).
|
||||
*/
|
||||
void gen_Hamming(float* in, int n, float* out)
|
||||
void gen_Hamming(float* h, int n)
|
||||
{
|
||||
int k = 0;
|
||||
for(k = 0; k < n; k++) {
|
||||
float internalValue = 2.0 * M_PI * k * (1.0 / (n - 1));
|
||||
float cosValue = cos(internalValue);
|
||||
float hammingValue = 0.54F + (-0.46F * cosValue);
|
||||
#ifdef SA_VERBOSE
|
||||
if (k == 1000) printf("Hamming %g\n", hammingValue);
|
||||
#endif
|
||||
out[k] = hammingValue * in[k];
|
||||
int k;
|
||||
for (k = 0; k < n; k++) {
|
||||
float cos_value = (float) cos(2.0 * M_PI * k * (1.0 / n));
|
||||
h[k] = 0.54F + (-0.46F * cos_value);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,6 +134,36 @@ int nextPowerOf2(int n)
|
||||
}
|
||||
|
||||
|
||||
// normalize a chroma vector (from audio or midi) to have
|
||||
// mean of 0 and std. dev. of 1
|
||||
//
|
||||
// Works in place on the first CHROMA_BIN_COUNT elements of cv: subtracts
// their mean, then divides by their standard deviation. Elements at index
// CHROMA_BIN_COUNT and beyond are neither read nor written.
static void normalize(float *cv)
|
||||
{
|
||||
float avg = 0;
|
||||
|
||||
// accumulate the sum of the bins, then turn it into the mean
for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
avg += cv[i];
|
||||
}
|
||||
avg /= CHROMA_BIN_COUNT;
|
||||
|
||||
/* Normalize this frame to avg. 0 */
|
||||
for (int i = 0; i < CHROMA_BIN_COUNT; i++)
|
||||
cv[i] -= avg;
|
||||
|
||||
/* Calculate std. dev. for this frame */
|
||||
float sum = 0;
|
||||
// the mean is already 0, so the sum of squares is all that is needed
for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
float x = cv[i];
|
||||
sum += x * x;
|
||||
}
|
||||
// divides by CHROMA_BIN_COUNT (not CHROMA_BIN_COUNT - 1)
float dev = sqrt(sum / CHROMA_BIN_COUNT);
|
||||
// dev is exactly 0 only when every bin is 0 after centering; dividing by 1
// then leaves the vector all zeros
if (dev == 0.0) dev = 1.0F; /* don't divide by zero */
|
||||
|
||||
/* Normalize this frame to std. dev. 1*/
|
||||
for (int i = 0; i < CHROMA_BIN_COUNT; i++) cv[i] /= dev;
|
||||
}
|
||||
|
||||
|
||||
/* GEN_CHROMA_AUDIO -- compute chroma for an audio file
|
||||
*/
|
||||
/*
|
||||
@@ -153,8 +175,8 @@ int nextPowerOf2(int n)
|
||||
(aka the length of the 1st dimension of chrom_energy)
|
||||
*/
|
||||
int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
|
||||
int lcutoff, float **chrom_energy, float *actual_frame_period,
|
||||
int id, bool verbose)
|
||||
int lcutoff, float **chrom_energy, double *actual_frame_period,
|
||||
int id)
|
||||
{
|
||||
int i;
|
||||
double sample_rate = reader.get_sample_rate();
|
||||
@@ -165,9 +187,12 @@ int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
|
||||
printf ("==============FILE %d====================\n", id);
|
||||
reader.print_info();
|
||||
}
|
||||
// this seems like a poor way to set actual_frame_period_1 or _2 in
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "******** BEGIN AUDIO CHROMA COMPUTATION *********\n");
|
||||
#endif
|
||||
// this seems like a poor way to set actual_frame_period_0 or _1 in
|
||||
// the Scorealign object, but I'm not sure what would be better:
|
||||
*actual_frame_period = reader.actual_frame_period;
|
||||
*actual_frame_period = float(reader.actual_frame_period);
|
||||
|
||||
for (i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
reg11[i] = -999;
|
||||
@@ -230,7 +255,7 @@ int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
|
||||
// sample_rate / full_data_size);
|
||||
double freq = low_bin * sample_rate / full_data_size;
|
||||
for (i = low_bin; i < high_bin; i++) {
|
||||
float raw_bin = hz_to_step(freq);
|
||||
float raw_bin = hz_to_step(float(freq));
|
||||
int round_bin = (int) (raw_bin + 0.5F);
|
||||
int mod_bin = round_bin % 12;
|
||||
bin_map[i] = mod_bin;
|
||||
@@ -238,24 +263,35 @@ int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
|
||||
}
|
||||
// printf("BIN_COUNT is !!!!!!!!!!!!! %d\n",CHROMA_BIN_COUNT);
|
||||
|
||||
// create Hamming window data
|
||||
float *hamming = ALLOC(float, reader.samples_per_frame);
|
||||
gen_Hamming(hamming, reader.samples_per_frame);
|
||||
|
||||
while (reader.read_window(full_data)) {
|
||||
//fill out array with 0's till next power of 2
|
||||
#ifdef SA_VERBOSE
|
||||
printf("samples_per_frame %d sample %g\n", reader.samples_per_frame,
|
||||
full_data[0]);
|
||||
fprintf(dbf, "samples_per_frame %d sample %g\n",
|
||||
reader.samples_per_frame, full_data[0]);
|
||||
#endif
|
||||
for (i = reader.samples_per_frame; i < full_data_size; i++)
|
||||
full_data[i] = 0;
|
||||
|
||||
#ifdef AS_VERBOSE
|
||||
printf("preFFT: full_data[1000] %g\n", full_data[1000]);
|
||||
#ifdef SA_VERBOSE
|
||||
fprintf(dbf, "preFFT: full_data[1000] %g\n", full_data[1000]);
|
||||
#endif
|
||||
|
||||
//the data from the wave file, each point mult by a hamming value
|
||||
gen_Hamming(full_data, full_data_size, full_data);
|
||||
// compute the RMS, then apply the Hamming window to the data
|
||||
float rms = 0.0f;
|
||||
for (i = 0; i < reader.samples_per_frame; i++) {
|
||||
float x = full_data[i];
|
||||
rms += x * x;
|
||||
full_data[i] = x * hamming[i];
|
||||
}
|
||||
rms = sqrt(rms / reader.samples_per_frame);
|
||||
|
||||
#ifdef SA_VERBOSE
|
||||
printf("preFFT: hammingData[1000] %g\n", full_data[1000]);
|
||||
fprintf(dbf, "preFFT: hammingData[1000] %g\n",
|
||||
full_data[1000]);
|
||||
#endif
|
||||
FFT3(full_data_size, 0, full_data, NULL, fft_dataR, fft_dataI); //fft3
|
||||
|
||||
@@ -322,19 +358,42 @@ int Scorealign::gen_chroma_audio(Audio_reader &reader, int hcutoff,
|
||||
//put chrom energy into the returned array
|
||||
|
||||
#ifdef SA_VERBOSE
|
||||
printf("cv_index %d\n", cv_index);
|
||||
fprintf(dbf, "cv_index %d\n", cv_index);
|
||||
#endif
|
||||
assert(cv_index < reader.frame_count);
|
||||
for (i = 0; i < CHROMA_BIN_COUNT; i++)
|
||||
CHROM(cv_index, i) = binEnergy[i] / binCount[i];
|
||||
float *cv = AREF1(*chrom_energy, cv_index);
|
||||
for (i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
cv[i] = binEnergy[i] / binCount[i];
|
||||
}
|
||||
if (rms < silence_threshold) {
|
||||
// "silence" flag
|
||||
cv[CHROMA_BIN_COUNT] = 1.0f;
|
||||
} else {
|
||||
cv[CHROMA_BIN_COUNT] = 0.0f;
|
||||
// normalize the non-silent frames
|
||||
normalize(cv);
|
||||
}
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "%d@%g) ", cv_index, cv_index * reader.actual_frame_period);
|
||||
for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
fprintf(dbf, "%d:%g ", i, cv[i]);
|
||||
}
|
||||
fprintf(dbf, " sil?:%g\n\n", cv[CHROMA_BIN_COUNT]);
|
||||
#endif
|
||||
cv_index++;
|
||||
if (progress && cv_index % 10 == 0 &&
|
||||
!progress->set_feature_progress(
|
||||
float(cv_index * reader.actual_frame_period))) {
|
||||
break;
|
||||
}
|
||||
} // end of while ((readcount = read_mono_floats...
|
||||
|
||||
free(hamming);
|
||||
free(fft_dataI);
|
||||
free(fft_dataR);
|
||||
free(full_data);
|
||||
if (verbose)
|
||||
printf("\nGenerated Chroma. file%d_frames is %i\n", id, file1_frames);
|
||||
printf("\nGenerated Chroma. file%d_frames is %i\n", id, file0_frames);
|
||||
return cv_index;
|
||||
}
|
||||
|
||||
@@ -362,7 +421,7 @@ typedef Event_list *Event_list_ptr;
|
||||
The chroma energy is placed in the float *chrom_energy.
|
||||
this 2D is an array of pointers.
|
||||
The function returns the number of frames
|
||||
(aka the length of the 1st dimention of chrom_energy)
|
||||
(aka the length of the 1st dimension of chrom_energy)
|
||||
*
|
||||
*
|
||||
Notes: keep a list of notes that are sounding.
|
||||
@@ -374,25 +433,33 @@ typedef Event_list *Event_list_ptr;
|
||||
How many frames?
|
||||
*/
|
||||
|
||||
int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
|
||||
float **chrom_energy, float *actual_frame_period,
|
||||
int id, bool verbose)
|
||||
int Scorealign::gen_chroma_midi(Alg_seq &seq, float dur, int nnotes,
|
||||
int hcutoff, int lcutoff,
|
||||
float **chrom_energy, double *actual_frame_period,
|
||||
int id)
|
||||
{
|
||||
// silence_threshold is compared to the *average* of chroma bins.
|
||||
// Rather than divide the sum by CHROMA_BIN_COUNT to compute the
|
||||
// average, just compute the sum and compare to silence_threshold * 12
|
||||
float threshold = (float) (silence_threshold * CHROMA_BIN_COUNT);
|
||||
|
||||
if (verbose) {
|
||||
printf ("==============FILE %d====================\n", id);
|
||||
SA_V(seq.write(cout, true));
|
||||
}
|
||||
/*=============================================================*/
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "******** BEGIN MIDI CHROMA COMPUTATION *********\n");
|
||||
#endif /*=============================================================*/
|
||||
|
||||
*actual_frame_period = (frame_period) ; // since we don't quantize to samples
|
||||
*actual_frame_period = frame_period; // since we don't quantize to samples
|
||||
|
||||
/*=============================================================*/
|
||||
|
||||
seq.convert_to_seconds();
|
||||
/* find duration */
|
||||
float dur = 0.0F;
|
||||
int nnotes = 0;
|
||||
nnotes= find_midi_duration(seq, &dur);
|
||||
///* find duration */
|
||||
//float dur = 0.0F;
|
||||
//int nnotes = 0;
|
||||
//nnotes = find_midi_duration(seq, &dur);
|
||||
|
||||
/*================================================================*/
|
||||
|
||||
@@ -417,13 +484,15 @@ int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
|
||||
|
||||
/*====================================================*/
|
||||
|
||||
float frame_begin = max((cv_index * (frame_period)) -
|
||||
window_size/2 , 0.0F); //chooses zero if negative
|
||||
float frame_begin = (float) max(cv_index * frame_period -
|
||||
window_size / 2.0, 0.0);
|
||||
//chooses zero if negative
|
||||
|
||||
float frame_end= frame_begin +(window_size/2);
|
||||
float frame_end = (float) (cv_index * frame_period + window_size / 2.0);
|
||||
/*============================================================*/
|
||||
float *cv = AREF1(*chrom_energy, cv_index);
|
||||
/* zero the vector */
|
||||
for (int i = 0; i < CHROMA_BIN_COUNT; i++) CHROM(cv_index, i) = 0;
|
||||
for (int i = 0; i < CHROMA_BIN_COUNT + 1; i++) cv[i] = 0;
|
||||
/* add new notes that are in the frame */
|
||||
while (event && event->time < frame_end) {
|
||||
if (event->is_note()) {
|
||||
@@ -442,6 +511,7 @@ int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
|
||||
}
|
||||
if (*ptr) ptr = &((*ptr)->next);
|
||||
}
|
||||
float sum = 0.0;
|
||||
for (Event_list_ptr item = list; item; item = item->next) {
|
||||
/* compute duration of overlap */
|
||||
float overlap =
|
||||
@@ -450,18 +520,34 @@ int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
|
||||
float velocity = item->note->loud;
|
||||
float weight = overlap * velocity;
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "%3d pitch %g key %d overlap %g velocity %g\n",
|
||||
cv_index, item->note->pitch, item->note->get_identifier(),
|
||||
overlap, velocity);
|
||||
fprintf(dbf, "%3d pitch %g starting %g key %d overlap %g velocity %g\n",
|
||||
cv_index, item->note->pitch, item->note->time,
|
||||
item->note->get_identifier(), overlap, velocity);
|
||||
#endif
|
||||
CHROM(cv_index, (int)item->note->pitch % 12) += weight;
|
||||
cv[(int) item->note->pitch % 12] += weight;
|
||||
sum += weight;
|
||||
}
|
||||
|
||||
|
||||
if (sum < threshold) {
|
||||
cv[CHROMA_BIN_COUNT] = 1.0;
|
||||
} else {
|
||||
normalize(cv);
|
||||
}
|
||||
|
||||
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "%d@%g) ", cv_index, frame_begin);
|
||||
for (int i = 0; i < CHROMA_BIN_COUNT; i++) {
|
||||
fprintf(dbf, "%d:%g ", i, CHROM(cv_index, i));
|
||||
fprintf(dbf, "%d:%g ", i, cv[i]);
|
||||
}
|
||||
fprintf(dbf, "\n\n");
|
||||
fprintf(dbf, " sil?:%g\n\n", cv[CHROMA_BIN_COUNT]);
|
||||
#endif
|
||||
if (cv_index % 10 == 0 && progress &&
|
||||
!progress->set_feature_progress(
|
||||
float(cv_index * *actual_frame_period))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
while (list) {
|
||||
Event_list_ptr temp = list;
|
||||
@@ -470,6 +556,6 @@ int Scorealign::gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
|
||||
}
|
||||
iterator.end();
|
||||
if (verbose)
|
||||
printf("\nGenerated Chroma. file%d_frames is %i\n", id, file1_frames);
|
||||
printf("\nGenerated Chroma. file%d_frames is %i\n", id, file0_frames);
|
||||
return frame_count;
|
||||
}
|
||||
|
@@ -2,5 +2,10 @@
|
||||
|
||||
bool is_midi_file(char *filename);
|
||||
|
||||
#define AREF2(chrom_energy, row, column) \
|
||||
(chrom_energy[row * (CHROMA_BIN_COUNT + 1) + column])
|
||||
// index into matrix to extract chroma vector
|
||||
#define AREF1(chrom_energy, row) \
|
||||
((chrom_energy) + (row) * (CHROMA_BIN_COUNT + 1))
|
||||
|
||||
// index into matrix to extract element of chroma vector
|
||||
#define AREF2(chrom_energy, row, column) AREF1(chrom_energy, row)[column]
|
||||
|
||||
|
@@ -26,12 +26,33 @@
|
||||
* maximum.
|
||||
*/
|
||||
|
||||
#include "hillclimb.h"
|
||||
|
||||
#include "stdio.h"
|
||||
#include "malloc.h"
|
||||
#include "sautils.h"
|
||||
#include "hillclimb.h"
|
||||
|
||||
#define HC_VERBOSE 0
|
||||
#define V if (HC_VERBOSE)
|
||||
|
||||
// Free the four per-parameter arrays (parameters, step_size, min_param,
// max_param), skipping any pointer that is NULL.
// NOTE(review): this relies on the pointers being NULL when setup() was
// never called; the constructor is not visible here -- confirm.
// NOTE(review): set_parameters() takes caller-supplied arrays; its body is
// not visible here, so confirm who owns those arrays before relying on this
// destructor to free them.
Hillclimb::~Hillclimb()
|
||||
{
|
||||
if (parameters) FREE(parameters);
|
||||
if (step_size) FREE(step_size);
|
||||
if (min_param) FREE(min_param);
|
||||
if (max_param) FREE(max_param);
|
||||
}
|
||||
|
||||
|
||||
// Record the number of parameters and allocate the four arrays
// (current values, step sizes, lower bounds, upper bounds), each holding
// n_ doubles.  The arrays are left uninitialized here.
// NOTE(review): the ALLOC results are not checked, and arrays from an
// earlier setup() call are not released before being overwritten --
// confirm that setup() is only ever called once per object.
void Hillclimb::setup(int n_) {
|
||||
n = n_;
|
||||
parameters = ALLOC(double, n);
|
||||
step_size = ALLOC(double, n);
|
||||
min_param = ALLOC(double, n);
|
||||
max_param = ALLOC(double, n);
|
||||
}
|
||||
|
||||
|
||||
void Hillclimb::set_parameters(double *p, double *ss,
|
||||
double *min_, double *max_, int plen)
|
||||
{
|
||||
@@ -108,17 +129,20 @@ double Hillclimb::optimize()
|
||||
}
|
||||
*/
|
||||
|
||||
double Hillclimb::optimize()
|
||||
double Hillclimb::optimize(Report_fn_ptr report, void *cookie)
|
||||
{
|
||||
double best = evaluate();
|
||||
int iterations = 0;
|
||||
while (true) {
|
||||
(*report)(cookie, iterations, best);
|
||||
V printf("best %g ", best);
|
||||
// eval partial derivatives
|
||||
int i;
|
||||
// variables to search for max partial derivative
|
||||
double max_y = best; // max of evaluate() so far
|
||||
int max_i; // index where best max was found
|
||||
double max_parameter; // the good parameter value for max_i
|
||||
int max_i = 0; // index where best max was found
|
||||
// the good parameter value for max_i:
|
||||
double max_parameter = parameters[0];
|
||||
// now search over all parameters for best improvement
|
||||
for (i = 0; i < n; i++) {
|
||||
V printf("optimize at %d param %g ", i, parameters[i]);
|
||||
@@ -148,8 +172,10 @@ double Hillclimb::optimize()
|
||||
parameters[i] = save_param;
|
||||
V printf("\n");
|
||||
}
|
||||
iterations++; // for debugging, reporting
|
||||
if (max_y <= best) { // no improvement, we're done
|
||||
V printf("\nCompleted hillclimbing, best %g\n", best);
|
||||
(*report)(cookie, iterations, best);
|
||||
return best;
|
||||
}
|
||||
// improvement because max_y higher than best:
|
||||
|
@@ -15,6 +15,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
// while optimizing, this function is called to report progress
|
||||
typedef void (*Report_fn_ptr)(void *cookie, int iteration, double best);
|
||||
|
||||
class Hillclimb {
|
||||
protected:
|
||||
double *parameters; // parameters to optimize
|
||||
@@ -24,12 +27,17 @@ protected:
|
||||
double *max_param; // maximum parameter values
|
||||
int n; // number of parameters
|
||||
public:
|
||||
Hillclimb() {
|
||||
parameters = step_size = min_param = max_param = NULL;
|
||||
}
|
||||
void setup(int n_);
|
||||
~Hillclimb();
|
||||
void set_parameters(double *parameters_, double *step_size_,
|
||||
double *min_, double *max_, int n_);
|
||||
// retrieve parameters after optimization:
|
||||
double *get_parameters() { return parameters; }
|
||||
virtual double evaluate() = 0;
|
||||
double optimize();
|
||||
double optimize(Report_fn_ptr report, void *cookie);
|
||||
};
|
||||
|
||||
|
||||
|
@@ -8,6 +8,7 @@
|
||||
*/
|
||||
|
||||
#define ALLOC(t, n) (t *) malloc(sizeof(t) * (n))
|
||||
#define FREE(p) free(p)
|
||||
|
||||
#define ROUND(x) ((int) (0.5 + (x)))
|
||||
|
||||
|
@@ -4,21 +4,41 @@
|
||||
*/
|
||||
|
||||
#include "allegro.h"
|
||||
#include "scorealign-glue.h"
|
||||
#include "audioreader.h"
|
||||
#include "audiomixerreader.h"
|
||||
#include "scorealign.h"
|
||||
#include "scorealign-glue.h"
|
||||
#include "audiomixerreader.h"
|
||||
|
||||
void scorealign(void *mixer, mixer_process_fn fn_ptr, int chans, double srate,
|
||||
double end_time, Alg_seq *seq)
|
||||
|
||||
int scorealign(void *mixer, mixer_process_fn fn_ptr, int chans, double srate,
|
||||
double end_time, Alg_seq *seq, SAProgress *progress,
|
||||
ScoreAlignParams ¶ms)
|
||||
{
|
||||
Scorealign sa;
|
||||
sa.frame_period = 0.2;
|
||||
sa.window_size = 0.2;
|
||||
sa.frame_period = params.mFramePeriod;
|
||||
sa.window_size = params.mWindowSize;
|
||||
sa.silence_threshold = params.mSilenceThreshold;
|
||||
sa.force_final_alignment = (params.mForceFinalAlignment != 0.0);
|
||||
sa.ignore_silence = (params.mIgnoreSilence != 0.0);
|
||||
sa.presmooth_time = params.mPresmoothTime;
|
||||
sa.line_time = params.mLineTime;
|
||||
sa.smooth_time = params.mSmoothTime;
|
||||
|
||||
Audio_mixer_reader reader(mixer, fn_ptr, chans, srate, end_time);
|
||||
reader.calculate_parameters(sa, false);
|
||||
sa.align_midi_to_audio(*seq, reader, true);
|
||||
sa.midi_tempo_align(*seq, false);
|
||||
sa.progress = progress;
|
||||
int result = sa.align_midi_to_audio(*seq, reader);
|
||||
|
||||
params.mMidiStart = sa.first_x * sa.actual_frame_period_0;
|
||||
params.mMidiEnd = (sa.last_x + 1) * sa.actual_frame_period_0;
|
||||
params.mAudioStart = sa.first_y * sa.actual_frame_period_1;
|
||||
params.mAudioEnd = (sa.last_y + 1) * sa.actual_frame_period_1;
|
||||
|
||||
if (result != SA_SUCCESS) {
|
||||
return result;
|
||||
}
|
||||
|
||||
sa.midi_tempo_align(*seq);
|
||||
// seq has now been modified to conform to audio provided by mixer
|
||||
seq->set_real_dur(end_time);
|
||||
return SA_SUCCESS; // success
|
||||
}
|
||||
|
@@ -1,5 +1,8 @@
|
||||
typedef long (*mixer_process_fn)(void *mix, float **buffer, long n);
|
||||
|
||||
void scorealign(void *mixer, mixer_process_fn fn_ptr,
|
||||
#include "ScoreAlignParams.h"
|
||||
|
||||
int scorealign(void *mixer, mixer_process_fn fn_ptr,
|
||||
int chans, double srate,
|
||||
double end_time, Alg_seq *seq);
|
||||
double end_time, Alg_seq *seq, SAProgress *progress,
|
||||
ScoreAlignParams ¶ms);
|
||||
|
@@ -24,7 +24,7 @@
|
||||
#define LOW_CUTOFF 40
|
||||
#define HIGH_CUTOFF 2000
|
||||
|
||||
// Note: There are "verbose" flags passed as parameters that
|
||||
// Note: There is a "verbose" flag in Scorealign objects that
|
||||
// enables some printing. The SA_VERBOSE compiler flag causes a
|
||||
// lot more debugging output, so it could be called VERY_VERBOSE
|
||||
// as opposed to the quieter verbose flags.
|
||||
@@ -36,10 +36,10 @@
|
||||
// for presmoothing, how near does a point have to be to be "on the line"
|
||||
#define NEAR 1.5
|
||||
|
||||
// path is file1_frames by file2_frames array, so first index
|
||||
// (rows) is in [0 .. file1_frames]. Array is sequence of rows.
|
||||
// columns (j) ranges from [0 .. file2_frames]
|
||||
#define PATH(i,j) (path[(i) * file2_frames + (j)])
|
||||
// path is file0_frames by file1_frames array, so first index
|
||||
// (rows) is in [0 .. file0_frames]. Array is sequence of rows.
|
||||
// columns (j) ranges from [0 .. file1_frames]
|
||||
#define PATH(i,j) (path[(i) * file1_frames + (j)])
|
||||
|
||||
/*===========================================================================*/
|
||||
|
||||
@@ -48,21 +48,52 @@ FILE *dbf = NULL;
|
||||
#endif
|
||||
|
||||
|
||||
// Default constructor: every tunable alignment parameter starts at its
// SA_DFT_* default, the alignment path is empty, and both verbose
// printing and progress reporting are switched off.
Scorealign::Scorealign() {
|
||||
frame_period = SA_DFT_FRAME_PERIOD;
|
||||
window_size = SA_DFT_WINDOW_SIZE;
|
||||
force_final_alignment = SA_DFT_FORCE_FINAL_ALIGNMENT;
|
||||
ignore_silence = SA_DFT_IGNORE_SILENCE;
|
||||
silence_threshold = SA_DFT_SILENCE_THRESHOLD;
|
||||
presmooth_time = SA_DFT_PRESMOOTH_TIME;
|
||||
line_time = SA_DFT_LINE_TIME;
|
||||
smooth_time = SA_DFT_SMOOTH_TIME;
|
||||
pathlen = 0;
|
||||
path_count = 0;
// pathx/pathy remain NULL until compare_chroma() allocates them; the
// destructor tests for NULL before freeing
pathx = NULL;
|
||||
pathy = NULL;
|
||||
verbose = false;
|
||||
// no progress reporter by default; users of this member test it for
// NULL before calling through it
progress = NULL;
|
||||
#if DEBUG_LOG
|
||||
// debug builds write their trace to debug-log.txt in the current
// directory; the assert catches a failed open
dbf = fopen("debug-log.txt", "w");
|
||||
assert(dbf);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// Destructor: frees the alignment path arrays if they were allocated and,
// in DEBUG_LOG builds, closes the debug log opened by the constructor.
// NOTE(review): time_map and smooth_time_map, which compare_chroma()
// allocates, are not released here -- confirm they are freed elsewhere.
Scorealign::~Scorealign() {
|
||||
if (pathx) free(pathx);
|
||||
if (pathy) free(pathy);
|
||||
#if DEBUG_LOG
|
||||
fclose(dbf);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* MAP_TIME
|
||||
lookup time of file1 in smooth_time_map and interpolate
|
||||
to get time in file2
|
||||
lookup time of file0 in smooth_time_map and interpolate
|
||||
to get time in file1
|
||||
*/
|
||||
|
||||
float Scorealign::map_time(float t1)
|
||||
{
|
||||
t1 /= actual_frame_period_1; // convert from seconds to frames
|
||||
t1 /= (float) actual_frame_period_0; // convert from seconds to frames
|
||||
int i = (int) t1; // round down
|
||||
if (i < 0) i = 0;
|
||||
if (i >= file1_frames - 1) i = file1_frames - 2;
|
||||
if (i >= file0_frames - 1) i = file0_frames - 2;
|
||||
// interpolate to get time
|
||||
return actual_frame_period_2 *
|
||||
return float(actual_frame_period_1 *
|
||||
interpolate(i, smooth_time_map[i], i+1, smooth_time_map[i+1],
|
||||
t1);
|
||||
t1));
|
||||
}
|
||||
|
||||
|
||||
@@ -86,7 +117,7 @@ int find_midi_duration(Alg_seq &seq, float *dur)
|
||||
Alg_event_ptr e = notes[i];
|
||||
if (e->is_note()) {
|
||||
Alg_note_ptr n = (Alg_note_ptr) e;
|
||||
float note_end = n->time + n->dur;
|
||||
float note_end = float(n->time + n->dur);
|
||||
if (note_end > *dur) *dur = note_end;
|
||||
nnotes++;
|
||||
}
|
||||
@@ -127,9 +158,9 @@ void Scorealign::path_step(int i, int j)
|
||||
{
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "(%i,%i) ", i, j);
|
||||
if (++path_count % 5 == 0 ||
|
||||
(i == 0 && j == 0))
|
||||
fprintf(dbf, "\n");
|
||||
if (++path_count % 5 == 0 ||
|
||||
(i == first_x && j == first_y))
|
||||
fprintf(dbf, "\n");
|
||||
#endif
|
||||
pathx[pathlen] = i;
|
||||
pathy[pathlen] = j;
|
||||
@@ -169,8 +200,8 @@ returns the first index in pathy where the element is bigger than sec
|
||||
*/
|
||||
int Scorealign::sec_to_pathy_index(float sec)
|
||||
{
|
||||
for (int i = 0 ; i < (file1_frames + file2_frames); i++) {
|
||||
if (smooth_time_map[i] * actual_frame_period_2 >= sec) {
|
||||
for (int i = 0 ; i < (file0_frames + file1_frames); i++) {
|
||||
if (smooth_time_map[i] * actual_frame_period_1 >= sec) {
|
||||
return i;
|
||||
}
|
||||
//printf("%i\n" ,pathy[i]);
|
||||
@@ -184,17 +215,21 @@ given a chrom_energy vector, sees how many
|
||||
of the initial frames are designated as silent
|
||||
*/
|
||||
|
||||
int frames_of_init_silence( float *chrom_energy, int frame_count)
|
||||
int frames_of_init_silence(float *chrom_energy, int frame_count)
|
||||
{
|
||||
bool silence = true;
|
||||
int frames=0;
|
||||
while (silence) {
|
||||
if (silent(frames, chrom_energy))
|
||||
frames++;
|
||||
else
|
||||
silence=false;
|
||||
int frames;
|
||||
for (frames = 0; frames < frame_count; frames++) {
|
||||
if (!silent(frames, chrom_energy)) break;
|
||||
}
|
||||
return frames;
|
||||
}
|
||||
|
||||
// Scan backwards from the final frame and return the index of the first
// frame for which silent() is false.
//   chrom_energy: chroma data handed to silent() for each frame tested
//   frame_count:  number of frames in chrom_energy
// The loop stops before testing frame 0, so the result is 0 both when
// frame 0 is the last non-silent frame and when every frame from 1 up is
// silent.
int last_non_silent_frame(float *chrom_energy, int frame_count)
|
||||
{
|
||||
int frames;
|
||||
for (frames = frame_count - 1; frames > 0; frames--) {
|
||||
if (!silent(frames, chrom_energy)) break;
|
||||
}
|
||||
|
||||
return frames;
|
||||
}
|
||||
|
||||
@@ -202,95 +237,130 @@ int frames_of_init_silence( float *chrom_energy, int frame_count)
|
||||
/* COMPARE_CHROMA
|
||||
Perform Dynamic Programming to find optimal alignment
|
||||
*/
|
||||
void Scorealign::compare_chroma(bool verbose)
|
||||
int Scorealign::compare_chroma()
|
||||
{
|
||||
float *path;
|
||||
int x = 0;
|
||||
int y = 0;
|
||||
|
||||
/* Allocate the distance matrix */
|
||||
path = (float *) calloc(file1_frames * file2_frames, sizeof(float));
|
||||
path = (float *) calloc(file0_frames * file1_frames, sizeof(float));
|
||||
|
||||
/* Initialize first row and column */
|
||||
/* skip over initial silence in signals */
|
||||
if (ignore_silence) {
|
||||
first_x = frames_of_init_silence(chrom_energy0, file0_frames);
|
||||
last_x = last_non_silent_frame(chrom_energy0, file0_frames);
|
||||
first_y = frames_of_init_silence(chrom_energy1, file1_frames);
|
||||
last_y = last_non_silent_frame(chrom_energy1, file1_frames);
|
||||
} else {
|
||||
first_x = 0;
|
||||
last_x = file0_frames - 1;
|
||||
first_y = 0;
|
||||
last_y = file1_frames - 1;
|
||||
}
|
||||
|
||||
/* allow free skip over initial silence in either signal, but not both */
|
||||
/* silence is indicated by a run of zeros along the first row and or
|
||||
* column, starting at the origin (0,0). After computing these runs, we
|
||||
* put the proper value at (0,0)
|
||||
*/
|
||||
if (verbose) printf("Performing silent skip DP \n");
|
||||
PATH(0, 0) = (silent(0, chrom_energy1) ? 0 :
|
||||
gen_dist(0, 0, chrom_energy1, chrom_energy2));
|
||||
for (int i = 1; i < file1_frames; i++)
|
||||
PATH(i, 0) = (PATH(i-1, 0) == 0 && silent(i, chrom_energy1) ? 0 :
|
||||
gen_dist(i, 0, chrom_energy1, chrom_energy2) +
|
||||
PATH(i-1, 0));
|
||||
PATH(0, 0) = (silent(0, chrom_energy2) ? 0 :
|
||||
gen_dist(0, 0, chrom_energy1, chrom_energy2));
|
||||
for (int j = 1; j < file2_frames; j++)
|
||||
PATH(0, j) = (PATH(0, j-1) == 0 && silent(j, chrom_energy2) ? 0 :
|
||||
gen_dist(0, j, chrom_energy1, chrom_energy2) +
|
||||
PATH(0, j-1));
|
||||
/* first row and first column are done, put proper value at (0,0) */
|
||||
PATH(0, 0) = (!silent(0, chrom_energy1) || !silent(0, chrom_energy2) ?
|
||||
gen_dist(0, 0, chrom_energy1, chrom_energy2) : 0);
|
||||
|
||||
if (last_x - first_x <= 0 || last_y - first_y <= 0) {
|
||||
return SA_TOOSHORT;
|
||||
}
|
||||
|
||||
/* Initialize first row and column */
|
||||
if (verbose) printf("Performing DP\n");
|
||||
PATH(first_x, first_y) = gen_dist(first_x, first_y);
|
||||
for (int x = first_x + 1; x <= last_x; x++)
|
||||
PATH(x, first_y) = gen_dist(x, first_y) + PATH(x - 1, first_y);
|
||||
for (int y = 1; y <= last_y; y++)
|
||||
PATH(first_x, y) = gen_dist(first_x, y) + PATH(first_x, y - 1);
|
||||
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "DISTANCE MATRIX ***************************\n");
|
||||
#endif
|
||||
/* Perform DP for the rest of the matrix */
|
||||
for (int i = 1; i < file1_frames; i++)
|
||||
for (int j = 1; j < file2_frames; j++)
|
||||
PATH(i, j) = gen_dist(i, j, chrom_energy1, chrom_energy2) +
|
||||
min3(PATH(i-1, j-1), PATH(i-1, j), PATH(i, j-1));
|
||||
|
||||
for (int x = first_x + 1; x <= last_x; x++) {
|
||||
for (int y = first_y + 1; y <= last_y; y++) {
|
||||
PATH(x, y) = gen_dist(x, y) +
|
||||
float(min3(PATH(x-1, y-1), PATH(x-1, y), PATH(x, y-1)));
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "(%d %d %g) ", x, y, gen_dist(x, y), PATH(x, y));
|
||||
#endif
|
||||
}
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "\n");
|
||||
#endif
|
||||
// report progress for each file0_frame (column)
|
||||
// This is not quite right if we are ignoring silence because
|
||||
// then only a sub-matrix is computed.
|
||||
if (progress && !progress->set_matrix_progress(file1_frames))
|
||||
return SA_CANCEL;
|
||||
}
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "END OF DISTANCE MATRIX ********************\n");
|
||||
#endif
|
||||
|
||||
if (verbose) printf("Completed Dynamic Programming.\n");
|
||||
|
||||
|
||||
x = file1_frames - 1;
|
||||
y = file2_frames - 1;
|
||||
|
||||
//x and y are the ending points, it can end at either the end of midi,
|
||||
// or end of audio but not both
|
||||
pathx = ALLOC(short, (x + y + 2));
|
||||
pathy = ALLOC(short, (x + y + 2));
|
||||
// or end of audio or both
|
||||
pathx = ALLOC(short, (file0_frames + file1_frames));
|
||||
pathy = ALLOC(short, (file0_frames + file1_frames));
|
||||
|
||||
assert(pathx != NULL);
|
||||
assert(pathy != NULL);
|
||||
|
||||
// map from file1 time to file2 time
|
||||
time_map = ALLOC(float, file1_frames);
|
||||
smooth_time_map = ALLOC(float, file1_frames);
|
||||
// map from file0 time to file1 time
|
||||
time_map = ALLOC(float, file0_frames);
|
||||
smooth_time_map = ALLOC(float, file0_frames);
|
||||
|
||||
int x = last_x;
|
||||
int y = last_y;
|
||||
|
||||
if (!force_final_alignment) {
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "\nOptimal Path: ");
|
||||
fprintf(dbf, "\nOptimal Path: ");
|
||||
#endif
|
||||
while (1) {
|
||||
/* Check for stopping */
|
||||
if (x == 0 & y == 0) {
|
||||
path_step(0, 0);
|
||||
path_reverse();
|
||||
break;
|
||||
// find end point, the lowest cost matrix value at one of the
|
||||
// sequence endings
|
||||
float min_cost = 1.0E10;
|
||||
for (int i = first_x; i <= last_x; i++) {
|
||||
if (PATH(i, last_y) <= min_cost) {
|
||||
min_cost = PATH(i, last_y);
|
||||
x = i;
|
||||
y = last_y;
|
||||
}
|
||||
}
|
||||
|
||||
/* Print the current coordinate in the path*/
|
||||
for (int j = first_y; j <= last_y; j++) {
|
||||
if (PATH(last_x, j) <= min_cost) {
|
||||
min_cost = PATH(last_x, j);
|
||||
x = last_x;
|
||||
y = j;
|
||||
}
|
||||
}
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "Min cost at %d %d\n\nPATH:\n", x, y);
|
||||
#endif
|
||||
}
|
||||
|
||||
while ((x != first_x) || (y != first_y)) {
|
||||
path_step(x, y);
|
||||
|
||||
/* Check for the optimal path backwards*/
|
||||
if (x > 0 && y > 0 && PATH(x-1, y-1) <= PATH(x-1, y) &&
|
||||
if (x > first_x && y > first_y && PATH(x-1, y-1) <= PATH(x-1, y) &&
|
||||
PATH(x-1, y-1) <= PATH(x, y-1)) {
|
||||
x--;
|
||||
y--;
|
||||
} else if (x > 0 && y > 0 && PATH(x-1, y) <= PATH(x, y-1)) {
|
||||
} else if (x > first_x && y > first_y && PATH(x-1, y) <= PATH(x, y-1)) {
|
||||
x--;
|
||||
} else if (y > 0) {
|
||||
} else if (y > first_y) {
|
||||
y--;
|
||||
} else if (x > 0) {
|
||||
} else if (x > first_x) {
|
||||
x--;
|
||||
}
|
||||
}
|
||||
path_step(x, y);
|
||||
path_reverse();
|
||||
free(path);
|
||||
return SA_SUCCESS; // success
|
||||
}
|
||||
|
||||
|
||||
|
||||
void Scorealign::linear_regression(int n, int width, float &a, float &b)
|
||||
{
|
||||
int hw = (width - 1) / 2; // a more convenient form: 1/2 width
|
||||
@@ -316,32 +386,36 @@ void Scorealign::linear_regression(int n, int width, float &a, float &b)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* COMPUTE_SMOOTH_TIME_MAP
|
||||
compute regression line and estimate point at i
|
||||
|
||||
Number of points in regression is smooth (an odd number). First
|
||||
index to compute is (smooth-1)/2. Use that line for the first
|
||||
(smooth+1)/2 points. The last index to compute is
|
||||
(file1_frames - (smooth+1)/2). Use that line for the last
|
||||
(file0_frames - (smooth+1)/2). Use that line for the last
|
||||
(smooth+1)/2 points.
|
||||
*/
|
||||
void Scorealign::compute_smooth_time_map()
|
||||
{
|
||||
int i;
|
||||
int hw = (smooth - 1) / 2; // half width of smoothing window
|
||||
|
||||
// find the first point
|
||||
for (i = 0; i < first_x; i++) {
|
||||
smooth_time_map[i] = NOT_MAPPED;
|
||||
}
|
||||
|
||||
// do the first points:
|
||||
float a, b;
|
||||
linear_regression((smooth - 1) / 2, smooth, a, b);
|
||||
int i;
|
||||
for (i = 0; i < (smooth + 1) / 2; i++) {
|
||||
smooth_time_map[i] = a + b*i;
|
||||
linear_regression(first_x + hw, smooth, a, b);
|
||||
for (i = first_x; i <= first_x + hw; i++) {
|
||||
smooth_time_map[i] = a + b * i;
|
||||
}
|
||||
|
||||
// do the middle points:
|
||||
for (i = (smooth + 1) / 2; i < file1_frames - (smooth + 1) / 2; i++) {
|
||||
for (i = first_x + hw + 1; i < last_x - hw; i++) {
|
||||
linear_regression(i, smooth, a, b);
|
||||
smooth_time_map[i] = a + b*i;
|
||||
smooth_time_map[i] = a + b * i;
|
||||
|
||||
#if DEBUG_LOG
|
||||
fprintf(dbf, "time_map[%d] = %g, smooth_time_map[%d] = %g\n",
|
||||
@@ -349,14 +423,15 @@ void Scorealign::compute_smooth_time_map()
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
// do the last points
|
||||
linear_regression(file1_frames - (smooth + 1) / 2, smooth, a, b);
|
||||
for (i = file1_frames - (smooth + 1) / 2; i < file1_frames; i++) {
|
||||
smooth_time_map[i] = a + b*i;
|
||||
linear_regression(last_x - hw, smooth, a, b);
|
||||
for (i = last_x - hw; i <= last_x; i++) {
|
||||
smooth_time_map[i] = a + b * i;
|
||||
}
|
||||
|
||||
|
||||
// finally, fill with NOT_MAPPED
|
||||
for (i = last_x + 1; i < file0_frames; i++)
|
||||
smooth_time_map[i] = NOT_MAPPED;
|
||||
}
|
||||
|
||||
|
||||
@@ -401,16 +476,17 @@ short *path_copy(short *path, int len)
|
||||
*/
|
||||
void Scorealign::presmooth()
|
||||
{
|
||||
int n = ROUND(presmooth_time / actual_frame_period_2);
|
||||
int n = ROUND(presmooth_time / actual_frame_period_1);
|
||||
n = (n + 3) & ~3; // round up to multiple of 4
|
||||
int i = 0;
|
||||
while (pathx[i] + n < file2_frames) {
|
||||
while (i < pathlen - 1 && pathx[i] + n <= last_x) {
|
||||
/* line goes from i to i+n-1 */
|
||||
int x1 = pathx[i];
|
||||
int xmid = x1 + n/2;
|
||||
int x2 = x1 + n;
|
||||
int y1 = pathy[i];
|
||||
int y2;
|
||||
int y2 = pathy[i + 1]; // make sure it has a value. y2 should be
|
||||
// set in the loop below.
|
||||
int j;
|
||||
/* search for y2 = pathy[j] s.t. pathx[j] == x2 */
|
||||
for (j = i + n; j < pathlen; j++) {
|
||||
@@ -424,7 +500,8 @@ void Scorealign::presmooth()
|
||||
int k = i;
|
||||
int count = 0;
|
||||
while (pathx[k] < xmid) { // search first half
|
||||
if (near_line(x1, y1, x2, y2, pathx[k], pathy[k])) {
|
||||
if (near_line(float(x1), float(y1), float(x2), float(y2),
|
||||
pathx[k], pathy[k])) {
|
||||
count++;
|
||||
regr.point(pathx[k], pathy[k]);
|
||||
}
|
||||
@@ -437,7 +514,8 @@ void Scorealign::presmooth()
|
||||
}
|
||||
/* see if line fits top half of the data */
|
||||
while (pathx[k] < x2) {
|
||||
if (near_line(x1, y1, x2, y2, pathx[k], pathy[k])) {
|
||||
if (near_line(float(x1), float(y1), float(x2), float(y2),
|
||||
pathx[k], pathy[k])) {
|
||||
count++;
|
||||
regr.point(pathx[k], pathy[k]);
|
||||
}
|
||||
@@ -511,11 +589,6 @@ void Scorealign::presmooth()
|
||||
// make sure new path is no longer than original path
|
||||
// the last point we wrote was k - 1
|
||||
k = k - 1; // the last point we wrote is now k
|
||||
// DEBUG
|
||||
if (k > j) {
|
||||
printf("oops: k %d, j %d\n", k, j);
|
||||
SA_V(print_path_range(pathx, pathy, i, k);)
|
||||
}
|
||||
assert(k <= j);
|
||||
// if new path is shorter than original, then fix up path
|
||||
if (k < j) {
|
||||
@@ -539,19 +612,28 @@ void Scorealign::presmooth()
|
||||
*/
|
||||
void Scorealign::compute_regression_lines()
|
||||
{
|
||||
// first, compute the y value of the path at
|
||||
int i;
|
||||
// fill in time_map with NOT_MAPPED until the first point
|
||||
// of the path
|
||||
for (i = 0; i < pathx[0]; i++) {
|
||||
time_map[i] = NOT_MAPPED;
|
||||
}
|
||||
// now, compute the y value of the path at
|
||||
// each x value. If the path has multiple values
|
||||
// on x, take the average.
|
||||
int p = 0;
|
||||
int i;
|
||||
int upper, lower;
|
||||
for (i = 0; i < file1_frames; i++) {
|
||||
for (i = pathx[0]; p < pathlen; i++) {
|
||||
lower = pathy[p];
|
||||
while (p < pathlen && pathx[p] == i) {
|
||||
upper = pathy[p];
|
||||
p = p + 1;
|
||||
}
|
||||
time_map[i] = (lower + upper) * 0.5;
|
||||
time_map[i] = (lower + upper) * 0.5F;
|
||||
}
|
||||
// fill in rest of time_map with NOT_MAPPED
|
||||
for (i = pathx[pathlen - 1] + 1; i <= last_x; i++) {
|
||||
time_map[i] = NOT_MAPPED;
|
||||
}
|
||||
// now fit a line to the nearest WINDOW points and record the
|
||||
// line's y value for each x.
|
||||
@@ -559,115 +641,196 @@ void Scorealign::compute_regression_lines()
|
||||
}
|
||||
|
||||
|
||||
void Scorealign::midi_tempo_align(Alg_seq &seq, bool verbose)
|
||||
void Scorealign::midi_tempo_align(Alg_seq &seq)
|
||||
{
|
||||
// We create a new time map out of the alignment, and replace
|
||||
// the original time map in the Alg_seq sequence
|
||||
Alg_seq new_time_map_seq;
|
||||
|
||||
/** align at all integer beats **/
|
||||
int totalbeats;
|
||||
float dur_in_sec;
|
||||
// probably alignment should respect the real_dur encoded into the seq
|
||||
// rather than computing real_dur based on note off times -- the
|
||||
// caller should be required to set real_dur to a good value, and
|
||||
// the find_midi_duration() function should be available to the caller
|
||||
// if necessary -RBD
|
||||
find_midi_duration(seq, &dur_in_sec);
|
||||
//
|
||||
// totalbeat = lastbeat + 1 and round up the beat
|
||||
totalbeats = (int) (seq.get_time_map()->time_to_beat(dur_in_sec) + 2);
|
||||
if (verbose)
|
||||
// totalbeats = lastbeat + 1 and round up the beat
|
||||
int totalbeats = (int) seq.get_beat_dur() + 2;
|
||||
if (verbose) {
|
||||
double dur_in_sec = seq.get_real_dur();
|
||||
printf("midi duration = %f, totalbeats=%i \n", dur_in_sec, totalbeats);
|
||||
|
||||
}
|
||||
#ifdef DEBUG_LOG
|
||||
fprintf(dbf, "***************** CONSTRUCTING TIME MAP ***************\n");
|
||||
#endif
|
||||
// turn off last tempo flag so last tempo will extrapolate
|
||||
new_time_map_seq.get_time_map()->last_tempo_flag = false;
|
||||
int first_beat = -1;
|
||||
for (int i = 0; i < totalbeats; i++) {
|
||||
double newtime = map_time(seq.get_time_map()->beat_to_time(i));
|
||||
if (newtime > 0)
|
||||
double newtime = map_time(float(seq.get_time_map()->beat_to_time(i)));
|
||||
if (newtime > 0) {
|
||||
new_time_map_seq.insert_beat(newtime, (double) i);
|
||||
// remember where the new time map begins
|
||||
if (first_beat < 0) first_beat = i;
|
||||
#ifdef DEBUG_LOG
|
||||
fprintf(dbf, "map beat %d to time %g\n", i, newtime);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
seq.convert_to_beats();
|
||||
seq.set_time_map(new_time_map_seq.get_time_map());
|
||||
double end_beat = seq.get_dur();
|
||||
Alg_time_map_ptr map = new_time_map_seq.get_time_map();
|
||||
seq.set_time_map(map);
|
||||
// the new time map begins where the alignment began, but due to
|
||||
// smoothing and rounding, there may be some edge effects.
|
||||
// Try to set the tempo before the first_beat to match the tempo
|
||||
// at the first beat by introducing another time map point at least
|
||||
// one beat before the first_beat. To do this, we need at least
|
||||
// 2 beats before first_beat and at least 2 beats in the time map
|
||||
// (needed to compute initial tempo). Furthermore, the tempo at
|
||||
// first_beat could be so slow that we do not have enough time
|
||||
// before first_beat to anticipate the tempo.
|
||||
if (first_beat >= 2 && totalbeats > first_beat + 1) {
|
||||
int new_beat = first_beat / 2;
|
||||
// compute initial tempo from first_beat and first_beat + 1
|
||||
int i = map->locate_beat(first_beat);
|
||||
double t1 = map->beats[i].time;
|
||||
double t2 = map->beats[i + 1].time;
|
||||
double spb = (t2 - t1); // seconds per beat, beat period
|
||||
double new_time = t1 - (first_beat - new_beat) * spb;
|
||||
if (new_time <= 0.2) {
|
||||
// not enough time to start at new_time, new_beat
|
||||
// let's try using half the time rather than half the beats
|
||||
new_time = t1 / 2.0;
|
||||
// this will round down, so new_beat < first_beat
|
||||
new_beat = int(first_beat - (t1 / 2) / spb);
|
||||
new_time = t1 - (first_beat - new_beat) * spb;
|
||||
}
|
||||
// need to check again if new_beat would be too early
|
||||
if (new_time > 0.2) {
|
||||
map->insert_beat(new_time, new_beat);
|
||||
}
|
||||
}
|
||||
// Note: final tempo is extrapolated, so no need to insert new
|
||||
// time map points beyond the last one
|
||||
seq.set_dur(end_beat);
|
||||
#ifdef DEBUG_LOG
|
||||
fprintf(dbf, "\nend_beat %g end time %g\n",
|
||||
seq.get_beat_dur(), seq.get_real_dur());
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// this routine performs an alignment by adjusting midi to match audio
|
||||
//
|
||||
void Scorealign::align_midi_to_audio(Alg_seq &seq, Audio_reader &reader,
|
||||
bool verbose)
|
||||
int Scorealign::align_midi_to_audio(Alg_seq &seq, Audio_reader &reader)
|
||||
{
|
||||
/* Generate the chroma for file 1
|
||||
float dur = 0.0F;
|
||||
int nnotes = find_midi_duration(seq, &dur);
|
||||
if (progress) {
|
||||
progress->set_frame_period(frame_period);
|
||||
progress->set_smoothing(line_time > 0.0);
|
||||
progress->set_duration(0, false, dur);
|
||||
progress->set_duration(1, true, float(reader.actual_frame_period *
|
||||
reader.frame_count));
|
||||
progress->set_phase(0);
|
||||
}
|
||||
/* Generate the chroma for file 0
|
||||
* This will always be the MIDI File when aligning midi with audio.
|
||||
*/
|
||||
file1_frames = gen_chroma_midi(seq, HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy1, &actual_frame_period_1, 1, verbose);
|
||||
file0_frames = gen_chroma_midi(seq, dur, nnotes, HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy0, &actual_frame_period_0, 0);
|
||||
|
||||
/* Generate the chroma for file 2 */
|
||||
file2_frames = gen_chroma_audio(reader, HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy2, &actual_frame_period_2, 2, verbose);
|
||||
|
||||
align_chromagrams(verbose);
|
||||
/* Generate the chroma for file 1 */
|
||||
if (progress) progress->set_phase(1);
|
||||
file1_frames = gen_chroma_audio(reader, HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy1, &actual_frame_period_1, 1);
|
||||
return align_chromagrams();
|
||||
}
|
||||
|
||||
void Scorealign::align_audio_to_audio(Audio_reader &reader1,
|
||||
Audio_reader &reader2, bool verbose)
|
||||
int Scorealign::align_audio_to_audio(Audio_reader &reader0,
|
||||
Audio_reader &reader1)
|
||||
{
|
||||
if (progress) {
|
||||
progress->set_frame_period(frame_period);
|
||||
progress->set_duration(0, true, float(reader0.actual_frame_period *
|
||||
reader0.frame_count));
|
||||
progress->set_duration(1, true, float(reader1.actual_frame_period *
|
||||
reader1.frame_count));
|
||||
|
||||
progress->set_phase(0);
|
||||
progress->set_smoothing(line_time > 0.0);
|
||||
}
|
||||
file0_frames = gen_chroma_audio(reader0, HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy0, &actual_frame_period_0, 0);
|
||||
|
||||
if (progress) progress->set_phase(1);
|
||||
file1_frames = gen_chroma_audio(reader1, HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy1, &actual_frame_period_1, 1, verbose);
|
||||
file2_frames = gen_chroma_audio(reader2, HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy2, &actual_frame_period_2, 2, verbose);
|
||||
align_chromagrams(verbose);
|
||||
&chrom_energy1, &actual_frame_period_1, 1);
|
||||
|
||||
return align_chromagrams();
|
||||
}
|
||||
|
||||
|
||||
void Scorealign::align_midi_to_midi(Alg_seq &seq1, Alg_seq &seq2,
|
||||
bool verbose)
|
||||
int Scorealign::align_midi_to_midi(Alg_seq &seq0, Alg_seq &seq1)
|
||||
{
|
||||
file1_frames = gen_chroma_midi(seq1, HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy1, &actual_frame_period_1, 1, verbose);
|
||||
float dur0 = 0.0F;
|
||||
int nnotes0 = find_midi_duration(seq0, &dur0);
|
||||
float dur1 = 0.0F;
|
||||
int nnotes1 = find_midi_duration(seq1, &dur1);
|
||||
if (progress) {
|
||||
progress->set_frame_period(frame_period);
|
||||
progress->set_smoothing(line_time > 0.0);
|
||||
progress->set_duration(0, false, dur0);
|
||||
progress->set_duration(1, false, dur1);
|
||||
|
||||
file2_frames = gen_chroma_midi(seq2, HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy2, &actual_frame_period_2, 2, verbose);
|
||||
progress->set_phase(0);
|
||||
}
|
||||
file0_frames = gen_chroma_midi(seq0, dur0, nnotes0,
|
||||
HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy0, &actual_frame_period_0, 0);
|
||||
|
||||
align_chromagrams(verbose);
|
||||
if (progress) progress->set_phase(1);
|
||||
file1_frames = gen_chroma_midi(seq1, dur1, nnotes1,
|
||||
HIGH_CUTOFF, LOW_CUTOFF,
|
||||
&chrom_energy1, &actual_frame_period_1, 1);
|
||||
|
||||
return align_chromagrams();
|
||||
}
|
||||
|
||||
void Scorealign::align_chromagrams(bool verbose)
|
||||
int Scorealign::align_chromagrams()
|
||||
{
|
||||
if (progress) progress->set_phase(2);
|
||||
if (verbose)
|
||||
printf("\nGenerated Chroma.\n");
|
||||
/* now that we have actual_frame_period_2, we can compute smooth */
|
||||
/* now that we have actual_frame_period_1, we can compute smooth */
|
||||
// smooth is an odd number of frames that spans about smooth_time
|
||||
smooth = ROUND(smooth_time / actual_frame_period_2);
|
||||
smooth = ROUND(smooth_time / actual_frame_period_1);
|
||||
if (smooth < 3) smooth = 3;
|
||||
if (!(smooth & 1)) smooth++; // must be odd
|
||||
if (verbose) {
|
||||
printf("smoothing time is %g\n", smooth_time);
|
||||
printf("smooth count is %d\n", smooth);
|
||||
}
|
||||
/* Normalize the chroma frames */
|
||||
norm_chroma(file1_frames, chrom_energy1);
|
||||
SA_V(printf("Chromagram data for file 0:\n");)
|
||||
SA_V(print_chroma_table(chrom_energy0, file0_frames);)
|
||||
SA_V(printf("Chromagram data for file 1:\n");)
|
||||
SA_V(print_chroma_table(chrom_energy1, file1_frames);)
|
||||
norm_chroma(file2_frames, chrom_energy2);
|
||||
SA_V(printf("Chromagram data for file 2:\n");)
|
||||
SA_V(print_chroma_table(chrom_energy2, file2_frames);)
|
||||
if (verbose)
|
||||
printf("Normalized Chroma.\n");
|
||||
|
||||
/* Compare the chroma frames */
|
||||
compare_chroma(verbose);
|
||||
int result = compare_chroma();
|
||||
if (result != SA_SUCCESS) {
|
||||
return result;
|
||||
}
|
||||
if (progress) progress->set_phase(3);
|
||||
/* Compute the smooth time map now for use by curve-fitting */
|
||||
compute_regression_lines();
|
||||
/* if line_time is set, do curve-fitting */
|
||||
if (line_time > 0.0) {
|
||||
curve_fitting(this, verbose);
|
||||
/* Redo the smooth time map after curve fitting or smoothing */
|
||||
compute_regression_lines();
|
||||
}
|
||||
/* if presmooth_time is set, do presmoothing */
|
||||
if (presmooth_time > 0.0) {
|
||||
presmooth();
|
||||
/* Redo the smooth time map after curve fitting or smoothing */
|
||||
compute_regression_lines();
|
||||
}
|
||||
/* if line_time is set, do curve-fitting */
|
||||
if (line_time > 0.0) {
|
||||
curve_fitting(this, verbose);
|
||||
/* Redo the smooth time map after curve fitting or smoothing */
|
||||
compute_regression_lines();
|
||||
}
|
||||
if (progress) progress->set_phase(4);
|
||||
return SA_SUCCESS;
|
||||
}
|
||||
|
@@ -12,52 +12,142 @@
|
||||
#define SA_V(stmt)
|
||||
#endif
|
||||
|
||||
// A class to report (optionally) score alignment progress.
//
// The base class only records what it is told and never requests
// cancellation (every set_*_progress() hook returns true). Derive from it
// and override the hooks to display progress or to let the user cancel.
class SAProgress {
public:
    // Give every member a defined value so that a subclass reading
    // frame_period, phase, durations[] or is_audio[] before the
    // corresponding setter has been called sees zeros, not garbage.
    SAProgress() : frame_period(0.0), phase(0), smoothing(false) {
        durations[0] = durations[1] = 0.0;
        is_audio[0] = is_audio[1] = false;
    }

    // Virtual: this class exists to be subclassed, and an instance may be
    // destroyed through an SAProgress pointer.
    virtual ~SAProgress() {}

    // we need the frame period to convert seconds to work units
    // call this before set_duration()
    virtual void set_frame_period(double seconds) { frame_period = seconds; }

    // index = 0 or 1 to tell which file (first or second)
    // audio_flag = true (audio) or false (midi)
    // seconds = duration of audio or midi data
    // Any other index is ignored instead of writing past the two-element
    // arrays.
    virtual void set_duration(int index, bool audio_flag, double seconds) {
        if (index < 0 || index > 1) return;
        durations[index] = seconds;
        is_audio[index] = audio_flag;
    }

    // if fitting pwl path to path, set smoothing to true
    virtual void set_smoothing(bool s) { smoothing = s; }

    // which alignment phase are we working on?
    // 0 = first file chroma, 1 = second file chroma, 2 = compute matrix,
    // 3 = smoothing
    // NOTE(review): Scorealign::align_chromagrams() also reports phase 4
    // just before returning success -- presumably "finished"; confirm.
    // Note: set_phase(0) is REQUIRED and must be called only ONCE.
    // This is when we calculate total work
    // and initialize any local state needed to handle set_feature_progress()
    // and set_matrix_progress().
    virtual void set_phase(int i) { phase = i; }

    // how many seconds have we processed while computing chroma features
    // NOTE(review): the original comment said "in phase 1 or 2", but the
    // phase list above puts chroma computation in phases 0 and 1; confirm.
    // return value is normally true; false is request to cancel
    virtual bool set_feature_progress(float seconds) { return true; }

    // report that some matrix elements have been computed?
    // return value is normally true; false is request to cancel
    virtual bool set_matrix_progress(int cells) { return true; }

    // report iterations of line smoothing
    // the base implementation returns true
    virtual bool set_smoothing_progress(int i) { return true; }

protected:
    double frame_period;  // seconds per frame, from set_frame_period()
    int phase;            // last value passed to set_phase()
    double durations[2];  // seconds, indexed by file (0 or 1)
    bool is_audio[2];     // true = audio, false = midi, indexed by file
    bool smoothing;       // from set_smoothing()
};
|
||||
|
||||
|
||||
// Status codes returned by the alignment routines: align_chromagrams()
// forwards any non-SA_SUCCESS result from compare_chroma() and otherwise
// returns SA_SUCCESS.
enum {
|
||||
// alignment completed normally
SA_SUCCESS = 0,
|
||||
// NOTE(review): presumably the input was too short to align -- confirm
// against compare_chroma()
SA_TOOSHORT,
|
||||
// NOTE(review): presumably returned after an SAProgress callback returned
// false (its documented "request to cancel") -- confirm
SA_CANCEL
|
||||
};
|
||||
|
||||
|
||||
// Default values (SA_DFT_*) for the Scorealign parameters of the same name,
// each paired with a wxT() string form of the value.
// NOTE(review): presumably the string forms are for display in the UI and
// the numeric forms seed the Scorealign constructor -- confirm at the use
// sites.
// frame period and window size, in seconds
#define SA_DFT_FRAME_PERIOD 0.2
|
||||
#define SA_DFT_FRAME_PERIOD_TEXT wxT("0.20 secs")
|
||||
|
||||
#define SA_DFT_WINDOW_SIZE 0.2
|
||||
#define SA_DFT_WINDOW_SIZE_TEXT wxT("0.20 secs")
|
||||
|
||||
#define SA_DFT_FORCE_FINAL_ALIGNMENT true
|
||||
#define SA_DFT_FORCE_FINAL_ALIGNMENT_STRING wxT("true")
|
||||
|
||||
#define SA_DFT_IGNORE_SILENCE true
|
||||
#define SA_DFT_IGNORE_SILENCE_STRING wxT("true")
|
||||
|
||||
#define SA_DFT_SILENCE_THRESHOLD 0.1
|
||||
#define SA_DFT_SILENCE_THRESHOLD_TEXT wxT("0.100")
|
||||
|
||||
// 0 means "off": align_chromagrams() only calls presmooth() when
// presmooth_time > 0.0
#define SA_DFT_PRESMOOTH_TIME 0
|
||||
#define SA_DFT_PRESMOOTH_TIME_TEXT wxT("(off)")
|
||||
|
||||
// 0 means "off": align_chromagrams() only calls curve_fitting() when
// line_time > 0.0
#define SA_DFT_LINE_TIME 0
|
||||
#define SA_DFT_LINE_TIME_TEXT wxT("(off)")
|
||||
|
||||
// duration of the smoothing window, in seconds
#define SA_DFT_SMOOTH_TIME 1.75
|
||||
#define SA_DFT_SMOOTH_TIME_TEXT wxT("1.75 secs")
|
||||
|
||||
|
||||
class Scorealign {
|
||||
public:
|
||||
float frame_period; // time in seconds
|
||||
float window_size;
|
||||
float presmooth_time;
|
||||
float line_time;
|
||||
float smooth_time; // duration of smoothing window
|
||||
double frame_period; // time in seconds
|
||||
double window_size;
|
||||
double silence_threshold;
|
||||
bool force_final_alignment;
|
||||
bool ignore_silence;
|
||||
double presmooth_time;
|
||||
double line_time;
|
||||
double smooth_time; // duration of smoothing window
|
||||
int smooth; // number of points used to compute the smooth time map
|
||||
|
||||
Scorealign() {
|
||||
frame_period = 0.25;
|
||||
window_size = 0.25;
|
||||
presmooth_time = 0.0;
|
||||
line_time = 0.0;
|
||||
smooth_time = 1.75;
|
||||
pathlen = 0;
|
||||
path_count = 0;
|
||||
pathx = NULL;
|
||||
pathy = NULL;
|
||||
}
|
||||
Scorealign();
|
||||
~Scorealign();
|
||||
|
||||
~Scorealign() {
|
||||
if (pathx) free(pathx);
|
||||
if (pathy) free(pathy);
|
||||
}
|
||||
SAProgress *progress;
|
||||
bool verbose;
|
||||
|
||||
// chromagrams and lengths, path data
|
||||
float *chrom_energy0;
|
||||
int file0_frames; // number of frames in file0
|
||||
float *chrom_energy1;
|
||||
int file1_frames; // number of frames in file1
|
||||
float *chrom_energy2;
|
||||
int file2_frames; //number of frames in file2
|
||||
int file1_frames; //number of frames in file1
|
||||
// pathx, pathy, and pathlen describe the shortest path through the
|
||||
// matrix from first_x, first_y to last_x, last_y (from the first
|
||||
// non-silent frame to the last non-silent frame). The length varies
|
||||
// depending upon the amount of silence that is ignored and how many
|
||||
// path steps are diagonal.
|
||||
short *pathx; //for midi (when aligning midi and audio)
|
||||
short *pathy; //for audio (when aligning midi and audio)
|
||||
int pathlen;
|
||||
// first_x, first_y, last_x, last_y are the starting and ending
|
||||
// points of the path. (It's not 0, 0, file0_frames, file1_frames
|
||||
// because silent frames may be trimmed from the beginning and end.)
|
||||
int first_x;
|
||||
int first_y;
|
||||
int last_x;
|
||||
int last_y;
|
||||
|
||||
void set_pathlen(int p) { pathlen = p; }
|
||||
// time_map is, for each sequence 0 frame, the time of the matching
|
||||
// frame in sequence 1. If the path associates a frame of sequence 0
|
||||
// with multiple frames in sequence 1, the sequence 1 frame times
|
||||
// are averaged. The frames that are not mapped to sequence 1 are
|
||||
// marked with a time of -9999 or NOT_MAPPED.
|
||||
// These will be silent frames of sequence 0.
|
||||
#define NOT_MAPPED -9999.0F
|
||||
float *time_map;
|
||||
// smooth_time_map is a smoothed version of time_map. It also has
|
||||
// non-mapped frames marked with times of -9999 or NOT_MAPPED.
|
||||
// Because of smoothing, frames in smooth_time_map may map to
|
||||
// negative times in sequence 1.
|
||||
// These negative times will not be as negative as -9999, but
|
||||
// the recommended coding style is to compare for equality with
|
||||
// NOT_MAPPED to test for that value.
|
||||
float *smooth_time_map;
|
||||
|
||||
// chroma vectors are calculated from an integer number of samples
|
||||
// that approximates the nominal frame_period. Actual frame period
|
||||
// is calculated and stored here:
|
||||
// time in seconds for midi (when aligning midi and audio)
|
||||
float actual_frame_period_1;
|
||||
double actual_frame_period_0;
|
||||
// time in seconds for audio (when aligning midi and audio)
|
||||
float actual_frame_period_2;
|
||||
double actual_frame_period_1;
|
||||
|
||||
/* gen_chroma.cpp stuff:
|
||||
generates the chroma energy for a given file
|
||||
@@ -69,36 +159,43 @@ class Scorealign {
|
||||
(i.e. the length of the 1st dimension of chrom_energy)
|
||||
*/
|
||||
int gen_chroma_audio(Audio_reader &reader, int hcutoff, int lcutoff,
|
||||
float **chrom_energy, float *actual_frame_period,
|
||||
int id, bool verbose);
|
||||
float **chrom_energy, double *actual_frame_period,
|
||||
int id);
|
||||
|
||||
int gen_chroma_midi(Alg_seq &seq, int hcutoff, int lcutoff,
|
||||
float **chrom_energy, float *actual_frame_period,
|
||||
int id, bool verbose);
|
||||
int gen_chroma_midi(Alg_seq &seq, float dur, int nnotes,
|
||||
int hcutoff, int lcutoff,
|
||||
float **chrom_energy, double *actual_frame_period,
|
||||
int id);
|
||||
|
||||
/* comp_chroma.cpp stuff */
|
||||
/* GEN_DIST
|
||||
*
|
||||
* This function generates the Euclidean distance for points i
|
||||
* and j in two chroma vectors for use with dynamic time warping of
|
||||
* the chroma vectors.
|
||||
*/
|
||||
float gen_dist(int i, int j);
|
||||
|
||||
/* scorealign.cpp stuff: */
|
||||
float map_time(float t1);
|
||||
void midi_tempo_align(Alg_seq &seq , char *midiname, char *beatname);
|
||||
void align_midi_to_audio(Alg_seq &seq, Audio_reader &reader,
|
||||
bool verbose);
|
||||
void align_midi_to_midi(Alg_seq &seq1, Alg_seq &seq2, bool verbose);
|
||||
void align_audio_to_audio(Audio_reader &reader1,
|
||||
Audio_reader &reader2, bool verbose);
|
||||
void align_chromagrams(bool verbose);
|
||||
int align_midi_to_audio(Alg_seq &seq, Audio_reader &reader);
|
||||
int align_midi_to_midi(Alg_seq &seq0, Alg_seq &seq2);
|
||||
int align_audio_to_audio(Audio_reader &reader1, Audio_reader &reader2);
|
||||
int align_chromagrams();
|
||||
|
||||
int path_count; // for debug log formatting
|
||||
void path_step(int i, int j);
|
||||
void path_reverse();
|
||||
int sec_to_pathy_index(float sec);
|
||||
void compare_chroma(bool verbose);
|
||||
int compare_chroma();
|
||||
void linear_regression(int n, int width, float &a, float &b);
|
||||
void compute_smooth_time_map();
|
||||
void presmooth();
|
||||
void compute_regression_lines();
|
||||
void midi_tempo_align(Alg_seq &seq, bool verbose);
|
||||
void midi_tempo_align(Alg_seq &seq);
|
||||
};
|
||||
|
||||
#define DEBUG_LOG 0
|
||||
//#define DEBUG_LOG 1
|
||||
#if DEBUG_LOG
|
||||
extern FILE *dbf;
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user