1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-10-25 07:43:54 +02:00
Files
audacity/lib-src/libscorealign/main.cpp
2013-11-01 15:59:33 +00:00

408 lines
13 KiB
C++

/* main.cpp -- the command line interface for scorealign
*
* 14-Jul-08 RBD
*/
#include "stdio.h"
#include "main.h"
#include <fstream>
#include "allegro.h"
#include "audioreader.h"
#include "scorealign.h"
#include "sautils.h"
#include "alignfiles.h"
#include "gen_chroma.h"
#include "comp_chroma.h"
// a global object with score alignment parameters and data.
// main() stores the command line parameters in it and hands it to
// align_files(); edit_transcription() uses it to map note times and
// to tempo-align the MIDI sequence.
Scorealign sa;
/* PRINT_USAGE
    prints the command line synopsis for PROGNAME followed by a
    description of every option. On Windows builds it then waits
    for RETURN so the text stays visible.
*/
static void print_usage(const char *progname)
{
    // option descriptions, printed verbatim after the synopsis line
    static const char *const help_lines[] = {
        " specifying only <file0> simply transcribes MIDI in <file0> "
        "to\n",
        " transcription.txt. Otherwise, align <file0> and <file1>.\n",
        " -h 0.25 indicates a frame period of 0.25 seconds\n",
        " -w 0.25 indicates a window size of 0.25 seconds.\n",
        " -d 0.1 indicates a silence threshold of RMS=0.1.\n",
        " -r indicates filename to write raw alignment path to "
        "(default path.data)\n",
        " -s is filename to write smoothed alignment path(default is "
        "smooth.data)\n",
        " -t is filename to write the time aligned transcription "
        "(default is transcription.txt)\n",
        " -m is filename to write the time aligned midi file "
        "(default is midi.mid)\n",
        " -b is filename to write the time aligned beat times "
        "(default is beatmap.txt)\n",
        " -i is filename to write an image of the distance matrix "
        "(default is distance.pnm)\n",
        " -o 2.0 indicates a smoothing window time of 2.0s\n",
        " -p 3.0 indicates presmoothing with a 3s window\n",
        " -x 6.0 indicates 6s line segment approximation\n"
    };
    const int help_count = (int) (sizeof(help_lines) / sizeof(help_lines[0]));

    // the synopsis is the only line that needs formatting
    printf("\nUsage: %s [-<flags> [<period> <windowsize> <path> <smooth> "
           "<trans> <midi> <beatmap> <image>]] <file0> [<file1>]\n", progname);

    for (int k = 0; k < help_count; k++) {
        fputs(help_lines[k], stdout);
    }

#if (defined (_WIN32) || defined (WIN32))
    fputs(" This is a Unix style command line application which\n"
          " should be run in a MSDOS box or Command Shell window.\n\n",
          stdout);
    fputs(" Type RETURN to exit.\n", stdout);
    getchar();
#endif
} /* print_usage */
/* SAVE_SMOOTH_FILE
    saves the smooth time map in SMOOTH_FILENAME

    One line is written per frame of file 0: the frame's time
    (index * actual_frame_period_0), then the corresponding entry of
    sa.smooth_time_map scaled by actual_frame_period_1.

    If the file cannot be opened, an error is reported on stderr and
    nothing is written. (An assert used to be the only guard here; it
    vanishes when NDEBUG is defined, leaving a NULL stream to be used.)
*/
void save_smooth_file(const char *smooth_filename, Scorealign &sa) {
    FILE *smoothf = fopen(smooth_filename, "w");
    if (!smoothf) {
        fprintf(stderr, "Error: could not open %s for write.\n",
                smooth_filename);
        return;
    }
    for (int i = 0; i < sa.file0_frames; i++) {
        fprintf(smoothf, "%g \t%g\n", i * sa.actual_frame_period_0,
                sa.smooth_time_map[i] * sa.actual_frame_period_1);
    }
    fclose(smoothf);
}
/* PRINT_BEAT_MAP
    prints the allegro beat_map (for debugging) to FILENAME, one
    entry per line as "<beat> <time>"

    If the file cannot be opened, an error is reported on stderr and
    nothing is written; previously the NULL stream was used unchecked.
*/
void print_beat_map(Alg_seq &seq, const char *filename) {
    FILE *beatmap_print = fopen(filename, "w");
    if (!beatmap_print) {
        fprintf(stderr, "Error: could not open %s for write.\n", filename);
        return;
    }
    Alg_beats &b = seq.get_time_map()->beats;
    long num_beats = seq.get_time_map()->length();
    for(int i = 0; i < num_beats; i++) {
        fprintf(beatmap_print," %f %f \n", b[i].beat, b[i].time);
    }
    fclose(beatmap_print);
}
/* EDIT_TRANSCRIPTION
    writes one transcription line per note of SEQ to OUTF and then
    closes OUTF (the caller must not use or close it afterwards).

    When WARP is true, note onsets and offsets are passed through
    the global alignment object's map_time() before being written;
    afterwards the sequence is tempo-aligned, written as a MIDI file
    to MIDI_FILENAME, and its beat map is written to BEAT_FILENAME.
    When WARP is false the two file names are not used.
*/
void edit_transcription(Alg_seq &seq , bool warp, FILE *outf,
        const char *midi_filename, const char *beat_filename) {
    seq.convert_to_seconds();

    Alg_iterator iter(&seq, false); // no note-offs
    iter.begin();

    int sequence_number = 1;
    for (Alg_event_ptr event = iter.next(); event; event = iter.next()) {
        if (!event->is_note()) {
            continue; // only notes appear in the transcription
        }
        Alg_note_ptr note = (Alg_note_ptr) event;
        fprintf(outf, "%d %ld %d %d ",
                sequence_number++, note->chan,
                ROUND(note->pitch), ROUND(note->loud));

        // onset and offset, mapped to audio time when warping
        double onset = warp ? sa.map_time(note->time) : note->time;
        double offset = warp ? sa.map_time(note->time + note->dur)
                             : note->time + note->dur;
        fprintf(outf, "%.3f %.3f\n", onset, offset - onset);
    }
    iter.end();
    fclose(outf);

    if (!warp) {
        return;
    }

    // align the midi file and write out
    sa.midi_tempo_align(seq);
    seq.smf_write(midi_filename);
    print_beat_map(seq, beat_filename);
}
// save image of distance matrix
//
// Writes the distance matrix as a binary PGM ("P5") image to
// image_filename: one row per frame of file 0, one column per frame
// of file 1, one byte per pixel. Each distance d from sa.gen_dist()
// is scaled as 255 * d / 6 and clamped to 255.
// NOTE(review): the divisor 6.0 presumably reflects the expected
// maximum distance -- confirm against gen_dist().
// The largest and smallest distances seen are printed to stdout.
//
// If the file cannot be opened, an error is reported on stderr and
// the function returns without writing anything. Previously the
// error was printed but the NULL stream was still written to.
void save_image(const char *image_filename, Scorealign &sa)
{
    FILE *outf = fopen(image_filename, "wb");
    if (!outf) {
        fprintf(stderr, "Error: could not open %s for write.\n",
                image_filename);
        return;
    }
    float max_d = 0.0;
    float min_d = 999999.0;
    fputs("P5\n", outf);
    // PGM header: width, height, maximum gray value
    fprintf(outf, "%d %d 255\n", sa.file1_frames, sa.file0_frames);
    for (int row = 0; row < sa.file0_frames; row++) {
        for (int col = 0; col < sa.file1_frames; col++) {
            float d = sa.gen_dist(row, col);
#ifdef DEBUG_LOG
            fprintf(dbf, "%d %d %g\n", row, col, d);
#endif
            if (d > max_d) max_d = d;
            if (d < min_d) min_d = d;
            int pixel = (int) (255 * (d / 6.0) + 0.5);
            if (pixel > 255) pixel = 255;
            putc(pixel, outf);
        }
    }
    fclose(outf);
    printf("max distance %g, min distance %g\n", max_d, min_d);
}
/* SAVE_TRANSCRIPTION
    write note data for the MIDI file among FILE0 and FILE1.

    If WARP is true, the smooth time map is first saved to
    SMOOTH_FILENAME, and note times are aligned to the other file.
    If WARP is false, simply write a transcription of the midi file.
    If neither file is a MIDI file, no transcription is written.
    When both are MIDI files, FILE0 is the one transcribed.

    The transcription goes to FILENAME. It starts with '#' comment
    lines, followed by one line per note.
    Every note has 6 fields separated by a space character. The fields are:
    <sequence number> <channel> <pitch> <velocity> <onset> <duration>
    Where
    <sequence number> is just an integer note number, e.g. 1, 2, 3, ...
    <channel> is MIDI channel from 0 to 15
    <pitch> is MIDI key number (60 = middle C)
    <velocity> is MIDI key velocity (1 to 127)
    <onset> is time in seconds, rounded to 3 decimal places (milliseconds)
    <duration> is time in seconds, rounded to 3 decimal places

    MIDI_FILENAME and BEAT_FILENAME are passed on to
    edit_transcription(), which also closes the transcription file.
*/
void save_transcription(const char *file0, const char *file1,
        bool warp, const char *filename, const char *smooth_filename,
        const char *midi_filename, const char *beat_filename)
{
    if (warp) save_smooth_file(smooth_filename, sa);

    // nothing to transcribe unless at least one input is a midi file
    if (!is_midi_file(file0) && !is_midi_file(file1)) {
        return;
    }

    const bool midi_is_first = is_midi_file(file0);
    const char *midiname = midi_is_first ? file0 : file1;  // midi file to be read
    const char *audioname = midi_is_first ? file1 : file0; // file aligned to

    Alg_seq seq(midiname, true);

    FILE *outf = fopen(filename, "w");
    if (!outf) {
        printf("Error: could not open %s\n", filename);
        return;
    }

    fprintf(outf, "# transcription of %s\n", midiname);
    if (warp) {
        fprintf(outf, "# note times are aligned to %s\n", audioname);
    } else {
        fprintf(outf, "# times are unmodified from those in MIDI file\n");
    }
    fprintf(outf, "# transcription format : <sequence number> "
            "<channel> <pitch> <velocity> <onset> <duration>\n");

    // writes the notes and closes outf
    edit_transcription(seq, warp, outf, midi_filename, beat_filename);
}
/* SAVE_PATH
    write the alignment path to FILENAME

    One line is written for each of the PATHLEN path points:
    pathx[p] scaled by ACTUAL_FRAME_PERIOD_0, a space, then pathy[p]
    scaled by ACTUAL_FRAME_PERIOD_1, both in %g format.

    If the file cannot be opened, an error is reported on stderr and
    nothing is written. (An assert used to be the only guard here; it
    vanishes when NDEBUG is defined, leaving a NULL stream to be used.)
*/
void save_path(const char *filename, int pathlen, short* pathx, short *pathy,
        float actual_frame_period_0, float actual_frame_period_1)
{
    // print the path to a (plot) file
    FILE *pathf = fopen(filename, "w");
    if (!pathf) {
        fprintf(stderr, "Error: could not open %s for write.\n", filename);
        return;
    }
    int p;
    for (p = 0; p < pathlen; p++) {
        fprintf(pathf, "%g %g\n", pathx[p] * actual_frame_period_0,
                pathy[p] * actual_frame_period_1);
    }
    fclose(pathf);
}
/*
    Prints the chroma table (for debugging): one output line per
    frame, each value formatted as "%5.2f | ".
    NOTE(review): the inner loop runs through index CHROMA_BIN_COUNT
    inclusive, i.e. CHROMA_BIN_COUNT + 1 values per frame; presumably
    each frame carries one extra entry after the chroma bins --
    confirm against the AREF2 definition.
*/
void print_chroma_table(const float *chrom_energy, int frames)
{
    for (int frame = 0; frame < frames; frame++) {
        for (int bin = 0; bin <= CHROMA_BIN_COUNT; bin++) {
            printf("%5.2f | ", AREF2(chrom_energy, frame, bin));
        }
        printf("\n");
    }
}
/* FLAG_TAKES_VALUE
    returns nonzero if FLAG is one of the option letters recognized
    by main(); every recognized option is followed by a value
*/
static int flag_takes_value(char flag)
{
    switch (flag) {
      case 'h': case 'w': case 'd':
      case 'r': case 's': case 't': case 'm': case 'b': case 'i':
      case 'o': case 'p': case 'x':
        return 1;
      default:
        return 0;
    }
}


/* MAIN
    command line entry point.

    Parses the options described by print_usage() and up to two input
    file names. With one input file, only a transcription of that file
    is written. With two, the files are aligned and the path, distance
    image, smooth time map and (when one file is MIDI) transcription,
    MIDI file and beat map are saved.

    Returns 1 when the command line is unusable (no arguments, no
    input file, or an option without its value) and 0 otherwise,
    including when alignment fails.
*/
int main(int argc, char *argv [])
{
    const char *progname;
    // input names start as NULL so a missing file is never read
    // as an uninitialized pointer
    const char *infilename0 = NULL;
    const char *infilename1 = NULL;
    const char *smooth_filename, *path_filename, *trans_filename;
    const char *midi_filename, *beat_filename, *image_filename;
    // counts input files seen; just transcribe if transcribe == 1
    int transcribe = 0;

    // Default for the user definable parameters
    path_filename = "path.data";
    smooth_filename = "smooth.data";
    trans_filename = "transcription.txt";
    midi_filename = "midi.mid";
    beat_filename = "beatmap.txt";
    image_filename = "distance.pnm";

    progname = strrchr(argv [0], '/');
    progname = progname ? progname + 1 : argv[0] ;

    // If no arguments, return usage
    if (argc < 2) {
        print_usage(progname);
        return 1;
    }

    /*******PARSING CODE BEGINS*********/
    int i = 1;
    while (i < argc) {
        //expected flagged argument
        if (argv[i][0] == '-') {
            char flag = argv[i][1];
            // NULL when the flag is the last word on the command line
            const char *value = (i + 1 < argc) ? argv[i + 1] : NULL;

            if (flag_takes_value(flag)) {
                if (!value) {
                    // previously argv[argc] (NULL) was handed to atof()
                    // or stored as a file name
                    fprintf(stderr, "Error: option -%c requires a value.\n",
                            flag);
                    print_usage(progname);
                    return 1;
                }
                if (flag == 'h') {
                    sa.frame_period = atof(value);
                } else if (flag == 'w') {
                    sa.window_size = atof(value);
                } else if (flag == 'r') {
                    path_filename = value;
                } else if (flag == 's') {
                    smooth_filename = value;
                } else if (flag == 't') {
                    trans_filename = value;
                } else if (flag == 'm') {
                    midi_filename = value;
                } else if (flag == 'i') {
                    image_filename = value;
                } else if (flag == 'b') {
                    beat_filename = value;
                } else if (flag == 'o') {
                    sa.smooth_time = atof(value);
                } else if (flag == 'p') {
                    sa.presmooth_time = atof(value);
                } else if (flag == 'x') {
                    sa.line_time = atof(value);
                } else if (flag == 'd') {
                    sa.silence_threshold = atof(value);
                }
            }
            // skip the value; an unrecognized flag is ignored together
            // with the word that follows it, as before
            i++;
        }
        // When aligning audio to midi we must force file0 to be midi
        else {
            // file 1 is midi
            if (transcribe == 0) {
                infilename0 = argv[i];
                transcribe++;
            }
            // file 2 is audio or a second midi
            else {
                infilename1 = argv[i];
                transcribe++;
            }
        }
        i++;
    }
    /**********END PARSING ***********/

    // only flags were given: there is nothing to transcribe or align
    if (transcribe == 0) {
        fprintf(stderr, "Error: no input file was given.\n");
        print_usage(progname);
        return 1;
    }

    if (sa.presmooth_time > 0 && sa.line_time > 0) {
        printf("WARNING: both -p and -x options selected.\n");
    }

    if (transcribe == 1) {
        // if only one midi file, just write transcription and exit,
        // no alignment
        save_transcription(infilename0, "", false, trans_filename,NULL, NULL, NULL);
        printf("Wrote %s\n", trans_filename);
        goto finish;
    }

    // if midi only in infilename1, make it infilename0
    if (is_midi_file(infilename1) && !is_midi_file(infilename0)) {
        const char *temp;
        temp = infilename0;
        infilename0 = infilename1;
        infilename1 = temp;
    }

    if (!align_files(infilename0, infilename1, sa, true /* verbose */)) {
        printf("An error occurred, not saving path and transcription data\n");
        goto finish;
    }

    if (sa.file0_frames <= 2 || sa.file1_frames <= 2) {
        printf("Error: file frame counts are low: %d (for input 1) and %d "
               "for input 2)\n...not saving path and transcription data\n",
               sa.file0_frames, sa.file1_frames);
        goto finish;
    }

    // save path
    save_path(path_filename, sa.pathlen, sa.pathx, sa.pathy,
              sa.actual_frame_period_0, sa.actual_frame_period_1);

    // save image of distance matrix
    save_image(image_filename, sa);

    // save smooth, midi, transcription
    save_transcription(infilename0, infilename1, true, trans_filename,
                       smooth_filename, midi_filename, beat_filename);

    // print what the chroma matrix looks like
    /*
    printf("file0 chroma table: \n");
    print_chroma_table(chrom_energy0,file0_frames);
    printf("\nfile1 chroma table: \n");
    print_chroma_table(chrom_energy1, file1_frames);
    */

    // only path and smooth are written when aligning two audio files
    if (is_midi_file(infilename0) || is_midi_file(infilename1))
        printf("Wrote %s, %s, %s, and %s.\n", path_filename, smooth_filename,
               trans_filename, beat_filename);
    else
        printf("Wrote %s and %s.", path_filename, smooth_filename);

finish:
    // same platform test as print_usage(), so the pause also happens
    // when only _WIN32 is defined
#if (defined (_WIN32) || defined (WIN32))
    printf("Type RETURN to exit\n");
    getchar();
#endif
    return 0 ;
} /* main */
/* print_path_range -- debugging output */
/*
    prints the path points with indices I through J (inclusive),
    one "<pathx> <pathy>" pair per line; prints nothing when I > J
*/
void print_path_range(const short *pathx, const short *pathy, int i, int j)
{
    for (int k = i; k <= j; k++) {
        printf("%d %d\n", pathx[k], pathy[k]);
    }
}