diff --git a/lib-src/libnyquist/nyquist/cmupv/src/cmupv.c b/lib-src/libnyquist/nyquist/cmupv/src/cmupv.c new file mode 100644 index 000000000..1e81e6b25 --- /dev/null +++ b/lib-src/libnyquist/nyquist/cmupv/src/cmupv.c @@ -0,0 +1,1097 @@ +/* cmupv.c -- phase vocoder */ + +/* Computation is driven by demands for output. The client calls either +pv_get_output() or pv_get_output2(). Either way, output is returned +one blocksize at a time. The blocksize is set by pv_set_blocksize() +and defaults to the synthesis hopsize, which defaults to fftsize/8. + +Since the blocksize and hopsize are not necessarily matched in any +way, there is a buffer to accumulate the overlapping "grains" of sound +which we call the synthesis frames (each synthesis frame is computed +by adjusting phases of an analysis frame). The buffer is called +output_buffer, and the length (in floats) is output_buffer_len. + +The output_buffer_len has to be big enough to contain blocksize samples +which are about to be returned as output plus fftsize samples which +overlap with future synthesis frames that will be added later. The +output_buffer_len also has to be hopsize + fftsize samples (probably +these worst-case sizes are conservative, but it's easier to set +workable upper bounds than to think about all the special cases when +output_buffer_len, blocksize, hopsize, and fftsize are all arbitrary). + +The basic structure to produce blocksize samples is as follows: +out_next is the pointer to the next block of blocksize samples. This +is a pointer directly into output_buffer, initially the first sample +in output_buffer. + +We also keep a pointer frame_next, which is where the next synthesis +frame will be added into output_buffer. Thus, the number of samples +that have been completely computed but not output (i.e. no more +overlapping frames will be added) is frame_next - out_next. When +(frame_next - out_next) > blocksize, we can deliver blocksize samples +to the caller. + +The next constraint is that as we add overlapping synthesis frames into +output_buffer, we have to have room for them, so frame_next + fftsize +must not be greater than output_buffer + output_buffer_len. If this +constraint is not met, we need to shift everything toward the beginning +of the output_buffer. Since we've output everything up to out_next, we +shift by out_next - output_buffer. + +Because output_buffer_len >= blocksize + fftsize, we are guaranteed that +there is always room to add in any synthesis frames that overlap the +blocksize samples to be returned next. + +PHASE COHERENCE + +To preserve phase relationships when a sine spills over multiple bins +(which happens always due to windowing), we adjust phase based on peaks +in the magnitude spectrum. The phases of neighboring bins are adjusted +by the same angle. + +Specification: We want to divide the spectrum into peaks: between local +minima, adjust according to the peak. + +Algorithm: Set "previous" minimum to -1 and previous magnitude to 0 +- search for the "previous" peak +- iteratively do the following: + - search for the next minimum + - search for the next peak + - compute range of bins to modify, assign minimum to the largest peak + - compute the phase adjustment for the next peak + - apply the phase adjustment to range of bins + - set previous minumum to next minimum + - set previous peak to next peak +*/ + +/* BEGIN PV_SINE_TEST debugging code... + This debugging code is for testing a special case: the input is a sine tone +with amplitude about 1.0 and frequency about 689Hz at 44100Hz sample rate, +resulting in a period of exactly 64 samples, which will be bin 8 with an +fftsize of 512. + The goal here is to compute exactly what the phasevocoder should be doing +so we can compare to what it does. When SINETEST is enabled, data will be +printed. At the time this code is being written, the result with a stretch +factor of exactly 1.1 is a pulsing sound when the absolute interface is used. +With the absolute interface, the analysis hopsize is mostly 58, with a hop of +59 samples every 5 or 6 frames. +*/ +// #define PV_SINE_TEST 1 +#ifdef PV_SINE_TEST +double pvst_frequency = 44100.0 / 64.0; // about 689Hz +long pvst_offset = -1000; // -1 means we haven't seen first frame yet +// the first hop size is bogus, so we ignore it, then accumulate hop sizes here +#endif +/* END PV_SINE_TEST */ + +#include +#include +#include +#include +#include "internal.h" +// only needed for some debugging code which is probably commented out +#include "cmupvdbg.h" + +// define PHASE_FIX to tie neighboring bin phase to phase of spectral peaks +// #define PHASE_FIX 1 + +// debugging output on or off +// #define D if (1) +#define D if (0) + +// more debugging +#define DD if (0) + +#define LOGFFTSIZE_DEFAULT 11 +#define RATIO_DEFAULT 1 +#define BOOL int +#define FALSE 0 +#define TRUE 1 +#define TWOPI (2.0 * M_PI) + +// These macros are used for memory allocation and freeing. Note that +// only ZERO acts like a function. The rest only work on fields of pv. +#define PVFREE(field) if (pv->field) { pv->free(pv->field); pv->field = NULL; } +#define PVALLOC(field, size) \ + pv->field = (float *)pv->malloc((size) * sizeof(*(pv->field))) +#define PVREALLOC(field, size) { PVFREE(field); PVALLOC(field, size); } +#define ZERO(array, size) memset((array), 0, (size) * sizeof(*(array))) + + +#include "cmupv.h" +#include "fftext.h" + +typedef enum { + PV_UNINITIALIZED, + PV_START, + PV_GOT_COUNT, + PV_GOT_INPUT } pv_phase_type; + +struct position // each element in the structure array +{ + long ana_pos; // the sample number of the center of analysis frame + long syn_pos; // the sample number of the center of synthesis frame +}; + +typedef struct { + void *(*malloc)(size_t); // malloc is used to allocate memory. If you + // have a real-time system and want to avoid the standard library + // malloc() which may have priority inversion problems due to locks, + // you can supply your own lock-free implementation + void (*free)(void *); // if you provide a custom malloc, you should + // provide a matching custom free() + int blocksize; // the size of audio blocks produced by the phase vocoder. + int fftsize; // the number of samples in each FFT. Should be power of 2. + int log2_fft; // 2 is the log base + int syn_hopsize; // the hopsize used in reconstructing the output + float ratio; // the time-stretch ratio. NOTE: even though ratio is + // specified as the amount input is stretched, THIS ratio is + // the reciprocal, i.e. input duration / output duration + int max_ana_hopsize; // fftsize / 3 is max hopsize + float pre_ratio; // previous ratio is used to calculate + // the previous input_length + int mode; // 0 - normal, 1 - phase fix, 2 - robovoice + float *ana_win; // the window function used on input (analysis) + float *syn_win; // the window function used on output (synthesis) + long input_eff_pos; // input effective position is the sample number + // of the input that corresponds to the current output + float *input_buffer; // used to buffer input samples + long input_buffer_len; // how many floats can input_buffer hold? + float *output_buffer; // used to buffer output samples + long output_buffer_len; // how big in floats is the output buffer? + float *input_head; // pointer to the start of the previous + // analysis frame. input_head is updated by + // hopsize before reading each frame. + float *input_rear; // pointer to the end of the input data + int frames_to_compute; // how many frames we'll compute in pv_get_output() + int expected_input; // the value computed by pv_get_input_count() + // we check that pv_put_input delivers what we asked for + float *out_next; // pointer into the output buffer from where the + // next output sample will delivered + float *frame_next; // pointer into the output buffer to where the + // next frame will by added + Pv_callback callback; // function to retrieve an input frame + void *rock; // object pointer or context info to be passed to callback + pv_phase_type phase; + BOOL first_time; // true only on the first fft output frame + BOOL absolute; // true if using the callback protocol -- set by create2() + float *ana_frame; // analysis frame + float *syn_frame; // synthesis frame + float *mag; // magnitude for points in the frame being processed + float *ana_phase; // phase for points in the analysis frame + // being processed + float *syn_phase; // phase for points in the synthesis frame + // being processed + float *pre_ana_phase; // recording last analysis phase for estimating + // the frequency + float *pre_syn_phase; // recording last systhesis phase for rebuilting + // the new phase + float *bin_freq; // bin frequency, used in phase unwrapping; + + struct position *pos_buffer; // Circular array storing the sample + // number of the middle of the frames + // (both for analysis and synthesis frames) + struct position *pos_buffer_head; // beginning of the circular array, + // points to oldest entry. If equal to pos_buffer_rear, the queue + // is empty. Never points to + // pos_buffer + queue_length. It wraps around to pos_buffer. + struct position *pos_buffer_rear; // rear of the circular array, + // points to slot AFTER the most recent entry. If equal to + // pos_buffer_rear, the queue is empty. Never points to + // pos_buffer + queue_length. It wraps around to pos_buffer. + long queue_length; // length of the circular queue of corresponding times + long input_total; // how many input samples did we get so far? + // initially 0, and incremented upon pv_put_input() + // so the input_total count corresponds to the input_rear pointer. + long output_total; // how many output samples did we produce so far? + // initially 0, and incremented by block_size after each call to + // pv_get_output(). Corresponds to out_next. +} PV; + +//extern long int sig; +//extern long int sig1; +//extern long int sig2; + +// round_log_power - round fftsize up to a power of 2 +// return log2(rounded up fftsize) +// optionally set size_ptr to rounded up fftsize +// +int round_log_power(int fftsize, int *size_ptr) +{ + long double log2_fft = log2l(fftsize); + int round_log2_fft = (int) log2_fft; + if (round_log2_fft < log2_fft) { + round_log2_fft += 1; + } + if (log2_fft > 20 || ((1 << round_log2_fft) != fftsize)) { + round_log2_fft = 1024; // on error, substitute a sane value + } + if (size_ptr) *size_ptr = 1 << round_log2_fft; + return round_log2_fft; +} + + +Phase_vocoder pv_create(void *(*mallocfn)(size_t), void (*freefn)(void *)) +{ + if (!mallocfn) mallocfn = &malloc; + PV *pv = (PV *)mallocfn(sizeof(PV)); + ZERO(pv, 1); + pv->phase = PV_UNINITIALIZED; + pv->malloc = mallocfn; + pv->free = freefn; + pv_set_fftsize(pv, 1 << LOGFFTSIZE_DEFAULT); + // syn_hopsize will now be FFTSIZE_DEFAULT / 8 + // pv_set_syn_hopsize(pv, FFTSIZE_DEFAULT / 8); + pv->blocksize = pv->syn_hopsize; + pv_set_ratio(pv, RATIO_DEFAULT); + pv->first_time = TRUE; + pv->mode = 0; + return (Phase_vocoder)pv; +} + + +Phase_vocoder pv_create2(void *(*mallocfn)(size_t), void (*freefn)(void *), + Pv_callback callback, void *rock) +{ + PV *pv = (PV *)pv_create(mallocfn, freefn); + pv->absolute = TRUE; + pv_set_callback(pv, callback, rock); + return (Phase_vocoder)pv; +} + + +void pv_end(Phase_vocoder *x) +{ + PV *pv = (PV *)(*x); + fftFree(); + PVFREE(ana_win); + PVFREE(syn_win); + PVFREE(input_buffer); + PVFREE(output_buffer); + PVFREE(ana_frame); + PVFREE(syn_frame); + PVFREE(mag); + PVFREE(ana_phase); + PVFREE(syn_phase); + PVFREE(pre_ana_phase); + PVFREE(pre_syn_phase); + PVFREE(bin_freq); + PVFREE(pos_buffer); + pv->free(pv); + *x = NULL; +} + +void pv_set_callback(Phase_vocoder x, Pv_callback callback, void *rock) +{ + PV *pv = (PV *)x; + pv->callback = callback; + pv->rock = rock; +} + +void pv_set_blocksize(Phase_vocoder x, int n) +{ + PV *pv = (PV *)x; + pv->blocksize = n; + pv->phase = PV_UNINITIALIZED; +} + +void pv_set_fftsize(Phase_vocoder x, int n) +{ + PV *pv = (PV *)x; + // n must be power of 2 and for sanity, we'll require it to be at least 16 + // power of 2 test: only if n is a power of 2 will n-1 clear the high order + // bit: + if ((n & (n - 1)) || (n < 16)) { + return; // ignore bad argument + } + // preserve the same syn_hopsize ratio, e.g. if syn_hopsize + // is fftsize/8, then after setting fftsize, new syn_hopsize + // will be (new fftsize)/8 + int hop = (pv->syn_hopsize == 0 ? 8 : + pv->fftsize / pv->syn_hopsize); // the divisor + pv->fftsize = n; + pv->log2_fft = round_log_power(n, &(pv->fftsize)); + pv_set_syn_hopsize(x, n / hop); + pv->phase = PV_UNINITIALIZED; + pv->max_ana_hopsize = n / 3; +} + +void pv_set_ratio(Phase_vocoder x, float ratio) +{ + PV *pv = (PV *)x; + assert(pv->phase == PV_START || pv->phase == PV_UNINITIALIZED); + pv->pre_ratio = pv->ratio; + pv->ratio = 1.0F / ratio; +} + +void pv_set_syn_hopsize(Phase_vocoder x, int n) + // set the hopsize. Must be fftsize divided by a power of 2. + // non-power-of-two n will be rounded up. + // hopsize must be at least 1 and at most fftsize/4. + // out-of-bound n will be put within bounds +{ + PV *pv = (PV *)x; + if (n < 1) n = 1; + round_log_power(n, &(pv->syn_hopsize)); + if (pv->syn_hopsize > pv->fftsize / 4) { + pv->syn_hopsize = pv->fftsize / 4; + } + pv->phase = PV_UNINITIALIZED; +} + +void pv_initialize(Phase_vocoder x) +{ + PV *pv = (PV *)x; + + // allocate space and initialize for window + if (! pv->ana_win) + pv->ana_win = pv_window(pv, hann); //default analysis window is Hanning + if (! pv->syn_win) + pv->syn_win = pv_window(pv, hann); //default synthesis window is Hanning + + // allocate space and initialize for input buffer and output buffer + if (pv->blocksize <= pv->syn_hopsize) { + pv->input_buffer_len = pv->fftsize; + } else { + // The maximum of ana_hopsize is fftsize/3, so the + // pv->input_buffer_len is set to the maximum so as to avoid + // freeing and allocating memory for input buffer many times + // due to the changing of time-stretching ratio. + // The maximum amount needed is fftsize to produce one frame of + // output plus: fftsize/3 for each additional frame. The total + // number of frames we must compute to form blocksize samples + // of output is blocksize / hopsize. + pv->input_buffer_len = pv->fftsize + 2 /* to avoid rounding error */ + + lroundf((((float) pv->blocksize / pv->syn_hopsize) - 1) * (pv->fftsize / 3.0F)); + } + if (! pv->absolute) { + PVREALLOC(input_buffer, pv->input_buffer_len); + // preload with fftsize/2 zeros so that the first sample of the input + // will be at the center of the fft window + pv->input_head = pv->input_buffer; + ZERO(pv->input_buffer, pv->fftsize / 2); + pv->input_rear = pv->input_buffer + pv->fftsize / 2; + } + + // how long does the output buffer need to be? + // It has to be long enough to add in an entire fft frame, so + // at least fftsize. It should then be at least syn_hopsize - 2 + // bigger, so that if we're outputting syn_hopsize - 1 samples + // (a really bad choice, by the way), we could have syn_hopsize - 2 + // samples in the output buffer and the next buffer gets added + // starting at location syn_hopsize - 2, so we need syn_hopsize - 2 + + // fftsize. Let's make it an even syn_hopsize + fftsize. + // It also has to be long enough to hold an output buffer length + + // fftsize. + // So overall, we need max(syn_hopsize, blocksize) + fftsize. + // + pv->output_buffer_len = pv->blocksize; + if (pv->blocksize <= pv->syn_hopsize) { + pv->output_buffer_len = pv->syn_hopsize; + } + pv->output_buffer_len += pv->fftsize; + PVREALLOC(output_buffer, pv->output_buffer_len); + D printf("pv_initialize: input_buffer_len %ld\n", pv->input_buffer_len); + D printf(" output_buffer_len %ld\n", pv->output_buffer_len); + D printf(" blocksize %d\n", pv->blocksize); + D printf(" fftsize %d\n", pv->fftsize); + D printf(" syn_hopsize %d\n", pv->syn_hopsize); + pv->out_next = pv->output_buffer; + pv->frame_next = pv->output_buffer; + ZERO(pv->output_buffer, pv->output_buffer_len); + PVREALLOC(ana_frame, pv->fftsize); + PVREALLOC(syn_frame, pv->fftsize); + PVREALLOC(mag, pv->fftsize / 2 + 1); + // allocate space for phase and pre_phase which will be used in + // phase unwrapping + PVREALLOC(ana_phase, pv->fftsize / 2 + 1); + PVREALLOC(syn_phase, pv->fftsize / 2 + 1); + PVREALLOC(pre_ana_phase, pv->fftsize / 2 + 1); + PVREALLOC(pre_syn_phase, pv->fftsize / 2 + 1); + // bin frequency, used in phase unwrapping + PVREALLOC(bin_freq, pv->fftsize / 2 + 1); + int i; + for (i = 0; i <= pv->fftsize / 2; i++) + pv->bin_freq[i] = (float) (TWOPI * i / pv->fftsize); + // get_effective_pos() maps from the beginning of the + // next block to the corresponding input sample. Since + // the output hopsize is fixed, the next output sample + // was at the center of the frame if we go back by + // framesize / syn_hopsize frames. Thus we need + // framesize / syn_hopsize entries in the queue. We'll + // pad by a couple to deal with rounding issues: + pv->queue_length = pv->fftsize / (pv->syn_hopsize * 2) + 2; + if (!pv->absolute) { + PVFREE(pos_buffer); + pv->pos_buffer = (struct position *) + (pv->malloc((pv->queue_length + 1) * sizeof(struct position))); + + pv->pos_buffer_head = pv->pos_buffer; + pv->pos_buffer_rear = pv->pos_buffer; + } + // make sure tables are constructed before we start real-time processing + int fft_error_sign = fftInit(pv->log2_fft); // target fftInit + assert(!fft_error_sign); + + pv->phase = PV_START; +} + + +void pv_set_ana_window(Phase_vocoder x, float *window) +{ + PV *pv = (PV*)x; + PVREALLOC(ana_win, pv->fftsize); + memcpy(pv->ana_win, window, pv->fftsize * sizeof(float)); +} + + +void pv_set_syn_window(Phase_vocoder x, float *window) +{ + PV *pv = (PV*)x; + PVREALLOC(syn_win, pv->fftsize); + memcpy(pv->syn_win, window, pv->fftsize * sizeof(float)); +} + + +void pv_set_mode(Phase_vocoder x, int mode) +{ + PV *pv = (PV*)x; + if (mode >= 0 && mode <= 2) { + pv->mode = mode; + } +} + + +float *pv_window(Phase_vocoder x, float (*window_type)(double x)) + // window is after normalized +{ + PV *pv = (PV *)x; + float sum_window_square = 0, COLA_factor; + int window_length = pv->fftsize; + float *window = (float *)pv->malloc(window_length * sizeof(float)); + int i; + for (i = 0; i < window_length; i++) { + window[i] = window_type((double)i / window_length); + // note that the computation is all double even if window[i] is float + sum_window_square += window[i] * window[i]; + } + COLA_factor = sum_window_square / pv->syn_hopsize; + for (i = 0; i <= pv->fftsize - 1; i++) + window[i] = (float) (window[i] / sqrt(COLA_factor)); + return window; +} + + +int pv_get_input_count(Phase_vocoder x) +{ + PV *pv = (PV*)x; + int ana_hopsize = lroundf((pv->syn_hopsize) * (pv->ratio)); + if (ana_hopsize > pv->max_ana_hopsize) { + ana_hopsize = pv->max_ana_hopsize; + } + + assert(pv->phase == PV_START); + + // To produce blocksize, how many samples do we need? The next + // sample to output is at out_next, and the next frame will be + // added at frame_next, so we've already computed + // out_next - frame_next: + int need = pv->blocksize - (int)(pv->frame_next - pv->out_next); + // need is now the number of output samples we need + // How many fft frames will be required? Round up by adding hopsize-1: + int frames = (need + pv->syn_hopsize - 1) / pv->syn_hopsize; + if (frames > 0) { + // Skip hopsize frames except on the first time, where + // we always put first sample in the middle of the frame: + if (!pv->first_time) { + pv->input_head += ana_hopsize; + } + // We need framesize + hopsize * (frames - 1) in order to compute + // frames overlapping fft frames of size framesize. + need = pv->fftsize + ana_hopsize * (frames - 1); + // Now how many input samples do we have already? + long have = (long) (pv->input_rear - pv->input_head); + need -= have; + // See if we have room for need samples in the buffer: + if (pv->input_rear + need > pv->input_buffer + pv->input_buffer_len) { + // not enough room. Shift the input buffer to make space. + long shift = (long) (pv->input_head - pv->input_buffer); + memmove(pv->input_buffer, pv->input_head, + (pv->input_rear - pv->input_head) * + sizeof(*(pv->input_buffer))); + pv->input_head -= shift; + pv->input_rear -= shift; + D printf(" after input shift by %ld, head at %ld\n", + shift, (long) (pv->input_head - pv->input_buffer)); + } + // make sure our assumptions are true and we now have space: + assert(pv->input_rear + need <= + pv->input_buffer + pv->input_buffer_len); + // See if we have room in the output_buffer + // last sample will be at frame_next + (frames - 1) * hopsize + fftsize + float *last_output = pv->frame_next + + (frames - 1) * pv->syn_hopsize + pv->fftsize; + if (last_output > pv->output_buffer + pv->output_buffer_len) { + // not enough room. Shift the output buffer to make space. + long shift = (long) (pv->out_next - pv->output_buffer); + memmove(pv->output_buffer, pv->out_next, + (pv->fftsize - pv->syn_hopsize) * + sizeof(*(pv->output_buffer))); + pv->frame_next -= shift; + pv->out_next -= shift; + } + } else { + frames = 0; + need = 0; + } + pv->frames_to_compute = frames; + pv->phase = PV_GOT_COUNT; + pv->expected_input = need; + return need; +} + + +double pv_get_effective_pos(Phase_vocoder x) +{ + PV *pv = (PV*)x; + assert(pv->phase == PV_START); + + // Find the appropriate position struct for the computation of + // effective audio position. We are given pv->output_total, the + // sample count at which we want the equivalent input sample + // count. We want to interpolate between two queue entries, so + // we'll search the queue until we have an entry that is greater + // than output_total. Then we set head to the previous entry + // because it will make future searches go faster. + + struct position *pos_find = pv->pos_buffer_head; + struct position *pos_find_prev = NULL; + while (pos_find != pv->pos_buffer_rear && + pos_find->syn_pos <= pv->output_total) { + pos_find_prev = pos_find; + pos_find++; + if (pos_find == pv->pos_buffer + pv->queue_length) { + pos_find = pv->pos_buffer; // wrap + } + } + // if pos_find and pos_find_prev both point to something, we + // can interpolate: + if (pos_find != pv->pos_buffer_rear && pos_find_prev) { + // we can drop old positions from queue now: + pv->pos_buffer_head = pos_find_prev; + // interpolate + long output_step = pos_find->syn_pos - pos_find_prev->syn_pos; + long input_step = pos_find->ana_pos - pos_find_prev->ana_pos; + return pos_find_prev->ana_pos + input_step * + (double)(pv->output_total - pos_find_prev->syn_pos) / + (double)output_step; + // if there's nothing in the queue, then we must be starting. + // after the first frame there are TWO entries + } else if (pos_find_prev == NULL) { + // if any of these fail, maybe the queue_length is too small + // and we dropped some history too early + assert(pos_find == pv->pos_buffer_rear); + assert(pv->first_time); + assert(pv->output_total == 0); + return -(pv->ratio * pv->fftsize / 2.0); + } else { // I can't think of any other case. + assert(FALSE); + } +} + + +// Send samples to phase vocoder. size should match the number of +// samples computed by get_input_count. +// +void pv_put_input(Phase_vocoder x, int size, float *samples) + // 'samples' points to samples to be sent each time +{ + PV *pv = (PV *)x; + assert(pv->phase == PV_GOT_COUNT); + // size must agree with the value computed by pv_get_input_count: + assert(pv->expected_input == size); + D printf("pv_put_input: size %d, %g at %ld\n", + size, *samples, (long) (pv->input_rear - pv->input_buffer)); + if (size > 0) { + memcpy(pv->input_rear, samples, size * sizeof(*pv->input_rear)); + pv->input_rear += size; + pv->input_total += size; + } + pv->phase = PV_GOT_INPUT; +} + + +void compute_one_frame(PV *pv, int ana_hopsize) +{ + float *syn_frame = pv->syn_frame; + float *ana_frame = pv->ana_frame; + int fftsize = pv->fftsize; + int log2_fft = pv->log2_fft; + float *mag = pv->mag; + float *ana_phase = pv->ana_phase; + float *syn_phase = pv->syn_phase; + float *syn_win = pv->syn_win; + float *frame_next = pv->frame_next; + int syn_hopsize = pv->syn_hopsize; + float *pre_ana_phase = pv->pre_ana_phase; + float *pre_syn_phase = pv->pre_syn_phase; + float *bin_freq = pv->bin_freq; + int i; +//#define SKIP_PHASE_ADJUST +#ifdef SKIP_PHASE_ADJUST + // for debugging we just copy the windowed input to the output + // without adjusting phases. + memcpy(syn_frame, ana_frame, fftsize * sizeof(*syn_frame)); +#else + /*DBG + long zeros = pv->output_total + frame_next - pv->out_next; + write_pv_frame(zeros, ana_frame, fftsize, "pvsyn"); + DBG*/ + OneDimensionFFTshift(ana_frame, fftsize); // FFTshift + fftInit(log2_fft); + rffts(ana_frame, log2_fft, 1); + + /* get magnitude and phase */ + mag[0] = ana_frame[0]; + ana_phase[0] = 0; + mag[fftsize / 2] = ana_frame[1]; + ana_phase[fftsize / 2] = 0; + for (i = 1; i < fftsize / 2; i++) { + float real = ana_frame[2 * i]; + float imag = ana_frame[2 * i + 1]; + mag[i] = (float)sqrt(real * real + imag * imag); + ana_phase[i] = (float)atan2(imag, real); + } +#ifdef PV_SINE_TEST + if (pvst_offset == -1000) pvst_offset = -24; + else pvst_offset += ana_hopsize; + // phase of the sine should be (offset / 64) * TWO_PI, + // but when sine phase is 0, atan2(0, -1) is -pi/2 so we have + // atan2_phase = sine_phase - pi/2 + // adding 3 pi/2 instead of subtracting pi/2 to make fmod result positive + double est_atan2_phase = fmod((pvst_offset / 64.0) * TWOPI + (3 * M_PI_2), TWOPI); + if (est_atan2_phase > M_PI) est_atan2_phase -= TWOPI; + printf("offset %ld hop %ld ph[8] %5f est.ph %5f real %5f imag %5f\n", + pvst_offset, ana_hopsize, ana_phase[8], est_atan2_phase, + ana_frame[16], ana_frame[17]); +#endif + /* phase unwrapping & set synthesis phase */ + if (pv->first_time) { + D printf("phasevocoder fftsize %d hopsize %d\n", + fftsize, syn_hopsize); + memcpy(syn_phase, ana_phase, + ((fftsize / 2) + 1) * sizeof(*syn_phase)); + } else if (pv->mode == PV_MODE_PHASEFIX) { + // we'll start each iteration with prev_min_x set to the lowest bin + // that will be assigned to the peak at prev_peak_x. We'll find the + // next_min_x and the following next_peak_x. Update the phases. + // + int prev_peak_x = 0; // index of previous peak + float prev_peak_mag; // magnitude of previous peak + int prev_min_x = 0; // index of previous local minimum + int next_peak_x; // index of peak between prev_min_x and next_min_x + float next_peak_mag; // magnitude of next peak + int next_min_x; // index of next minimum (after peak_x) + float next_min_mag; // magnitude at next_min_x + float last_mag = mag[0]; // used in search for peaks + float this_mag = mag[1]; + float next_mag; + int i; // loop index + // decide if we're starting on a peak or a minimum: + if (last_mag <= this_mag) { // starting on a minimum + // find peak + for (i = 1; i < fftsize / 2; i++) { + next_mag = mag[i + 1]; // invariant: last_mag <= this_mag + if (this_mag > next_mag) { // found peak + prev_peak_x = i; + prev_peak_mag = this_mag; + break; // invariant: this_mag > next_mag + } + // invariant: this_mag <= next_mag + last_mag = this_mag; + this_mag = next_mag; // invariant: last_mag <= this_mag + } + if (i >= fftsize / 2) { + prev_peak_x = i; + } + } else { // set up to start loop + next_mag = this_mag; // invariant: last_mag > this_mag + this_mag = last_mag; // invariant: this_mag > next_mag + prev_peak_mag = last_mag; + } + while (prev_min_x <= fftsize / 2) { + // invariant: prev_min_x is previous local minimum or 0 + // prev_peak_x is first local peak after prev_min_x (or 0) + // last_mag is mag at prev_peak_x - 1 + // this_mag is mag at prev_peak_x + // next_mag is mag at prev_peak_x + 1 + // find next minimum + // Note: prev_peak_x might be fftsize/2, so i might be fftsize/2 + 1 + for (i = prev_peak_x + 1; i < fftsize / 2; i++) { + last_mag = this_mag; // invariant: this_mag > next_mag + this_mag = next_mag; // invariant: last_mag > this_mag + next_mag = mag[i + 1]; + if (this_mag <= next_mag) { // found minimum + // here, last_mag at i-1, this_mag at i, next_mag at i+1 + // and next_mix_x == i + break; + } // loop invariant: this_mag > next_mag + } + // invariant: this_mag <= next_mag || i == fftsize/2 + if (i >= fftsize / 2) { // special case at end of spectrum + // no minimum found + next_min_x = fftsize / 2 + 1; + } else { + next_min_x = i; // either we found peak or we set i to fftsize/2 + next_min_mag = mag[i]; + } // invariant: this_mag <= next_mag || i == fftsize/2 + // search for second peak; + for (i = next_min_x + 1; i < fftsize / 2; i++) { + last_mag = this_mag; // invariant: this_mag <= next_mag + this_mag = next_mag; // invariant: last_mag <= this_mag + next_mag = mag[i + 1]; + if (this_mag > next_mag) { // found peak + // here, last_mag at i-1, this_mag at i, next_mag at i+1 + // and next_peak_x == i + break; + } // loop invariant: this_mag <= next_mag + } + next_peak_x = i; + + // special case if we're at the end: + if (i >= fftsize / 2) { + if (next_mag < this_mag) { + next_peak_x = fftsize / 2 + 1; + } else { + next_peak_x = fftsize / 2; // this may not be necessary + next_peak_mag = mag[next_peak_x]; + } + } else { + next_peak_mag = mag[i]; + } + // now we have prev_min, prev_peak, next_min, and next_peak. We + // want bins from prev_min to next_min to get the phase shift of + // prev_peak. First decide if next_min gets assigned to prev_peak + // or next_peak. Assign to the closest peak, but break ties by + // picking the largest peak. + if (next_min_x - prev_peak_x < next_peak_x - next_min_x) { + // closer to prev_peak, so include min with it + next_min_x++; + } else if ((next_min_x - prev_peak_x == next_peak_x - next_min_x) && + // equidistant so see if prev_peak_mag > next_peak_mag + (prev_peak_mag > next_peak_mag)) { + next_min_x++; + } + // Now we want to adjust phases of prev_min_x, prev_min_x + 1, + // prev_min_x + 2, ..., next_min_x - 1. + // + // Assign the new phases according to the peak... + // increment between actual phase increment value + // and the phase increment value got when the + // it's the nearest bin frequency. Used + // in phase unwrapping: + int j = prev_peak_x; // just to make more concise notation + + double phase_increment = ana_phase[j] - pre_ana_phase[j] - + bin_freq[j] * ana_hopsize; + // need to get phase_increment between -M_PI and +M_PI. + // Algorithm: add M_PI, get phase_increment between 0 and TWO_PI, + // then subtract M_PI: + phase_increment = fmod(phase_increment + M_PI, TWOPI); + if (phase_increment < 0) + phase_increment += TWOPI; + phase_increment -= M_PI; + // estimated frequency from phase unwrapping + /*DBG + if (j == 8) + printf("phase_increment %g hopsize %d\n", phase_increment, ana_hopsize); + DBG*/ + float estimate_freq = (float) (phase_increment / ana_hopsize + + bin_freq[j]); + // get synthesis phase adjustment + phase_increment = pre_syn_phase[j] + + syn_hopsize * estimate_freq - ana_phase[j]; + /*DBG + if (j == 8) + printf("phase_increment at %d is %g, freq %g ", j, phase_increment, + estimate_freq); + DBG*/ + // update the range of bins: + for (i = prev_min_x; i < next_min_x; i++) { + syn_phase[i] = fmodf((float) (ana_phase[i] + phase_increment), + (float) TWOPI); + } + /*DBG + if (j == 8) printf("syn_phase[8] %g\n", syn_phase[8]); + DBG*/ + // now get ready for the next iteration + prev_min_x = next_min_x; + prev_peak_x = next_peak_x; + prev_peak_mag = next_peak_mag; + + } + } else if (pv->mode == PV_MODE_STANDARD) { + for (i = 0; i <= fftsize / 2; i++) { + // increment between actual phase increment value + // and the phase increment value got when the + // it's the nearest bin frequency. Used + // in phase unwrapping: + double phase_increment = ana_phase[i] - pre_ana_phase[i] - + bin_freq[i] * ana_hopsize; + // need to get phase_increment between -M_PI and +M_PI. + // Algorithm: add M_PI, get phase_increment between 0 and TWO_PI, + // then subtract M_PI: + phase_increment = fmod(phase_increment + M_PI, TWOPI); + if (phase_increment < 0) + phase_increment += TWOPI; + phase_increment -= M_PI; + + // estimated frequency from phase unwrapping + float estimate_freq = (float) (phase_increment / ana_hopsize + + bin_freq[i]); + // set synthesis phase + syn_phase[i] = fmodf((float) (pre_syn_phase[i] + + syn_hopsize * estimate_freq), + (float) TWOPI); + } + } else if (pv->mode == PV_MODE_ROBOVOICE) { + ; // syn_phase[] is unmodified, i.e. constant + } else { + assert(FALSE); // bad mode value + } + for (i = 0; i < fftsize / 2; i++) { + // record phases + pre_ana_phase[i] = ana_phase[i]; + pre_syn_phase[i] = syn_phase[i]; + + // update realpart and imagpart + syn_frame[i * 2] = (float) (mag[i] * cos(syn_phase[i])); + syn_frame[i * 2 + 1] = (float) (mag[i] * sin(syn_phase[i])); + } + pre_ana_phase[i] = ana_phase[i]; + pre_syn_phase[i] = syn_phase[i]; + // update realpart and imagpart + syn_frame[1] = (float) (mag[i] * cos(syn_phase[i])); + // inverse FFT + riffts(syn_frame, log2_fft, 1); + + // fftshift + OneDimensionFFTshift(syn_frame, fftsize); +#endif // SKIP_PHASE_ADJUST + D printf(" mid syn_frame->%g\n", syn_frame[pv->fftsize / 2]); + //D printf(" frame offset %ld\n", frame_next - pv->output_buffer); + // window the frame and then add it to the output buffer + // assume here that there is room to add in syn_frame + D printf(" add to frame_next: %ld\n", + (long) (pv->frame_next - pv->output_buffer)); + /*DBG + float tmp_frame[4096]; + for (int i = 0; i < fftsize; i++) { + tmp_frame[i] = syn_win[i] * syn_frame[i]; + } + write_pv_frame(zeros, syn_win, fftsize, "pvsyn"); + write_pv_frame(zeros, syn_frame, fftsize, "pvsyn"); + write_pv_frame(zeros, tmp_frame, fftsize, "pvsyn"); + DBG*/ + int sum_count = fftsize - syn_hopsize; + for (i = 0; i < sum_count; i++) { + frame_next[i] += syn_win[i] * syn_frame[i]; + } + for (/* continue from i */; i < fftsize; i++) { + DD assert(frame_next[i] == 0); + frame_next[i] = syn_win[i] * syn_frame[i]; + } + frame_next += syn_hopsize; + pv->frame_next = frame_next; +} + + +void update_position_queue(PV *pv, float *ana_center) +{ + int fftsize = pv->fftsize; + float *frame_next = pv->frame_next; + int syn_hopsize = pv->syn_hopsize; + float *out_next = pv->out_next; + + // put the positions of the processed frame into the queue + if (pv->first_time) { + // insert a special starting correspondence: + pv->pos_buffer_rear->ana_pos = lroundf(-pv->ratio * fftsize / 2); + pv->pos_buffer_rear->syn_pos = 0; + pv->pos_buffer_rear++; + } + // center of analysis window was at ana_center + // input_total corresponds to input_rear + pv->pos_buffer_rear->ana_pos = + pv->input_total - (long) (pv->input_rear - ana_center); + // output window center was at frame_next - syn_hopsize + fftsize / 2 + // output_total corresponds to out_next + pv->pos_buffer_rear->syn_pos = pv->output_total + + (long) ((frame_next - syn_hopsize + (fftsize / 2)) - out_next); + + // set pos_buffer_rear to new last element in the queue + pv->pos_buffer_rear++; + if (pv->pos_buffer_rear == pv->pos_buffer + pv->queue_length) { + pv->pos_buffer_rear = pv->pos_buffer; // wrap + } + // if queue is too full, remove first element at pos_buffer_head + if (pv->pos_buffer_head == pv->pos_buffer_rear) { + pv->pos_buffer_head++; + if (pv->pos_buffer_head == pv->pos_buffer + pv->queue_length) { + pv->pos_buffer_head = pv->pos_buffer; // wrap + } + } +} + + +float *finish_output(PV *pv) +{ + assert(pv->frame_next - pv->out_next >= pv->blocksize); + pv->phase = PV_START; + // remember the current output: + float *block = pv->out_next; + // update out_next where next output block will be computed + pv->out_next = block + pv->blocksize; + pv->output_total += pv->blocksize; + D printf(" return offset %ld = %g\n", + (long) (pv->out_next - pv->output_buffer), *(pv->out_next)); + /* DEBUG: To produce a 32767-sample-long sawtooth from 0 to 1 (roughly) + * as output, uncomment the following loop. This might be the first step + * in debugging. If you do not get a smoothly increasing ramp for 32K + * samples, then you are not handling the output of cmupv properly. + */ + /* for (int i = 0; i < pv->blocksize; i++) { + * block[i] = ((pv->output_total - pv->blocksize + i) % 32767) / 32768.0; + * } + */ + return block; +} + + +float *pv_get_output(Phase_vocoder x) +{ + PV *pv = (PV *)x; + assert(pv->phase == PV_GOT_INPUT); + + long blocksize = pv->blocksize; + int fftsize = pv->fftsize; + int frames_to_compute = pv->frames_to_compute; + int syn_hopsize = pv->syn_hopsize; + float *ana_win = pv->ana_win; + float ratio = pv->ratio; + float *input_head = pv->input_head; + float *out_next = pv->out_next; + float *ana_frame = pv->ana_frame; + float *ana_center; + + int ana_hopsize = lroundf(syn_hopsize * ratio); + if (ana_hopsize > pv->max_ana_hopsize) { + ana_hopsize = pv->max_ana_hopsize; + } + + // compute frames and add them to the output_buffer until there + // are blocksize samples ready to deliver + D printf("pv_get_output: frames_to_compute %d\n", frames_to_compute); + int frame; + for (frame = 0; frame < frames_to_compute; frame++) { + assert(pv->frame_next - out_next < blocksize); + int i; + for (i = 0; i < fftsize; i++) // get and window the buffer + ana_frame[i] = input_head[i] * ana_win[i]; + ana_center = input_head + fftsize / 2; + D printf(" mid ana_frame->%g at %ld\n", *ana_center, + (long) (ana_center - pv->input_buffer)); + if (frame < frames_to_compute - 1) { + input_head += ana_hopsize; // get ready for next iteration, + // but on the last iteration, we do not add hopsize because + // ratio might change. ana_hopsize is added in get_input_count() + // to set up the next analysis frame location + } else { + // on the last iteration, update pv->input_head. + // Equivalently, after the for loop we could do + // if (frames_to_compute > 0) + // pv->input_head += ana_hopsize * (frames_to_compute - 1); + pv->input_head = input_head; + } + compute_one_frame(pv, ana_hopsize); + update_position_queue(pv, ana_center); + // first_time is not reset in update_position_queue where it is tested + // because it is also used in pv_get_output2, which does not call + // update_position_queue() + pv->first_time = FALSE; + } + return finish_output(pv); +} + + +float *pv_get_output2(Phase_vocoder x) +{ + PV *pv = (PV *)x; + assert(pv->phase == PV_START); + + long blocksize = pv->blocksize; + int fftsize = pv->fftsize; + float *out_next = pv->out_next; + float *output_buffer = pv->output_buffer; + float *ana_frame = pv->ana_frame; + float *ana_win = pv->ana_win; + long output_buffer_len = pv->output_buffer_len; + + D printf("pv_get_output2: blocksize %ld frame_next %ld " + "out_next %ld buffer offset %ld\n", + blocksize, (long) (pv->frame_next - output_buffer), + (long) (out_next - output_buffer), + (long) (pv->output_total - (out_next - output_buffer))); + + // To produce blocksize, how many samples do we need? The next + // sample to output is at out_next, and the next frame will be + // addded at frame_next, so we've already computed + // out_next - frame_next: + while (blocksize > (pv->frame_next - out_next)) { + long out_cnt = (long) (pv->output_total + (pv->frame_next - out_next) + + fftsize / 2); + // if there's no room in the output buffer, shift the samples. + // This is done here to avoid extra work (sometimes pv_get_output2 + // can be called and the samples are already in the buffer so there's + // no need to shift. + if (pv->frame_next + fftsize > output_buffer + output_buffer_len) { + long shift = (long) (out_next - output_buffer); + D printf("shift output by %ld\n", shift); + memmove(output_buffer, out_next, + (output_buffer_len - shift) * sizeof(*output_buffer)); + /* for debugging, fill the end with zero. When we write (rather + than add) to the buffer, assert that we're over-writing zeros */ + DD ZERO(output_buffer + output_buffer_len - shift, shift); + out_next = output_buffer; + pv->out_next = output_buffer; + pv->frame_next -= shift; + } + int ana_hopsize = (*pv->callback)(out_cnt, ana_frame, + fftsize, pv->rock); + /* DEBUG - To check input, the following commented code will + write each analysis frame as a file. The analysis frame is + prefixed with zeros so that it will be placed at the right + time, but this generates N^2 samples, so only the first 20 + frames are written */ + /*DBG + write_pv_frame(out_cnt - fftsize / 2, ana_frame, fftsize, "pvana"); + DBG*/ + int i; + for (i = 0; i < fftsize; i++) ana_frame[i] *= ana_win[i]; + compute_one_frame(pv, ana_hopsize); + pv->first_time = FALSE; + D printf("pv_get_output2: blocksize %ld frame_next %ld " + "out_next %ld buffer offset %ld\n", + blocksize, (long) (pv->frame_next - output_buffer), + (long) (out_next - output_buffer), + (long) (pv->output_total - (out_next - output_buffer))); + } + D printf("pv_get_output2 returning at offset %ld abs %ld\n", + (long) (pv->out_next - pv->output_buffer), pv->output_total); + /*DBG out_next position is output_total, so if we subtract out_next - output_buffer, + we get the absolute position of the output_buffer + write_pv_frame(pv->output_total - (pv->out_next - pv->output_buffer), + pv->output_buffer, pv->output_buffer_len, "pvbuf"); + DBG*/ + return finish_output(pv); +} diff --git a/lib-src/libnyquist/nyquist/cmupv/src/cmupv.h b/lib-src/libnyquist/nyquist/cmupv/src/cmupv.h new file mode 100644 index 000000000..a67f66258 --- /dev/null +++ b/lib-src/libnyquist/nyquist/cmupv/src/cmupv.h @@ -0,0 +1,164 @@ +// +// cmupv.h +// + +#ifndef __cmupv_h__ +#define __cmupv_h__ + +#ifdef __cplusplus +extern "C" +{ +#endif + +// Protocol: The following is the order of calls with +// optional calls [like_this], and {iteration in braces} +// pv_create +// optional: pv_set_blocksize +// optional: pv_set_fftsize +// pv_initialize +// repeat: { +// optional: [pv_get_effective_pos] +// optional: [pv_set_ratio] +// pv_get_input_count +// pv_put_input (size must match result of pv_get_input_count) +// pv_get_output } +// pv_end +// +// Alternatively, you can use the "absolute" interface, where the +// Phase_vocoder object tells you when it needs a frame of audio +// and what output time corresponds to the center of the frame. +// The "client" then returns the frame of samples and tells the +// Phase_vocoder what hopsize was used. Of course, these callbacks +// will demand samples for "future" output, so the client must know +// how to find input samples for near-future points in the output: +// pv_create2 +// optional: pv_set_blocksize +// optional: pv_set_fftsize +// pv_initialize +// optional: pv_set_callback +// repeat: { +// pv_get_output2 } ... during these calls, callback will get samples + + +typedef void *Phase_vocoder; // a phase vocoder +// the callback is given a sample count which is the output sample number +// that will be at the center of the window. The callee should fill +// samples with len samples from the input such that the center of the +// len samples should be output at the given output sample count. +// the hopsize should be returned. +// Return value is the hopsize used (hop from previous input frame) +typedef int (*Pv_callback)(long out_count, float *samples, int len, + void *rock); + +// create a phase vocoder. Pass in function pointers to allocate +// and deallocate memory, or NULL to use default malloc() +// and free(): +Phase_vocoder pv_create(void *(*mallocfn)(size_t), void (*freefn)(void *)); + +// Use pv_create2() to use the "absolute" interface and pv_set_callback(). +// As with pv_create, pass in custom malloc and free functions or NULL to +// use default malloc() and free(). The callback is called to get input +// frames, and the "rock" context parameter will be passed to the callback: +Phase_vocoder pv_create2(void *(*mallocfn)(size_t), void (*freefn)(void *), + Pv_callback callback, void *rock); + +void pv_set_callback(Phase_vocoder x, Pv_callback callback, void *rock); + +// call at the end of the program to free the memory +void pv_end(Phase_vocoder *x); + +// set output block size (call before computing first samples) +// must be a power of 2 +// Blocksize should be set beforehand and can't be changed +// when program begins +void pv_set_blocksize(Phase_vocoder x, int n); + +// set FFT size, n must be power of 2 (call before computing +// first samples) +// FFT size should be set beforehand and can't be changed +// when program begins +void pv_set_fftsize(Phase_vocoder x, int n); + +int round_log(int fftsize); //calculate the round num of log2_fft + +// set ratio (call before pv_get_input_count()) +// ratio is the stretch factor: >1 means output sound will be +// longer than input sound +void pv_set_ratio(Phase_vocoder x, float ratio); + +// set synthesis hopsize, 'n' must be power of 2. +// It's better to restrict syn_hopsize can only be +// fftsize/2, fftsize/4, fftsize/8, fftsize/16. +// Synthesis hopsize should be set beforehand and can't be +// changed when program begins +void pv_set_syn_hopsize(Phase_vocoder x, int n); + +// initialize: allocate space for window, set default window, +// allocate space for other parameters +// Note that the user won't have the default window and don't +// need to free it. +void pv_initialize(Phase_vocoder x); + +// set analysis window (if it's not called, default is Hanning window) +// A copy of the window is made and managed by the Phase_vocoder, this +// copy will be freed by the phase vocoder( by calling pv_end() ) +// this function can be called many times, but only the newest window set +// by the user is valid. +void pv_set_ana_window(Phase_vocoder x, float *window); + +// set synthesis window, same requirement as "pv_set_ana_window" +void pv_set_syn_window(Phase_vocoder x, float *window); + +#define PV_MODE_STANDARD 0 +#define PV_MODE_PHASEFIX 1 +#define PV_MODE_ROBOVOICE 2 + +// set mode: to allow for variations in the algorithm +// mode values are: PV_MODE_STANDARD -- standard phase vocoder +// PV_MODE_PHASEFIX -- preserves the phase relationship between bins +// and their nearest peak. This is an experimental mode that may +// be refined in the future. It is inspired by Jean Laroche and +// Mark Dolson, "Phase-Vocoder: About this phasiness business" +// PV_MODE_ROBOVOICE, fixes the phases to create a deliberate +// modulation, generating a pitch due to the hopsize period. +// Thanks to M.C.Sharma for this idea. +// illegal mode values are ignored +void pv_set_mode(Phase_vocoder x, int mode); + +// allocate a window and initialize it with the window_type function +// (see hann() and hamm() as example window_type functions) +// caller becomes the owner of the result, so the owner should free +// the window eventually. +// Note the output is after normalized to make sure the sum of w1*w2 +// equals to 1 rather than other constants. +float *pv_window(Phase_vocoder x, float (*window_type)(double x)); + +// inquire how many samples needed to compute next output +int pv_get_input_count(Phase_vocoder x); + +// get effective position of the next output sample measured in +// samples of input. This may depend on the ratio (pv_set_ratio()). +// Initially, the center of the analysis window will be input 0, +// which will appear at output fftsize/2. There are ratio input +// samples per output sample, so the input sample number corresponding +// to output 0 is -ratio * fftsize / 2. +double pv_get_effective_pos(Phase_vocoder x); + +// send input samples - writes input to be stretched into +// the phase vocoder. Phase vocoder saves as much of the +// signal as needed so that input is always sequential. +// note the "size" should be the output of pv_get_input_count(). +void pv_put_input(Phase_vocoder x, int size, float *samples); + +// get output samples: returns a pointer to n samples, where +// n is the value from pv_set_block_size(). Pointer should +// not be freed by the user. +float *pv_get_output(Phase_vocoder x); +// use pv_get_output2() if you used pv_create2(): +float *pv_get_output2(Phase_vocoder x); + +#ifdef __cplusplus +} +#endif + +#endif /* defined(__cmupv_h__) */ diff --git a/lib-src/libnyquist/nyquist/cmupv/src/cmupvdbg.c b/lib-src/libnyquist/nyquist/cmupv/src/cmupvdbg.c new file mode 100644 index 000000000..5c3fe65d9 --- /dev/null +++ b/lib-src/libnyquist/nyquist/cmupv/src/cmupvdbg.c @@ -0,0 +1,69 @@ +/* cmupvdbg.c -- debug support for cmupv + * Roger B. Dannenberg + * Nov 2015 + */ + +/* To see what data is going into the phase vocoder, write_pv_frame + * saves frames or buffers to disk as audio. + * If something is wrong with your applications delivery of analysis frames, + * probably most frames are wrong, so you should write, say, 10 initial + * files and look at them. Set MAX_FILES to 10 and MAX_SAVE to 10. Then, + * after running your program, use Audacity to open the 10 frames saved + * to disk: use the Files:Import:Audio command to load all 10 in one + * command as different tracks. You should see 10 tracks, each containing + * one frame, shifted to the correct source time location. (If not, then + * find and fix the bug.) + * + * This function is also useful for printing other data, e.g. synthesis frames + * or output buffers. To get the most out of this, you should set the first + * parameter, zeros, to the absolute sample offset, e.g. if you want to view + * synthesis frames that are spaced with a 64 sample hop size, you should set + * zeros to 0, 64, 128, 192, ... so that when you view the files in Audacity, + * the samples will line up in time with each other and with the output file. + * + * You can display multiple frame types and lengths, so the prefix parameter + * allows you to effectively label the data. This prefix is used to construct + * the file name. + * + * MAX_FILES tells how many files to write. Because of padding, file writing + * is an N-squared operation, so only write initial frames if possible, but + * use a large number if you need to see everything. + * + * MAX_SAVE tells how many file *names* to use. Files are numbered, so we'll + * simply take the number modulo MAX_SAVE so that file names are reused and + * old files are overwritten. Use a large value of MAX_SAVE to save everything. + * Use 1 to get only the last frame. + */ + +#include "stdio.h" +#include "sndfile.h" + + +#define MAX_FILES 10 +#define MAX_SAVE 10 +static int file_no = 0; + + +void write_pv_frame(long zeros, float *frame, long fftsize, char *prefix) +{ + float z = 0; + char path[128]; + SF_INFO sfinfo; + SNDFILE *sf; + if (file_no >= MAX_FILES) return; + sprintf(path, "%s-%02d.wav", prefix, (file_no % MAX_SAVE) + 1); + sfinfo.frames = 0; + sfinfo.samplerate = 44100; + sfinfo.channels = 1; + sfinfo.format = SF_FORMAT_WAV | SF_FORMAT_PCM_16; + sfinfo.sections = 0; + sfinfo.seekable = 0; + sf = sf_open(path, SFM_WRITE, &sfinfo); + for (long i = 0; i < zeros; i++) { + sf_writef_float(sf, &z, 1); + } + sf_writef_float(sf, frame, fftsize); + sf_close(sf); + printf("wrote %s, %ld zeros\n", path, zeros); + file_no++; +} diff --git a/lib-src/libnyquist/nyquist/cmupv/src/cmupvdbg.h b/lib-src/libnyquist/nyquist/cmupv/src/cmupvdbg.h new file mode 100644 index 000000000..1c07d4c07 --- /dev/null +++ b/lib-src/libnyquist/nyquist/cmupv/src/cmupvdbg.h @@ -0,0 +1,12 @@ +/* cmupvdbg.h -- cmupv debug function declarations */ + +/* Normally, this function(s) are not called and the corresponding + * cmupvdbg.c need not be linked + */ + +/* See comments in cmupvdbg.c and definitions of MAX_FILES and MAX_SAVE */ + +/* write_pv_frame -- saves an array of floats as a sound file so you + * can see what is going into the phase vocoder + */ +void write_pv_frame(long zeros, float *ana_frame, long fftsize, char *prefix); diff --git a/lib-src/libnyquist/nyquist/cmupv/src/internal.c b/lib-src/libnyquist/nyquist/cmupv/src/internal.c new file mode 100644 index 000000000..eff65f747 --- /dev/null +++ b/lib-src/libnyquist/nyquist/cmupv/src/internal.c @@ -0,0 +1,31 @@ +// +// internal.c +// +// +// +// + +#define _USE_MATH_DEFINES +#include "internal.h" + +float hann(double x) +{ + return (float) (0.5 * (1.0 - cos(2 * M_PI * x))); +} + +float hamm(double x) +{ + return (float) (0.54 - 0.46 * cos(2 * M_PI * x)); +} + +void OneDimensionFFTshift(float vector[], int VectorLength) +{ + float temp; + int i; + for (i = 0; i <= VectorLength / 2 - 1; i++) + { + temp = vector[i]; + vector[i] = vector[VectorLength / 2 + i]; + vector[VectorLength / 2 + i] = temp; + } +} diff --git a/lib-src/libnyquist/nyquist/cmupv/src/internal.h b/lib-src/libnyquist/nyquist/cmupv/src/internal.h new file mode 100644 index 000000000..7695c080a --- /dev/null +++ b/lib-src/libnyquist/nyquist/cmupv/src/internal.h @@ -0,0 +1,30 @@ +// +// internal.h +// +// +// +// + +#ifndef _internal_h +#define _internal_h + +#ifndef _USE_MATH_DEFINES +#define _USE_MATH_DEFINES 1 +#endif +#include + +// hanning window +// 'x' is the independent variable of the window function. +// 'x' is actually i/window_length, it's from 0 to 1-(1/window_length) +float hann(double x); + +// hamming window +// 'x' is the independent variable of the window function. +// 'x' is actually i/window_length, it's from 0 to 1-(1/window_length) +float hamm(double x); + +// FFT shift +// VectorLength must be an even integer +void OneDimensionFFTshift(float vector[],int VectorLength); + +#endif