/* cmupv.c -- phase vocoder */ /* Computation is driven by demands for output. The client calls either pv_get_output() or pv_get_output2(). Either way, output is returned one blocksize at a time. The blocksize is set by pv_set_blocksize() and defaults to the synthesis hopsize, which defaults to fftsize/8. Since the blocksize and hopsize are not necessarily matched in any way, there is a buffer to accumulate the overlapping "grains" of sound which we call the synthesis frames (each synthesis frame is computed by adjusting phases of an analysis frame). The buffer is called output_buffer, and the length (in floats) is output_buffer_len. The output_buffer_len has to be big enough to contain blocksize samples which are about to be returned as output plus fftsize samples which overlap with future synthesis frames that will be added later. The output_buffer_len also has to be hopsize + fftsize samples (probably these worst-case sizes are conservative, but it's easier to set workable upper bounds than to think about all the special cases when output_buffer_len, blocksize, hopsize, and fftsize are all arbitrary). The basic structure to produce blocksize samples is as follows: out_next is the pointer to the next block of blocksize samples. This is a pointer directly into output_buffer, initially the first sample in output_buffer. We also keep a pointer frame_next, which is where the next synthesis frame will be added into output_buffer. Thus, the number of samples that have been completely computed but not output (i.e. no more overlapping frames will be added) is frame_next - out_next. When (frame_next - out_next) > blocksize, we can deliver blocksize samples to the caller. The next constraint is that as we add overlapping synthesis frames into output_buffer, we have to have room for them, so frame_next + fftsize must not be greater than output_buffer + output_buffer_len. If this constraint is not met, we need to shift everything toward the beginning of the output_buffer. Since we've output everything up to out_next, we shift by out_next - output_buffer. Because output_buffer_len >= blocksize + fftsize, we are guaranteed that there is always room to add in any synthesis frames that overlap the blocksize samples to be returned next. PHASE COHERENCE To preserve phase relationships when a sine spills over multiple bins (which happens always due to windowing), we adjust phase based on peaks in the magnitude spectrum. The phases of neighboring bins are adjusted by the same angle. Specification: We want to divide the spectrum into peaks: between local minima, adjust according to the peak. Algorithm: Set "previous" minimum to -1 and previous magnitude to 0 - search for the "previous" peak - iteratively do the following: - search for the next minimum - search for the next peak - compute range of bins to modify, assign minimum to the largest peak - compute the phase adjustment for the next peak - apply the phase adjustment to range of bins - set previous minumum to next minimum - set previous peak to next peak */ /* BEGIN PV_SINE_TEST debugging code... This debugging code is for testing a special case: the input is a sine tone with amplitude about 1.0 and frequency about 689Hz at 44100Hz sample rate, resulting in a period of exactly 64 samples, which will be bin 8 with an fftsize of 512. The goal here is to compute exactly what the phasevocoder should be doing so we can compare to what it does. When SINETEST is enabled, data will be printed. At the time this code is being written, the result with a stretch factor of exactly 1.1 is a pulsing sound when the absolute interface is used. With the absolute interface, the analysis hopsize is mostly 58, with a hop of 59 samples every 5 or 6 frames. */ // #define PV_SINE_TEST 1 #ifdef PV_SINE_TEST double pvst_frequency = 44100.0 / 64.0; // about 689Hz long pvst_offset = -1000; // -1 means we haven't seen first frame yet // the first hop size is bogus, so we ignore it, then accumulate hop sizes here #endif /* END PV_SINE_TEST */ #include #include #include #include #include "internal.h" // only needed for some debugging code which is probably commented out #include "cmupvdbg.h" // define PHASE_FIX to tie neighboring bin phase to phase of spectral peaks // #define PHASE_FIX 1 // debugging output on or off // #define D if (1) #define D if (0) // more debugging #define DD if (0) #define LOGFFTSIZE_DEFAULT 11 #define RATIO_DEFAULT 1 #define BOOL int #define FALSE 0 #define TRUE 1 #define TWOPI (2.0 * M_PI) // These macros are used for memory allocation and freeing. Note that // only ZERO acts like a function. The rest only work on fields of pv. #define PVFREE(field) if (pv->field) { pv->free(pv->field); pv->field = NULL; } #define PVALLOC(field, size) \ pv->field = (float *)pv->malloc((size) * sizeof(*(pv->field))) #define PVREALLOC(field, size) { PVFREE(field); PVALLOC(field, size); } #define ZERO(array, size) memset((array), 0, (size) * sizeof(*(array))) #include "cmupv.h" #include "fftext.h" typedef enum { PV_UNINITIALIZED, PV_START, PV_GOT_COUNT, PV_GOT_INPUT } pv_phase_type; struct position // each element in the structure array { long ana_pos; // the sample number of the center of analysis frame long syn_pos; // the sample number of the center of synthesis frame }; typedef struct { void *(*malloc)(size_t); // malloc is used to allocate memory. If you // have a real-time system and want to avoid the standard library // malloc() which may have priority inversion problems due to locks, // you can supply your own lock-free implementation void (*free)(void *); // if you provide a custom malloc, you should // provide a matching custom free() int blocksize; // the size of audio blocks produced by the phase vocoder. int fftsize; // the number of samples in each FFT. Should be power of 2. int log2_fft; // 2 is the log base int syn_hopsize; // the hopsize used in reconstructing the output float ratio; // the time-stretch ratio. NOTE: even though ratio is // specified as the amount input is stretched, THIS ratio is // the reciprocal, i.e. input duration / output duration int max_ana_hopsize; // fftsize / 3 is max hopsize float pre_ratio; // previous ratio is used to calculate // the previous input_length int mode; // 0 - normal, 1 - phase fix, 2 - robovoice float *ana_win; // the window function used on input (analysis) float *syn_win; // the window function used on output (synthesis) long input_eff_pos; // input effective position is the sample number // of the input that corresponds to the current output float *input_buffer; // used to buffer input samples long input_buffer_len; // how many floats can input_buffer hold? float *output_buffer; // used to buffer output samples long output_buffer_len; // how big in floats is the output buffer? float *input_head; // pointer to the start of the previous // analysis frame. input_head is updated by // hopsize before reading each frame. float *input_rear; // pointer to the end of the input data int frames_to_compute; // how many frames we'll compute in pv_get_output() int expected_input; // the value computed by pv_get_input_count() // we check that pv_put_input delivers what we asked for float *out_next; // pointer into the output buffer from where the // next output sample will delivered float *frame_next; // pointer into the output buffer to where the // next frame will by added Pv_callback callback; // function to retrieve an input frame void *rock; // object pointer or context info to be passed to callback pv_phase_type phase; BOOL first_time; // true only on the first fft output frame BOOL absolute; // true if using the callback protocol -- set by create2() float *ana_frame; // analysis frame float *syn_frame; // synthesis frame float *mag; // magnitude for points in the frame being processed float *ana_phase; // phase for points in the analysis frame // being processed float *syn_phase; // phase for points in the synthesis frame // being processed float *pre_ana_phase; // recording last analysis phase for estimating // the frequency float *pre_syn_phase; // recording last systhesis phase for rebuilting // the new phase float *bin_freq; // bin frequency, used in phase unwrapping; struct position *pos_buffer; // Circular array storing the sample // number of the middle of the frames // (both for analysis and synthesis frames) struct position *pos_buffer_head; // beginning of the circular array, // points to oldest entry. If equal to pos_buffer_rear, the queue // is empty. Never points to // pos_buffer + queue_length. It wraps around to pos_buffer. struct position *pos_buffer_rear; // rear of the circular array, // points to slot AFTER the most recent entry. If equal to // pos_buffer_rear, the queue is empty. Never points to // pos_buffer + queue_length. It wraps around to pos_buffer. long queue_length; // length of the circular queue of corresponding times long input_total; // how many input samples did we get so far? // initially 0, and incremented upon pv_put_input() // so the input_total count corresponds to the input_rear pointer. long output_total; // how many output samples did we produce so far? // initially 0, and incremented by block_size after each call to // pv_get_output(). Corresponds to out_next. } PV; //extern long int sig; //extern long int sig1; //extern long int sig2; // round_log_power - round fftsize up to a power of 2 // return log2(rounded up fftsize) // optionally set size_ptr to rounded up fftsize // int round_log_power(int fftsize, int *size_ptr) { long double log2_fft = log2l(fftsize); int round_log2_fft = (int) log2_fft; if (round_log2_fft < log2_fft) { round_log2_fft += 1; } if (log2_fft > 20 || ((1 << round_log2_fft) != fftsize)) { round_log2_fft = 1024; // on error, substitute a sane value } if (size_ptr) *size_ptr = 1 << round_log2_fft; return round_log2_fft; } Phase_vocoder pv_create(void *(*mallocfn)(size_t), void (*freefn)(void *)) { if (!mallocfn) mallocfn = &malloc; PV *pv = (PV *)mallocfn(sizeof(PV)); ZERO(pv, 1); pv->phase = PV_UNINITIALIZED; pv->malloc = mallocfn; pv->free = freefn; pv_set_fftsize(pv, 1 << LOGFFTSIZE_DEFAULT); // syn_hopsize will now be FFTSIZE_DEFAULT / 8 // pv_set_syn_hopsize(pv, FFTSIZE_DEFAULT / 8); pv->blocksize = pv->syn_hopsize; pv_set_ratio(pv, RATIO_DEFAULT); pv->first_time = TRUE; pv->mode = 0; return (Phase_vocoder)pv; } Phase_vocoder pv_create2(void *(*mallocfn)(size_t), void (*freefn)(void *), Pv_callback callback, void *rock) { PV *pv = (PV *)pv_create(mallocfn, freefn); pv->absolute = TRUE; pv_set_callback(pv, callback, rock); return (Phase_vocoder)pv; } void pv_end(Phase_vocoder *x) { PV *pv = (PV *)(*x); fftFree(); PVFREE(ana_win); PVFREE(syn_win); PVFREE(input_buffer); PVFREE(output_buffer); PVFREE(ana_frame); PVFREE(syn_frame); PVFREE(mag); PVFREE(ana_phase); PVFREE(syn_phase); PVFREE(pre_ana_phase); PVFREE(pre_syn_phase); PVFREE(bin_freq); PVFREE(pos_buffer); pv->free(pv); *x = NULL; } void pv_set_callback(Phase_vocoder x, Pv_callback callback, void *rock) { PV *pv = (PV *)x; pv->callback = callback; pv->rock = rock; } void pv_set_blocksize(Phase_vocoder x, int n) { PV *pv = (PV *)x; pv->blocksize = n; pv->phase = PV_UNINITIALIZED; } void pv_set_fftsize(Phase_vocoder x, int n) { PV *pv = (PV *)x; // n must be power of 2 and for sanity, we'll require it to be at least 16 // power of 2 test: only if n is a power of 2 will n-1 clear the high order // bit: if ((n & (n - 1)) || (n < 16)) { return; // ignore bad argument } // preserve the same syn_hopsize ratio, e.g. if syn_hopsize // is fftsize/8, then after setting fftsize, new syn_hopsize // will be (new fftsize)/8 int hop = (pv->syn_hopsize == 0 ? 8 : pv->fftsize / pv->syn_hopsize); // the divisor pv->fftsize = n; pv->log2_fft = round_log_power(n, &(pv->fftsize)); pv_set_syn_hopsize(x, n / hop); pv->phase = PV_UNINITIALIZED; pv->max_ana_hopsize = n / 3; } void pv_set_ratio(Phase_vocoder x, float ratio) { PV *pv = (PV *)x; assert(pv->phase == PV_START || pv->phase == PV_UNINITIALIZED); pv->pre_ratio = pv->ratio; pv->ratio = 1.0F / ratio; } void pv_set_syn_hopsize(Phase_vocoder x, int n) // set the hopsize. Must be fftsize divided by a power of 2. // non-power-of-two n will be rounded up. // hopsize must be at least 1 and at most fftsize/4. // out-of-bound n will be put within bounds { PV *pv = (PV *)x; if (n < 1) n = 1; round_log_power(n, &(pv->syn_hopsize)); if (pv->syn_hopsize > pv->fftsize / 4) { pv->syn_hopsize = pv->fftsize / 4; } pv->phase = PV_UNINITIALIZED; } void pv_initialize(Phase_vocoder x) { PV *pv = (PV *)x; // allocate space and initialize for window if (! pv->ana_win) pv->ana_win = pv_window(pv, hann); //default analysis window is Hanning if (! pv->syn_win) pv->syn_win = pv_window(pv, hann); //default synthesis window is Hanning // allocate space and initialize for input buffer and output buffer if (pv->blocksize <= pv->syn_hopsize) { pv->input_buffer_len = pv->fftsize; } else { // The maximum of ana_hopsize is fftsize/3, so the // pv->input_buffer_len is set to the maximum so as to avoid // freeing and allocating memory for input buffer many times // due to the changing of time-stretching ratio. // The maximum amount needed is fftsize to produce one frame of // output plus: fftsize/3 for each additional frame. The total // number of frames we must compute to form blocksize samples // of output is blocksize / hopsize. pv->input_buffer_len = pv->fftsize + 2 /* to avoid rounding error */ + lroundf((((float) pv->blocksize / pv->syn_hopsize) - 1) * (pv->fftsize / 3.0F)); } if (! pv->absolute) { PVREALLOC(input_buffer, pv->input_buffer_len); // preload with fftsize/2 zeros so that the first sample of the input // will be at the center of the fft window pv->input_head = pv->input_buffer; ZERO(pv->input_buffer, pv->fftsize / 2); pv->input_rear = pv->input_buffer + pv->fftsize / 2; } // how long does the output buffer need to be? // It has to be long enough to add in an entire fft frame, so // at least fftsize. It should then be at least syn_hopsize - 2 // bigger, so that if we're outputting syn_hopsize - 1 samples // (a really bad choice, by the way), we could have syn_hopsize - 2 // samples in the output buffer and the next buffer gets added // starting at location syn_hopsize - 2, so we need syn_hopsize - 2 + // fftsize. Let's make it an even syn_hopsize + fftsize. // It also has to be long enough to hold an output buffer length + // fftsize. // So overall, we need max(syn_hopsize, blocksize) + fftsize. // pv->output_buffer_len = pv->blocksize; if (pv->blocksize <= pv->syn_hopsize) { pv->output_buffer_len = pv->syn_hopsize; } pv->output_buffer_len += pv->fftsize; PVREALLOC(output_buffer, pv->output_buffer_len); D printf("pv_initialize: input_buffer_len %ld\n", pv->input_buffer_len); D printf(" output_buffer_len %ld\n", pv->output_buffer_len); D printf(" blocksize %d\n", pv->blocksize); D printf(" fftsize %d\n", pv->fftsize); D printf(" syn_hopsize %d\n", pv->syn_hopsize); pv->out_next = pv->output_buffer; pv->frame_next = pv->output_buffer; ZERO(pv->output_buffer, pv->output_buffer_len); PVREALLOC(ana_frame, pv->fftsize); PVREALLOC(syn_frame, pv->fftsize); PVREALLOC(mag, pv->fftsize / 2 + 1); // allocate space for phase and pre_phase which will be used in // phase unwrapping PVREALLOC(ana_phase, pv->fftsize / 2 + 1); PVREALLOC(syn_phase, pv->fftsize / 2 + 1); PVREALLOC(pre_ana_phase, pv->fftsize / 2 + 1); PVREALLOC(pre_syn_phase, pv->fftsize / 2 + 1); // bin frequency, used in phase unwrapping PVREALLOC(bin_freq, pv->fftsize / 2 + 1); int i; for (i = 0; i <= pv->fftsize / 2; i++) pv->bin_freq[i] = (float) (TWOPI * i / pv->fftsize); // get_effective_pos() maps from the beginning of the // next block to the corresponding input sample. Since // the output hopsize is fixed, the next output sample // was at the center of the frame if we go back by // framesize / syn_hopsize frames. Thus we need // framesize / syn_hopsize entries in the queue. We'll // pad by a couple to deal with rounding issues: pv->queue_length = pv->fftsize / (pv->syn_hopsize * 2) + 2; if (!pv->absolute) { PVFREE(pos_buffer); pv->pos_buffer = (struct position *) (pv->malloc((pv->queue_length + 1) * sizeof(struct position))); pv->pos_buffer_head = pv->pos_buffer; pv->pos_buffer_rear = pv->pos_buffer; } // make sure tables are constructed before we start real-time processing #ifndef NDEBUG int fft_error_sign = #endif fftInit(pv->log2_fft); // target fftInit assert(!fft_error_sign); pv->phase = PV_START; } void pv_set_ana_window(Phase_vocoder x, float *window) { PV *pv = (PV*)x; PVREALLOC(ana_win, pv->fftsize); memcpy(pv->ana_win, window, pv->fftsize * sizeof(float)); } void pv_set_syn_window(Phase_vocoder x, float *window) { PV *pv = (PV*)x; PVREALLOC(syn_win, pv->fftsize); memcpy(pv->syn_win, window, pv->fftsize * sizeof(float)); } void pv_set_mode(Phase_vocoder x, int mode) { PV *pv = (PV*)x; if (mode >= 0 && mode <= 2) { pv->mode = mode; } } float *pv_window(Phase_vocoder x, float (*window_type)(double x)) // window is after normalized { PV *pv = (PV *)x; float sum_window_square = 0, COLA_factor; int window_length = pv->fftsize; float *window = (float *)pv->malloc(window_length * sizeof(float)); int i; for (i = 0; i < window_length; i++) { window[i] = window_type((double)i / window_length); // note that the computation is all double even if window[i] is float sum_window_square += window[i] * window[i]; } COLA_factor = sum_window_square / pv->syn_hopsize; for (i = 0; i <= pv->fftsize - 1; i++) window[i] = (float) (window[i] / sqrt(COLA_factor)); return window; } int pv_get_input_count(Phase_vocoder x) { PV *pv = (PV*)x; int ana_hopsize = lroundf((pv->syn_hopsize) * (pv->ratio)); if (ana_hopsize > pv->max_ana_hopsize) { ana_hopsize = pv->max_ana_hopsize; } assert(pv->phase == PV_START); // To produce blocksize, how many samples do we need? The next // sample to output is at out_next, and the next frame will be // added at frame_next, so we've already computed // out_next - frame_next: int need = pv->blocksize - (int)(pv->frame_next - pv->out_next); // need is now the number of output samples we need // How many fft frames will be required? Round up by adding hopsize-1: int frames = (need + pv->syn_hopsize - 1) / pv->syn_hopsize; if (frames > 0) { // Skip hopsize frames except on the first time, where // we always put first sample in the middle of the frame: if (!pv->first_time) { pv->input_head += ana_hopsize; } // We need framesize + hopsize * (frames - 1) in order to compute // frames overlapping fft frames of size framesize. need = pv->fftsize + ana_hopsize * (frames - 1); // Now how many input samples do we have already? long have = (long) (pv->input_rear - pv->input_head); need -= have; // See if we have room for need samples in the buffer: if (pv->input_rear + need > pv->input_buffer + pv->input_buffer_len) { // not enough room. Shift the input buffer to make space. long shift = (long) (pv->input_head - pv->input_buffer); memmove(pv->input_buffer, pv->input_head, (pv->input_rear - pv->input_head) * sizeof(*(pv->input_buffer))); pv->input_head -= shift; pv->input_rear -= shift; D printf(" after input shift by %ld, head at %ld\n", shift, (long) (pv->input_head - pv->input_buffer)); } // make sure our assumptions are true and we now have space: assert(pv->input_rear + need <= pv->input_buffer + pv->input_buffer_len); // See if we have room in the output_buffer // last sample will be at frame_next + (frames - 1) * hopsize + fftsize float *last_output = pv->frame_next + (frames - 1) * pv->syn_hopsize + pv->fftsize; if (last_output > pv->output_buffer + pv->output_buffer_len) { // not enough room. Shift the output buffer to make space. long shift = (long) (pv->out_next - pv->output_buffer); memmove(pv->output_buffer, pv->out_next, (pv->fftsize - pv->syn_hopsize) * sizeof(*(pv->output_buffer))); pv->frame_next -= shift; pv->out_next -= shift; } } else { frames = 0; need = 0; } pv->frames_to_compute = frames; pv->phase = PV_GOT_COUNT; pv->expected_input = need; return need; } #pragma warning(disable: 4715 4068) // return type and unknown pragma #pragma clang diagnostic ignored "-Wreturn-type" double pv_get_effective_pos(Phase_vocoder x) { PV *pv = (PV*)x; assert(pv->phase == PV_START); // Find the appropriate position struct for the computation of // effective audio position. We are given pv->output_total, the // sample count at which we want the equivalent input sample // count. We want to interpolate between two queue entries, so // we'll search the queue until we have an entry that is greater // than output_total. Then we set head to the previous entry // because it will make future searches go faster. struct position *pos_find = pv->pos_buffer_head; struct position *pos_find_prev = NULL; while (pos_find != pv->pos_buffer_rear && pos_find->syn_pos <= pv->output_total) { pos_find_prev = pos_find; pos_find++; if (pos_find == pv->pos_buffer + pv->queue_length) { pos_find = pv->pos_buffer; // wrap } } // if pos_find and pos_find_prev both point to something, we // can interpolate: if (pos_find != pv->pos_buffer_rear && pos_find_prev) { // we can drop old positions from queue now: pv->pos_buffer_head = pos_find_prev; // interpolate long output_step = pos_find->syn_pos - pos_find_prev->syn_pos; long input_step = pos_find->ana_pos - pos_find_prev->ana_pos; return pos_find_prev->ana_pos + input_step * (double)(pv->output_total - pos_find_prev->syn_pos) / (double)output_step; // if there's nothing in the queue, then we must be starting. // after the first frame there are TWO entries } else if (pos_find_prev == NULL) { // if any of these fail, maybe the queue_length is too small // and we dropped some history too early assert(pos_find == pv->pos_buffer_rear); assert(pv->first_time); assert(pv->output_total == 0); return -(pv->ratio * pv->fftsize / 2.0); } // I can't think of any other case. assert(FALSE); } // Send samples to phase vocoder. size should match the number of // samples computed by get_input_count. // void pv_put_input(Phase_vocoder x, int size, float *samples) // 'samples' points to samples to be sent each time { PV *pv = (PV *)x; assert(pv->phase == PV_GOT_COUNT); // size must agree with the value computed by pv_get_input_count: assert(pv->expected_input == size); D printf("pv_put_input: size %d, %g at %ld\n", size, *samples, (long) (pv->input_rear - pv->input_buffer)); if (size > 0) { memcpy(pv->input_rear, samples, size * sizeof(*pv->input_rear)); pv->input_rear += size; pv->input_total += size; } pv->phase = PV_GOT_INPUT; } void compute_one_frame(PV *pv, int ana_hopsize) { float *syn_frame = pv->syn_frame; float *ana_frame = pv->ana_frame; int fftsize = pv->fftsize; int log2_fft = pv->log2_fft; float *mag = pv->mag; float *ana_phase = pv->ana_phase; float *syn_phase = pv->syn_phase; float *syn_win = pv->syn_win; float *frame_next = pv->frame_next; int syn_hopsize = pv->syn_hopsize; float *pre_ana_phase = pv->pre_ana_phase; float *pre_syn_phase = pv->pre_syn_phase; float *bin_freq = pv->bin_freq; int i; //#define SKIP_PHASE_ADJUST #ifdef SKIP_PHASE_ADJUST // for debugging we just copy the windowed input to the output // without adjusting phases. memcpy(syn_frame, ana_frame, fftsize * sizeof(*syn_frame)); #else /*DBG long zeros = pv->output_total + frame_next - pv->out_next; write_pv_frame(zeros, ana_frame, fftsize, "pvsyn"); DBG*/ OneDimensionFFTshift(ana_frame, fftsize); // FFTshift fftInit(log2_fft); rffts(ana_frame, log2_fft, 1); /* get magnitude and phase */ mag[0] = ana_frame[0]; ana_phase[0] = 0; mag[fftsize / 2] = ana_frame[1]; ana_phase[fftsize / 2] = 0; for (i = 1; i < fftsize / 2; i++) { float real = ana_frame[2 * i]; float imag = ana_frame[2 * i + 1]; mag[i] = (float)sqrt(real * real + imag * imag); ana_phase[i] = (float)atan2(imag, real); } #ifdef PV_SINE_TEST if (pvst_offset == -1000) pvst_offset = -24; else pvst_offset += ana_hopsize; // phase of the sine should be (offset / 64) * TWO_PI, // but when sine phase is 0, atan2(0, -1) is -pi/2 so we have // atan2_phase = sine_phase - pi/2 // adding 3 pi/2 instead of subtracting pi/2 to make fmod result positive double est_atan2_phase = fmod((pvst_offset / 64.0) * TWOPI + (3 * M_PI_2), TWOPI); if (est_atan2_phase > M_PI) est_atan2_phase -= TWOPI; printf("offset %ld hop %ld ph[8] %5f est.ph %5f real %5f imag %5f\n", pvst_offset, ana_hopsize, ana_phase[8], est_atan2_phase, ana_frame[16], ana_frame[17]); #endif /* phase unwrapping & set synthesis phase */ if (pv->first_time) { D printf("phasevocoder fftsize %d hopsize %d\n", fftsize, syn_hopsize); memcpy(syn_phase, ana_phase, ((fftsize / 2) + 1) * sizeof(*syn_phase)); } else if (pv->mode == PV_MODE_PHASEFIX) { // we'll start each iteration with prev_min_x set to the lowest bin // that will be assigned to the peak at prev_peak_x. We'll find the // next_min_x and the following next_peak_x. Update the phases. // int prev_peak_x = 0; // index of previous peak float prev_peak_mag; // magnitude of previous peak int prev_min_x = 0; // index of previous local minimum int next_peak_x; // index of peak between prev_min_x and next_min_x float next_peak_mag; // magnitude of next peak int next_min_x; // index of next minimum (after peak_x) float next_min_mag; // magnitude at next_min_x float last_mag = mag[0]; // used in search for peaks float this_mag = mag[1]; float next_mag; int i; // loop index // decide if we're starting on a peak or a minimum: if (last_mag <= this_mag) { // starting on a minimum // find peak for (i = 1; i < fftsize / 2; i++) { next_mag = mag[i + 1]; // invariant: last_mag <= this_mag if (this_mag > next_mag) { // found peak prev_peak_x = i; prev_peak_mag = this_mag; break; // invariant: this_mag > next_mag } // invariant: this_mag <= next_mag last_mag = this_mag; this_mag = next_mag; // invariant: last_mag <= this_mag } if (i >= fftsize / 2) { prev_peak_x = i; } } else { // set up to start loop next_mag = this_mag; // invariant: last_mag > this_mag this_mag = last_mag; // invariant: this_mag > next_mag prev_peak_mag = last_mag; } while (prev_min_x <= fftsize / 2) { // invariant: prev_min_x is previous local minimum or 0 // prev_peak_x is first local peak after prev_min_x (or 0) // last_mag is mag at prev_peak_x - 1 // this_mag is mag at prev_peak_x // next_mag is mag at prev_peak_x + 1 // find next minimum // Note: prev_peak_x might be fftsize/2, so i might be fftsize/2 + 1 for (i = prev_peak_x + 1; i < fftsize / 2; i++) { last_mag = this_mag; // invariant: this_mag > next_mag this_mag = next_mag; // invariant: last_mag > this_mag next_mag = mag[i + 1]; if (this_mag <= next_mag) { // found minimum // here, last_mag at i-1, this_mag at i, next_mag at i+1 // and next_mix_x == i break; } // loop invariant: this_mag > next_mag } // invariant: this_mag <= next_mag || i == fftsize/2 if (i >= fftsize / 2) { // special case at end of spectrum // no minimum found next_min_x = fftsize / 2 + 1; } else { next_min_x = i; // either we found peak or we set i to fftsize/2 next_min_mag = mag[i]; } // invariant: this_mag <= next_mag || i == fftsize/2 // search for second peak; for (i = next_min_x + 1; i < fftsize / 2; i++) { last_mag = this_mag; // invariant: this_mag <= next_mag this_mag = next_mag; // invariant: last_mag <= this_mag next_mag = mag[i + 1]; if (this_mag > next_mag) { // found peak // here, last_mag at i-1, this_mag at i, next_mag at i+1 // and next_peak_x == i break; } // loop invariant: this_mag <= next_mag } next_peak_x = i; // special case if we're at the end: if (i >= fftsize / 2) { if (next_mag < this_mag) { next_peak_x = fftsize / 2 + 1; } else { next_peak_x = fftsize / 2; // this may not be necessary next_peak_mag = mag[next_peak_x]; } } else { next_peak_mag = mag[i]; } // now we have prev_min, prev_peak, next_min, and next_peak. We // want bins from prev_min to next_min to get the phase shift of // prev_peak. First decide if next_min gets assigned to prev_peak // or next_peak. Assign to the closest peak, but break ties by // picking the largest peak. if (next_min_x - prev_peak_x < next_peak_x - next_min_x) { // closer to prev_peak, so include min with it next_min_x++; } else if ((next_min_x - prev_peak_x == next_peak_x - next_min_x) && // equidistant so see if prev_peak_mag > next_peak_mag (prev_peak_mag > next_peak_mag)) { next_min_x++; } // Now we want to adjust phases of prev_min_x, prev_min_x + 1, // prev_min_x + 2, ..., next_min_x - 1. // // Assign the new phases according to the peak... // increment between actual phase increment value // and the phase increment value got when the // it's the nearest bin frequency. Used // in phase unwrapping: int j = prev_peak_x; // just to make more concise notation double phase_increment = ana_phase[j] - pre_ana_phase[j] - bin_freq[j] * ana_hopsize; // need to get phase_increment between -M_PI and +M_PI. // Algorithm: add M_PI, get phase_increment between 0 and TWO_PI, // then subtract M_PI: phase_increment = fmod(phase_increment + M_PI, TWOPI); if (phase_increment < 0) phase_increment += TWOPI; phase_increment -= M_PI; // estimated frequency from phase unwrapping /*DBG if (j == 8) printf("phase_increment %g hopsize %d\n", phase_increment, ana_hopsize); DBG*/ float estimate_freq = (float) (phase_increment / ana_hopsize + bin_freq[j]); // get synthesis phase adjustment phase_increment = pre_syn_phase[j] + syn_hopsize * estimate_freq - ana_phase[j]; /*DBG if (j == 8) printf("phase_increment at %d is %g, freq %g ", j, phase_increment, estimate_freq); DBG*/ // update the range of bins: for (i = prev_min_x; i < next_min_x; i++) { syn_phase[i] = fmodf((float) (ana_phase[i] + phase_increment), (float) TWOPI); } /*DBG if (j == 8) printf("syn_phase[8] %g\n", syn_phase[8]); DBG*/ // now get ready for the next iteration prev_min_x = next_min_x; prev_peak_x = next_peak_x; prev_peak_mag = next_peak_mag; } } else if (pv->mode == PV_MODE_STANDARD) { for (i = 0; i <= fftsize / 2; i++) { // increment between actual phase increment value // and the phase increment value got when the // it's the nearest bin frequency. Used // in phase unwrapping: double phase_increment = ana_phase[i] - pre_ana_phase[i] - bin_freq[i] * ana_hopsize; // need to get phase_increment between -M_PI and +M_PI. // Algorithm: add M_PI, get phase_increment between 0 and TWO_PI, // then subtract M_PI: phase_increment = fmod(phase_increment + M_PI, TWOPI); if (phase_increment < 0) phase_increment += TWOPI; phase_increment -= M_PI; // estimated frequency from phase unwrapping float estimate_freq = (float) (phase_increment / ana_hopsize + bin_freq[i]); // set synthesis phase syn_phase[i] = fmodf((float) (pre_syn_phase[i] + syn_hopsize * estimate_freq), (float) TWOPI); } } else if (pv->mode == PV_MODE_ROBOVOICE) { ; // syn_phase[] is unmodified, i.e. constant } else { assert(FALSE); // bad mode value } for (i = 0; i < fftsize / 2; i++) { // record phases pre_ana_phase[i] = ana_phase[i]; pre_syn_phase[i] = syn_phase[i]; // update realpart and imagpart syn_frame[i * 2] = (float) (mag[i] * cos(syn_phase[i])); syn_frame[i * 2 + 1] = (float) (mag[i] * sin(syn_phase[i])); } pre_ana_phase[i] = ana_phase[i]; pre_syn_phase[i] = syn_phase[i]; // update realpart and imagpart syn_frame[1] = (float) (mag[i] * cos(syn_phase[i])); // inverse FFT riffts(syn_frame, log2_fft, 1); // fftshift OneDimensionFFTshift(syn_frame, fftsize); #endif // SKIP_PHASE_ADJUST D printf(" mid syn_frame->%g\n", syn_frame[pv->fftsize / 2]); //D printf(" frame offset %ld\n", frame_next - pv->output_buffer); // window the frame and then add it to the output buffer // assume here that there is room to add in syn_frame D printf(" add to frame_next: %ld\n", (long) (pv->frame_next - pv->output_buffer)); /*DBG float tmp_frame[4096]; for (int i = 0; i < fftsize; i++) { tmp_frame[i] = syn_win[i] * syn_frame[i]; } write_pv_frame(zeros, syn_win, fftsize, "pvsyn"); write_pv_frame(zeros, syn_frame, fftsize, "pvsyn"); write_pv_frame(zeros, tmp_frame, fftsize, "pvsyn"); DBG*/ int sum_count = fftsize - syn_hopsize; for (i = 0; i < sum_count; i++) { frame_next[i] += syn_win[i] * syn_frame[i]; } for (/* continue from i */; i < fftsize; i++) { DD assert(frame_next[i] == 0); frame_next[i] = syn_win[i] * syn_frame[i]; } frame_next += syn_hopsize; pv->frame_next = frame_next; } void update_position_queue(PV *pv, float *ana_center) { int fftsize = pv->fftsize; float *frame_next = pv->frame_next; int syn_hopsize = pv->syn_hopsize; float *out_next = pv->out_next; // put the positions of the processed frame into the queue if (pv->first_time) { // insert a special starting correspondence: pv->pos_buffer_rear->ana_pos = lroundf(-pv->ratio * fftsize / 2); pv->pos_buffer_rear->syn_pos = 0; pv->pos_buffer_rear++; } // center of analysis window was at ana_center // input_total corresponds to input_rear pv->pos_buffer_rear->ana_pos = pv->input_total - (long) (pv->input_rear - ana_center); // output window center was at frame_next - syn_hopsize + fftsize / 2 // output_total corresponds to out_next pv->pos_buffer_rear->syn_pos = pv->output_total + (long) ((frame_next - syn_hopsize + (fftsize / 2)) - out_next); // set pos_buffer_rear to new last element in the queue pv->pos_buffer_rear++; if (pv->pos_buffer_rear == pv->pos_buffer + pv->queue_length) { pv->pos_buffer_rear = pv->pos_buffer; // wrap } // if queue is too full, remove first element at pos_buffer_head if (pv->pos_buffer_head == pv->pos_buffer_rear) { pv->pos_buffer_head++; if (pv->pos_buffer_head == pv->pos_buffer + pv->queue_length) { pv->pos_buffer_head = pv->pos_buffer; // wrap } } } float *finish_output(PV *pv) { assert(pv->frame_next - pv->out_next >= pv->blocksize); pv->phase = PV_START; // remember the current output: float *block = pv->out_next; // update out_next where next output block will be computed pv->out_next = block + pv->blocksize; pv->output_total += pv->blocksize; D printf(" return offset %ld = %g\n", (long) (pv->out_next - pv->output_buffer), *(pv->out_next)); /* DEBUG: To produce a 32767-sample-long sawtooth from 0 to 1 (roughly) * as output, uncomment the following loop. This might be the first step * in debugging. If you do not get a smoothly increasing ramp for 32K * samples, then you are not handling the output of cmupv properly. */ /* for (int i = 0; i < pv->blocksize; i++) { * block[i] = ((pv->output_total - pv->blocksize + i) % 32767) / 32768.0; * } */ return block; } float *pv_get_output(Phase_vocoder x) { PV *pv = (PV *)x; assert(pv->phase == PV_GOT_INPUT); #ifndef NDEBUG long blocksize = pv->blocksize; float *out_next = pv->out_next; #endif int fftsize = pv->fftsize; int frames_to_compute = pv->frames_to_compute; int syn_hopsize = pv->syn_hopsize; float *ana_win = pv->ana_win; float ratio = pv->ratio; float *input_head = pv->input_head; float *ana_frame = pv->ana_frame; float *ana_center; int ana_hopsize = lroundf(syn_hopsize * ratio); if (ana_hopsize > pv->max_ana_hopsize) { ana_hopsize = pv->max_ana_hopsize; } // compute frames and add them to the output_buffer until there // are blocksize samples ready to deliver D printf("pv_get_output: frames_to_compute %d\n", frames_to_compute); int frame; for (frame = 0; frame < frames_to_compute; frame++) { assert(pv->frame_next - out_next < blocksize); int i; for (i = 0; i < fftsize; i++) // get and window the buffer ana_frame[i] = input_head[i] * ana_win[i]; ana_center = input_head + fftsize / 2; D printf(" mid ana_frame->%g at %ld\n", *ana_center, (long) (ana_center - pv->input_buffer)); if (frame < frames_to_compute - 1) { input_head += ana_hopsize; // get ready for next iteration, // but on the last iteration, we do not add hopsize because // ratio might change. ana_hopsize is added in get_input_count() // to set up the next analysis frame location } else { // on the last iteration, update pv->input_head. // Equivalently, after the for loop we could do // if (frames_to_compute > 0) // pv->input_head += ana_hopsize * (frames_to_compute - 1); pv->input_head = input_head; } compute_one_frame(pv, ana_hopsize); update_position_queue(pv, ana_center); // first_time is not reset in update_position_queue where it is tested // because it is also used in pv_get_output2, which does not call // update_position_queue() pv->first_time = FALSE; } return finish_output(pv); } float *pv_get_output2(Phase_vocoder x) { PV *pv = (PV *)x; assert(pv->phase == PV_START); long blocksize = pv->blocksize; int fftsize = pv->fftsize; float *out_next = pv->out_next; float *output_buffer = pv->output_buffer; float *ana_frame = pv->ana_frame; float *ana_win = pv->ana_win; long output_buffer_len = pv->output_buffer_len; D printf("pv_get_output2: blocksize %ld frame_next %ld " "out_next %ld buffer offset %ld\n", blocksize, (long) (pv->frame_next - output_buffer), (long) (out_next - output_buffer), (long) (pv->output_total - (out_next - output_buffer))); // To produce blocksize, how many samples do we need? The next // sample to output is at out_next, and the next frame will be // addded at frame_next, so we've already computed // out_next - frame_next: while (blocksize > (pv->frame_next - out_next)) { long out_cnt = (long) (pv->output_total + (pv->frame_next - out_next) + fftsize / 2); // if there's no room in the output buffer, shift the samples. // This is done here to avoid extra work (sometimes pv_get_output2 // can be called and the samples are already in the buffer so there's // no need to shift. if (pv->frame_next + fftsize > output_buffer + output_buffer_len) { long shift = (long) (out_next - output_buffer); D printf("shift output by %ld\n", shift); memmove(output_buffer, out_next, (output_buffer_len - shift) * sizeof(*output_buffer)); /* for debugging, fill the end with zero. When we write (rather than add) to the buffer, assert that we're over-writing zeros */ DD ZERO(output_buffer + output_buffer_len - shift, shift); out_next = output_buffer; pv->out_next = output_buffer; pv->frame_next -= shift; } int ana_hopsize = (*pv->callback)(out_cnt, ana_frame, fftsize, pv->rock); /* DEBUG - To check input, the following commented code will write each analysis frame as a file. The analysis frame is prefixed with zeros so that it will be placed at the right time, but this generates N^2 samples, so only the first 20 frames are written */ /*DBG write_pv_frame(out_cnt - fftsize / 2, ana_frame, fftsize, "pvana"); DBG*/ int i; for (i = 0; i < fftsize; i++) ana_frame[i] *= ana_win[i]; compute_one_frame(pv, ana_hopsize); pv->first_time = FALSE; D printf("pv_get_output2: blocksize %ld frame_next %ld " "out_next %ld buffer offset %ld\n", blocksize, (long) (pv->frame_next - output_buffer), (long) (out_next - output_buffer), (long) (pv->output_total - (out_next - output_buffer))); } D printf("pv_get_output2 returning at offset %ld abs %ld\n", (long) (pv->out_next - pv->output_buffer), pv->output_total); /*DBG out_next position is output_total, so if we subtract out_next - output_buffer, we get the absolute position of the output_buffer write_pv_frame(pv->output_total - (pv->out_next - pv->output_buffer), pv->output_buffer, pv->output_buffer_len, "pvbuf"); DBG*/ return finish_output(pv); }