mirror of
https://github.com/cookiengineer/audacity
synced 2025-07-31 16:09:28 +02:00
------------------------------------------------------------------------ r331 | rbd | 2020-10-13 12:40:12 -0500 (Tue, 13 Oct 2020) | 2 lines Also forgot to install NyquistWords.txt ------------------------------------------------------------------------ r330 | rbd | 2020-10-13 12:34:06 -0500 (Tue, 13 Oct 2020) | 2 lines Forgot to move nyquistman.pdf from docsrc/s2h to release ------------------------------------------------------------------------ r329 | rbd | 2020-10-13 11:32:33 -0500 (Tue, 13 Oct 2020) | 2 lines Updated some version numbers for 3.16. ------------------------------------------------------------------------ r328 | rbd | 2020-10-13 11:20:52 -0500 (Tue, 13 Oct 2020) | 2 lines Fixed NyquistIDE antialiasing for plot text, fix format of message. ------------------------------------------------------------------------ r327 | rbd | 2020-10-12 21:01:53 -0500 (Mon, 12 Oct 2020) | 2 lines Fixed a couple of format problems in manual. This version of Nyquist has been tested wtih macOS, Linux, 32&64-bit Windows. ------------------------------------------------------------------------ r326 | rbd | 2020-10-12 20:21:38 -0500 (Mon, 12 Oct 2020) | 1 line Modified WIN32 32-bit XLisp to use 64-bit FIXNUMs. This allows XLisp and Nyquist to handle big sounds even on 32-bit machines. Probably at some cost, but inner loops are mostly float and int32, and the Nyquist release is 64-bit anyway. Maybe we'll have to run some benchmarks on Audacity, which is still 32-bit on Windows. ------------------------------------------------------------------------ r325 | rbd | 2020-10-12 13:16:57 -0500 (Mon, 12 Oct 2020) | 1 line Win64 passes bigfiletest.lsp now. This version should work on all 64-bit systems now. These changes untested on Linux and macOS. 
------------------------------------------------------------------------ r324 | rbd | 2020-10-11 21:31:53 -0500 (Sun, 11 Oct 2020) | 2 lines I couldn't free enough space on my linux box, so I adjusted the bigfiletest to write 8-bit ulaw. It's still >4GB and >4G samples. Works on Linux. ------------------------------------------------------------------------ r323 | rbd | 2020-10-11 19:41:25 -0500 (Sun, 11 Oct 2020) | 2 lines Missing file from last commit. ------------------------------------------------------------------------ r322 | rbd | 2020-10-11 19:36:08 -0500 (Sun, 11 Oct 2020) | 1 line Found another case where WIN64 needs int64_t instead of long for sample count. ------------------------------------------------------------------------ r321 | rbd | 2020-10-11 19:33:25 -0500 (Sun, 11 Oct 2020) | 3 lines Fixed s-save to handle optional and keyword parameters (which should never have been mixed in the first place). Documentation cleanup - should be final for this version. ------------------------------------------------------------------------ r320 | rbd | 2020-10-11 14:44:37 -0500 (Sun, 11 Oct 2020) | 2 lines Fixes to handle IRCAM sound format and tests for big file io working on macOS. ------------------------------------------------------------------------ r319 | rbd | 2020-10-10 21:31:58 -0500 (Sat, 10 Oct 2020) | 2 lines Changes for linux and to avoid compiler warnings on linux. ------------------------------------------------------------------------ r318 | rbd | 2020-10-10 20:50:23 -0500 (Sat, 10 Oct 2020) | 1 line This is the test used for Win64 version. ------------------------------------------------------------------------ r317 | rbd | 2020-10-10 20:34:34 -0500 (Sat, 10 Oct 2020) | 1 line This version works on Win64. Need to test changes on macOS and linux. ------------------------------------------------------------------------ r316 | rbd | 2020-10-10 19:59:15 -0500 (Sat, 10 Oct 2020) | 2 lines PWL changes to avoid compiler warning. 
------------------------------------------------------------------------ r315 | rbd | 2020-10-10 19:34:04 -0500 (Sat, 10 Oct 2020) | 2 lines A few more changes for 64-bit sample counts on Win64 ------------------------------------------------------------------------ r314 | rbd | 2020-10-10 13:19:42 -0500 (Sat, 10 Oct 2020) | 2 lines Fixed int64_t declaration in gate.alg ------------------------------------------------------------------------ r313 | rbd | 2020-10-10 12:07:40 -0500 (Sat, 10 Oct 2020) | 2 lines Fixes to gate for long sounds ------------------------------------------------------------------------ r312 | rbd | 2020-10-10 11:47:29 -0500 (Sat, 10 Oct 2020) | 2 lines Fixed sound_save types for intgen ------------------------------------------------------------------------ r311 | rbd | 2020-10-10 11:09:01 -0500 (Sat, 10 Oct 2020) | 2 lines Fixed a 64-bit sample count problem in siosc.alg ------------------------------------------------------------------------ r310 | rbd | 2020-10-10 11:03:12 -0500 (Sat, 10 Oct 2020) | 2 lines Fixed sndmax to handle 64-bit sample counts. ------------------------------------------------------------------------ r309 | rbd | 2020-10-10 10:57:04 -0500 (Sat, 10 Oct 2020) | 2 lines Forgot to re-translate all tran/*.alg files with fix for int64 cast to int32. This version compiles on macOS and ready for test on Win64. ------------------------------------------------------------------------ r308 | rbd | 2020-10-10 10:16:05 -0500 (Sat, 10 Oct 2020) | 2 lines Everything seems to compile and run on macOS now. Moving changes to Windows for test. ------------------------------------------------------------------------ r307 | rbd | 2020-10-10 09:23:45 -0500 (Sat, 10 Oct 2020) | 1 line Added casts to avoid compiler warnings and to review changes to support 64-bit sample counts on Windows. Still not complete, and waiting to regenerate and compile tran directory code after updates to translation code that will insert more casts. 
------------------------------------------------------------------------ r306 | rbd | 2020-10-09 21:55:15 -0500 (Fri, 09 Oct 2020) | 2 lines Rebuilt seqfnint.c from header files. ------------------------------------------------------------------------ r305 | rbd | 2020-10-09 21:53:33 -0500 (Fri, 09 Oct 2020) | 1 line Changed some FIXNUMS to LONG to avoid compiler warnings in seqfnint.c ------------------------------------------------------------------------ r304 | rbd | 2020-10-09 21:44:03 -0500 (Fri, 09 Oct 2020) | 2 lines I discovered forgotten regression-test.lsp and added test that requires 64-bit sample counts to pass. Fixed a few bugs revealed by running the type-checking regression tests. ------------------------------------------------------------------------ r303 | rbd | 2020-10-09 12:28:58 -0500 (Fri, 09 Oct 2020) | 2 lines Changes for 64-bit sample counts broke mult-channel s-save. Fixed in the commit for macOS. ------------------------------------------------------------------------ r302 | rbd | 2020-10-09 10:03:39 -0500 (Fri, 09 Oct 2020) | 2 lines Changed snd-play to return samples computed and used that to make a test for computing long sounds that would overflow 32-bit length counts. ------------------------------------------------------------------------ r301 | rbd | 2020-10-09 09:11:26 -0500 (Fri, 09 Oct 2020) | 2 lines corrected mistake in delaycv.alg and re-translated ------------------------------------------------------------------------ r300 | rbd | 2020-10-09 09:09:06 -0500 (Fri, 09 Oct 2020) | 2 lines Fix to delaycv.alg -- "s" changed to "input" to avoid matching "s" in "sample_type". ------------------------------------------------------------------------ r299 | rbd | 2020-10-09 09:03:33 -0500 (Fri, 09 Oct 2020) | 4 lines To avoid compiler warnings, XLisp interfaces to C int and long are now specified as LONG rather than FIXNUM, and the stubs that call the C functions cast FIXNUMs from XLisp into longs before calling C functions. 
------------------------------------------------------------------------ r298 | rbd | 2020-10-08 22:20:26 -0500 (Thu, 08 Oct 2020) | 2 lines This commit has many more fixes to handle long (64-bit) sounds, including a lot of fixes for warnings by Visual Studio assigning int64_t to long (works on macOS, doesn't work on VS). This was compiled and tested on macOS, and even computed a 27.1-hour sound using OSC, LP, SUM and MULT (haven't tested I/O yet). ------------------------------------------------------------------------ r297 | rbd | 2020-10-07 13:04:02 -0500 (Wed, 07 Oct 2020) | 2 lines This is a major cleanup. It started with the goal of changing long to int64_t for sample counts so that on 64-bit windows, where long is only 32-bits, the sample counts would nevertheless be 64-bit allowing long sounds, which was a limitation for long recordings in Audacity. Since I was using compiler warnings to track possible loss-of-precision conversions from 64-bit sample counts, and there were *many* warnings, I started cleaning up *all* the warnings and ended up with a very large set of changes, including "modernizing" C declarations that date back to XLisp and CMU MIDI Toolkit code and were never changed. This version runs all the examples.sal code on macOS, but will surely have problems on Windows and Linux given the number of changes. ------------------------------------------------------------------------ r296 | rbd | 2020-10-06 13:34:20 -0500 (Tue, 06 Oct 2020) | 2 lines More changes from long to int64_t for sample counts. ------------------------------------------------------------------------ r295 | rbd | 2020-10-06 11:53:49 -0500 (Tue, 06 Oct 2020) | 2 lines More work on using 64-bit sample counts. Changed MAX_STOP from 32-bit to 64-bit limit. 
------------------------------------------------------------------------ r294 | rbd | 2020-10-06 11:48:05 -0500 (Tue, 06 Oct 2020) | 2 lines Made some changes so that sample counts are int64_t (for windows) instead of long to support sample counts above 31 bits. ------------------------------------------------------------------------ r293 | rbd | 2020-10-04 21:30:55 -0500 (Sun, 04 Oct 2020) | 2 lines Fixed a few minor things for Linux and tested on Linux. ------------------------------------------------------------------------ r292 | rbd | 2020-10-04 21:00:28 -0500 (Sun, 04 Oct 2020) | 2 lines Update extensions: all are minor changes. ------------------------------------------------------------------------ r291 | rbd | 2020-09-24 13:59:31 -0500 (Thu, 24 Sep 2020) | 2 lines New implementation of seq and seqrep, added get-real-time, documented get-real-time, fixed examples.sal and examples.lsp which are now in lib rather than extensions (so they are now back in the basic installation), other cleanup. ------------------------------------------------------------------------ r290 | rbd | 2020-08-16 16:24:52 -0500 (Sun, 16 Aug 2020) | 2 lines Fixed bug in snd-gate, revised GATE and NOISE-GATE to handle multi-channel sound. RMS now handles multi-channel input. S-AVG added to take multichannel input (but not used, because RMS could not be written without making SND-SRATE convert multichannel sound to vector of floats. That seems to be going toward a fully vectorized model. Not going there for now. ------------------------------------------------------------------------ r289 | rbd | 2020-07-09 16:27:45 -0500 (Thu, 09 Jul 2020) | 2 lines Added GET-REAL-TIME function to XLISP. May not work yet on Windows. Various fixes for compiler warnings. I noticed FLAC doesn't work (I guess it never did) and I cannot figure out how this even links because flac_min seems to be undefined. Something to look at later.
1102 lines
45 KiB
C
1102 lines
45 KiB
C
/* cmupv.c -- phase vocoder */
|
|
|
|
/* Computation is driven by demands for output. The client calls either
|
|
pv_get_output() or pv_get_output2(). Either way, output is returned
|
|
one blocksize at a time. The blocksize is set by pv_set_blocksize()
|
|
and defaults to the synthesis hopsize, which defaults to fftsize/8.
|
|
|
|
Since the blocksize and hopsize are not necessarily matched in any
|
|
way, there is a buffer to accumulate the overlapping "grains" of sound
|
|
which we call the synthesis frames (each synthesis frame is computed
|
|
by adjusting phases of an analysis frame). The buffer is called
|
|
output_buffer, and the length (in floats) is output_buffer_len.
|
|
|
|
The output_buffer_len has to be big enough to contain blocksize samples
|
|
which are about to be returned as output plus fftsize samples which
|
|
overlap with future synthesis frames that will be added later. The
|
|
output_buffer_len also has to be at least hopsize + fftsize samples (probably
|
|
these worst-case sizes are conservative, but it's easier to set
|
|
workable upper bounds than to think about all the special cases when
|
|
output_buffer_len, blocksize, hopsize, and fftsize are all arbitrary).
|
|
|
|
The basic structure to produce blocksize samples is as follows:
|
|
out_next is the pointer to the next block of blocksize samples. This
|
|
is a pointer directly into output_buffer, initially the first sample
|
|
in output_buffer.
|
|
|
|
We also keep a pointer frame_next, which is where the next synthesis
|
|
frame will be added into output_buffer. Thus, the number of samples
|
|
that have been completely computed but not output (i.e. no more
|
|
overlapping frames will be added) is frame_next - out_next. When
|
|
(frame_next - out_next) >= blocksize, we can deliver blocksize samples
|
|
to the caller.
|
|
|
|
The next constraint is that as we add overlapping synthesis frames into
|
|
output_buffer, we have to have room for them, so frame_next + fftsize
|
|
must not be greater than output_buffer + output_buffer_len. If this
|
|
constraint is not met, we need to shift everything toward the beginning
|
|
of the output_buffer. Since we've output everything up to out_next, we
|
|
shift by out_next - output_buffer.
|
|
|
|
Because output_buffer_len >= blocksize + fftsize, we are guaranteed that
|
|
there is always room to add in any synthesis frames that overlap the
|
|
blocksize samples to be returned next.
|
|
|
|
PHASE COHERENCE
|
|
|
|
To preserve phase relationships when a sine spills over multiple bins
|
|
(which happens always due to windowing), we adjust phase based on peaks
|
|
in the magnitude spectrum. The phases of neighboring bins are adjusted
|
|
by the same angle.
|
|
|
|
Specification: We want to divide the spectrum into peaks: between local
|
|
minima, adjust according to the peak.
|
|
|
|
Algorithm: Set "previous" minimum to -1 and previous magnitude to 0
|
|
- search for the "previous" peak
|
|
- iteratively do the following:
|
|
- search for the next minimum
|
|
- search for the next peak
|
|
- compute range of bins to modify, assign minimum to the largest peak
|
|
- compute the phase adjustment for the next peak
|
|
- apply the phase adjustment to range of bins
|
|
- set previous minimum to next minimum
|
|
- set previous peak to next peak
|
|
*/
|
|
|
|
/* BEGIN PV_SINE_TEST debugging code...
|
|
This debugging code is for testing a special case: the input is a sine tone
|
|
with amplitude about 1.0 and frequency about 689Hz at 44100Hz sample rate,
|
|
resulting in a period of exactly 64 samples, which will be bin 8 with an
|
|
fftsize of 512.
|
|
The goal here is to compute exactly what the phasevocoder should be doing
|
|
so we can compare to what it does. When SINETEST is enabled, data will be
|
|
printed. At the time this code is being written, the result with a stretch
|
|
factor of exactly 1.1 is a pulsing sound when the absolute interface is used.
|
|
With the absolute interface, the analysis hopsize is mostly 58, with a hop of
|
|
59 samples every 5 or 6 frames.
|
|
*/
|
|
// #define PV_SINE_TEST 1
|
|
#ifdef PV_SINE_TEST
|
|
double pvst_frequency = 44100.0 / 64.0; // about 689Hz
|
|
long pvst_offset = -1000; // -1 means we haven't seen first frame yet
|
|
// the first hop size is bogus, so we ignore it, then accumulate hop sizes here
|
|
#endif
|
|
/* END PV_SINE_TEST */
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include "internal.h"
|
|
// only needed for some debugging code which is probably commented out
|
|
#include "cmupvdbg.h"
|
|
|
|
// define PHASE_FIX to tie neighboring bin phase to phase of spectral peaks
// #define PHASE_FIX 1

// debugging output on or off: D prefixes a statement that should only
// run when debugging is enabled, e.g.  D printf("...");
// #define D if (1)
#define D if (0)

// more debugging
#define DD if (0)

#define LOGFFTSIZE_DEFAULT 11
#define RATIO_DEFAULT 1
#define BOOL int
#define FALSE 0
#define TRUE 1
#define TWOPI (2.0 * M_PI)

// These macros are used for memory allocation and freeing. Note that
// only ZERO acts like a function. The rest only work on fields of a
// variable named pv that has malloc and free function-pointer members.
//
// PVFREE and PVREALLOC are wrapped in do { ... } while (0) so that each
// expands to exactly one statement. Without the wrapper,
//     if (cond) PVFREE(f); else ...
// either fails to compile or binds the else to the macro's hidden if.

// PVFREE(field): release pv->field (if set) and reset it to NULL
#define PVFREE(field) \
    do { \
        if (pv->field) { pv->free(pv->field); pv->field = NULL; } \
    } while (0)

// PVALLOC(field, size): allocate size elements for pv->field.
// The result is cast to float *, so this is for float-array fields.
// The allocation result is NOT checked here.
#define PVALLOC(field, size) \
    pv->field = (float *)pv->malloc((size) * sizeof(*(pv->field)))

// PVREALLOC(field, size): free pv->field, then allocate it again with
// size elements. Old contents are discarded, not copied.
#define PVREALLOC(field, size) \
    do { PVFREE(field); PVALLOC(field, size); } while (0)

// ZERO(array, size): clear size elements starting at array
#define ZERO(array, size) memset((array), 0, (size) * sizeof(*(array)))
|
|
|
|
|
|
#include "cmupv.h"
|
|
#include "fftext.h"
|
|
|
|
/* Protocol state of a phase vocoder instance, stored in PV.phase. */
typedef enum {
    /* parameters changed (or never set up); pv_initialize() is needed */
    PV_UNINITIALIZED,
    /* set by pv_initialize(); pv_get_input_count() asserts this state */
    PV_START,
    /* set by pv_get_input_count() once the input demand is known */
    PV_GOT_COUNT,
    /* presumably set once the requested input has been delivered --
       the code that sets it is not in this part of the file */
    PV_GOT_INPUT
} pv_phase_type;
|
|
|
|
/* One entry of the PV.pos_buffer queue: pairs the center of an analysis
   frame with the center of the synthesis frame produced from it. */
struct position
{
    /* sample number of the center of the analysis frame */
    long ana_pos;
    /* sample number of the center of the synthesis frame */
    long syn_pos;
};
|
|
|
|
typedef struct {
|
|
void *(*malloc)(size_t); // malloc is used to allocate memory. If you
|
|
// have a real-time system and want to avoid the standard library
|
|
// malloc() which may have priority inversion problems due to locks,
|
|
// you can supply your own lock-free implementation
|
|
void (*free)(void *); // if you provide a custom malloc, you should
|
|
// provide a matching custom free()
|
|
int blocksize; // the size of audio blocks produced by the phase vocoder.
|
|
int fftsize; // the number of samples in each FFT. Should be power of 2.
|
|
int log2_fft; // 2 is the log base
|
|
int syn_hopsize; // the hopsize used in reconstructing the output
|
|
float ratio; // the time-stretch ratio. NOTE: even though ratio is
|
|
// specified as the amount input is stretched, THIS ratio is
|
|
// the reciprocal, i.e. input duration / output duration
|
|
int max_ana_hopsize; // fftsize / 3 is max hopsize
|
|
float pre_ratio; // previous ratio is used to calculate
|
|
// the previous input_length
|
|
int mode; // 0 - normal, 1 - phase fix, 2 - robovoice
|
|
float *ana_win; // the window function used on input (analysis)
|
|
float *syn_win; // the window function used on output (synthesis)
|
|
long input_eff_pos; // input effective position is the sample number
|
|
// of the input that corresponds to the current output
|
|
float *input_buffer; // used to buffer input samples
|
|
long input_buffer_len; // how many floats can input_buffer hold?
|
|
float *output_buffer; // used to buffer output samples
|
|
long output_buffer_len; // how big in floats is the output buffer?
|
|
float *input_head; // pointer to the start of the previous
|
|
// analysis frame. input_head is updated by
|
|
// hopsize before reading each frame.
|
|
float *input_rear; // pointer to the end of the input data
|
|
int frames_to_compute; // how many frames we'll compute in pv_get_output()
|
|
int expected_input; // the value computed by pv_get_input_count()
|
|
// we check that pv_put_input delivers what we asked for
|
|
float *out_next; // pointer into the output buffer from where the
|
|
// next output sample will delivered
|
|
float *frame_next; // pointer into the output buffer to where the
|
|
// next frame will by added
|
|
Pv_callback callback; // function to retrieve an input frame
|
|
void *rock; // object pointer or context info to be passed to callback
|
|
pv_phase_type phase;
|
|
BOOL first_time; // true only on the first fft output frame
|
|
BOOL absolute; // true if using the callback protocol -- set by create2()
|
|
float *ana_frame; // analysis frame
|
|
float *syn_frame; // synthesis frame
|
|
float *mag; // magnitude for points in the frame being processed
|
|
float *ana_phase; // phase for points in the analysis frame
|
|
// being processed
|
|
float *syn_phase; // phase for points in the synthesis frame
|
|
// being processed
|
|
float *pre_ana_phase; // recording last analysis phase for estimating
|
|
// the frequency
|
|
float *pre_syn_phase; // recording last systhesis phase for rebuilting
|
|
// the new phase
|
|
float *bin_freq; // bin frequency, used in phase unwrapping;
|
|
|
|
struct position *pos_buffer; // Circular array storing the sample
|
|
// number of the middle of the frames
|
|
// (both for analysis and synthesis frames)
|
|
struct position *pos_buffer_head; // beginning of the circular array,
|
|
// points to oldest entry. If equal to pos_buffer_rear, the queue
|
|
// is empty. Never points to
|
|
// pos_buffer + queue_length. It wraps around to pos_buffer.
|
|
struct position *pos_buffer_rear; // rear of the circular array,
|
|
// points to slot AFTER the most recent entry. If equal to
|
|
// pos_buffer_rear, the queue is empty. Never points to
|
|
// pos_buffer + queue_length. It wraps around to pos_buffer.
|
|
long queue_length; // length of the circular queue of corresponding times
|
|
long input_total; // how many input samples did we get so far?
|
|
// initially 0, and incremented upon pv_put_input()
|
|
// so the input_total count corresponds to the input_rear pointer.
|
|
long output_total; // how many output samples did we produce so far?
|
|
// initially 0, and incremented by block_size after each call to
|
|
// pv_get_output(). Corresponds to out_next.
|
|
} PV;
|
|
|
|
//extern long int sig;
|
|
//extern long int sig1;
|
|
//extern long int sig2;
|
|
|
|
// round_log_power - validate a power-of-2 size and return its log2
//    fftsize:  the requested size; expected to be an exact power of 2
//              in the range 1 .. 2^20
//    size_ptr: if non-NULL, receives the validated size (1 << result)
//    returns:  log2(fftsize) when fftsize is valid. For any invalid
//              argument (non-positive, not a power of 2, or larger
//              than 2^20) a sane fallback of 10 is returned and
//              *size_ptr is set to 1024.
//
// NOTE: a size that is not a power of 2 is treated as an error and
// replaced by the fallback; it is NOT rounded up to the next power
// of 2.
//
// The computation is done with integers only. The earlier
// floating-point version stored 1024 (a size) where a log2 was expected
// on the error path and then evaluated 1 << 1024, which is undefined
// behavior; it also converted log2l() of zero or negative input to int.
//
int round_log_power(int fftsize, int *size_ptr)
{
    enum { MAX_LOG2 = 20, FALLBACK_LOG2 = 10 };
    int log2_size = 0;
    int valid = (fftsize >= 1);

    if (valid) {
        // smallest exponent with (1 << log2_size) >= fftsize; the bound
        // on log2_size keeps the shift well inside the range of int
        while (log2_size <= MAX_LOG2 && (1 << log2_size) < fftsize) {
            log2_size++;
        }
        valid = (log2_size <= MAX_LOG2) && ((1 << log2_size) == fftsize);
    }
    if (!valid) {
        log2_size = FALLBACK_LOG2; // on error, substitute a sane value
    }
    if (size_ptr) {
        *size_ptr = 1 << log2_size;
    }
    return log2_size;
}
|
|
|
|
|
|
Phase_vocoder pv_create(void *(*mallocfn)(size_t), void (*freefn)(void *))
|
|
{
|
|
if (!mallocfn) mallocfn = &malloc;
|
|
PV *pv = (PV *)mallocfn(sizeof(PV));
|
|
ZERO(pv, 1);
|
|
pv->phase = PV_UNINITIALIZED;
|
|
pv->malloc = mallocfn;
|
|
pv->free = freefn;
|
|
pv_set_fftsize(pv, 1 << LOGFFTSIZE_DEFAULT);
|
|
// syn_hopsize will now be FFTSIZE_DEFAULT / 8
|
|
// pv_set_syn_hopsize(pv, FFTSIZE_DEFAULT / 8);
|
|
pv->blocksize = pv->syn_hopsize;
|
|
pv_set_ratio(pv, RATIO_DEFAULT);
|
|
pv->first_time = TRUE;
|
|
pv->mode = 0;
|
|
return (Phase_vocoder)pv;
|
|
}
|
|
|
|
|
|
Phase_vocoder pv_create2(void *(*mallocfn)(size_t), void (*freefn)(void *),
|
|
Pv_callback callback, void *rock)
|
|
{
|
|
PV *pv = (PV *)pv_create(mallocfn, freefn);
|
|
pv->absolute = TRUE;
|
|
pv_set_callback(pv, callback, rock);
|
|
return (Phase_vocoder)pv;
|
|
}
|
|
|
|
|
|
// pv_end - destroy a phase vocoder and release all of its memory
//    x: address of the handle; *x is set to NULL on return.
//       A NULL x, or a handle that is already NULL, is ignored, so
//       calling pv_end() twice on the same handle is harmless.
//
// Every buffer owned by the vocoder is released with the vocoder's own
// free function, and fftFree() is called to release the FFT tables.
//
void pv_end(Phase_vocoder *x)
{
    if (!x || !*x) {
        return; // nothing to destroy
    }
    PV *pv = (PV *)(*x);
    fftFree();
    PVFREE(ana_win);
    PVFREE(syn_win);
    PVFREE(input_buffer);
    PVFREE(output_buffer);
    PVFREE(ana_frame);
    PVFREE(syn_frame);
    PVFREE(mag);
    PVFREE(ana_phase);
    PVFREE(syn_phase);
    PVFREE(pre_ana_phase);
    PVFREE(pre_syn_phase);
    PVFREE(bin_freq);
    PVFREE(pos_buffer);
    pv->free(pv);
    *x = NULL;
}
|
|
|
|
// pv_set_callback - install the input callback and its context
//    callback: function to retrieve an input frame
//    rock:     object pointer or context info to be passed to callback
//
void pv_set_callback(Phase_vocoder x, Pv_callback callback, void *rock)
{
    PV *vocoder = (PV *)x;
    vocoder->rock = rock;
    vocoder->callback = callback;
}
|
|
|
|
// pv_set_blocksize - set the number of samples returned per output block
//    n: the new blocksize; stored as given, without validation
//
// Changing the blocksize invalidates the buffer sizes, so the vocoder
// is put back into the PV_UNINITIALIZED phase.
//
void pv_set_blocksize(Phase_vocoder x, int n)
{
    PV *vocoder = (PV *)x;
    vocoder->phase = PV_UNINITIALIZED;
    vocoder->blocksize = n;
}
|
|
|
|
// pv_set_fftsize - set the FFT size
//    n: the new size; must be a power of 2 in the range 16 .. 2^20.
//       Any other value is ignored and the vocoder is left unchanged.
//
// The ratio fftsize / syn_hopsize is preserved: e.g. if syn_hopsize is
// fftsize/8, then after setting fftsize the new syn_hopsize will be
// (new fftsize)/8. If no hopsize has been set yet, a divisor of 8 is
// used. max_ana_hopsize becomes n / 3 and the vocoder returns to the
// PV_UNINITIALIZED phase.
//
// NOTE(review): analysis/synthesis windows that were already allocated
// for a previous fftsize are not resized here -- confirm that callers
// re-set the windows when changing fftsize after initialization.
//
void pv_set_fftsize(Phase_vocoder x, int n)
{
    PV *pv = (PV *)x;
    // The range is tested first so that n - 1 is never evaluated for a
    // value such as INT_MIN (signed overflow). The upper bound matches
    // the largest size round_log_power() accepts. Power of 2 test: only
    // if n is a power of 2 will n-1 clear the high order bit.
    if (n < 16 || n > (1 << 20) || (n & (n - 1))) {
        return; // ignore bad argument
    }
    int hop = (pv->syn_hopsize == 0 ? 8 :
               pv->fftsize / pv->syn_hopsize); // the divisor
    pv->fftsize = n;
    pv->log2_fft = round_log_power(n, &(pv->fftsize));
    pv_set_syn_hopsize(x, n / hop);
    pv->phase = PV_UNINITIALIZED;
    pv->max_ana_hopsize = n / 3;
}
|
|
|
|
// pv_set_ratio - set the time-stretch ratio
//    ratio: the amount by which the input is stretched. The vocoder
//           stores the reciprocal (input duration / output duration).
//
// The previously stored value is kept in pre_ratio. May only be called
// in the PV_START or PV_UNINITIALIZED phase (checked with assert).
//
void pv_set_ratio(Phase_vocoder x, float ratio)
{
    PV *pv = (PV *)x;
    assert(pv->phase == PV_START || pv->phase == PV_UNINITIALIZED);
    float reciprocal = 1.0F / ratio;
    pv->pre_ratio = pv->ratio;
    pv->ratio = reciprocal;
}
|
|
|
|
// pv_set_syn_hopsize - set the synthesis hopsize
//    n: requested hopsize. Values below 1 are replaced by 1. The value
//       is then passed through round_log_power(), which stores a power
//       of 2 in syn_hopsize (a value it does not accept is replaced by
//       that function's fallback). Finally the result is limited to at
//       most fftsize/4.
//
// The vocoder is put back into the PV_UNINITIALIZED phase.
//
void pv_set_syn_hopsize(Phase_vocoder x, int n)
{
    PV *pv = (PV *)x;
    int requested = (n < 1) ? 1 : n;
    round_log_power(requested, &(pv->syn_hopsize));

    int limit = pv->fftsize / 4;
    if (pv->syn_hopsize > limit) {
        pv->syn_hopsize = limit;
    }
    pv->phase = PV_UNINITIALIZED;
}
|
|
|
|
// pv_initialize - (re)allocate all buffers for the current parameters
//
// Uses the current fftsize, syn_hopsize and blocksize to size and
// allocate the windows (if not already set), the input and output
// buffers, the per-frame work arrays, the bin frequency table and the
// position queue, then builds the FFT tables and moves the vocoder to
// the PV_START phase. The input buffer and the position queue are only
// allocated when the callback ("absolute") protocol is NOT in use.
//
void pv_initialize(Phase_vocoder x)
{
    PV *pv = (PV *)x;
    const int fftsize = pv->fftsize;
    const int half_bins = fftsize / 2 + 1; // bins 0 .. fftsize/2
    const BOOL small_block = (pv->blocksize <= pv->syn_hopsize);

    // Windows: keep any window the client has set; otherwise use the
    // default (Hanning) for both analysis and synthesis.
    if (! pv->ana_win) {
        pv->ana_win = pv_window(pv, hann);
    }
    if (! pv->syn_win) {
        pv->syn_win = pv_window(pv, hann);
    }

    // Input buffer length. One frame of output needs fftsize samples.
    // Each additional frame needs at most fftsize/3 more (the maximum
    // analysis hopsize), and blocksize / hopsize frames are needed to
    // form one block. Sizing for the maximum avoids reallocating the
    // input buffer every time the time-stretch ratio changes.
    if (small_block) {
        pv->input_buffer_len = fftsize;
    } else {
        long extra = lroundf(
                (((float) pv->blocksize / pv->syn_hopsize) - 1) *
                (fftsize / 3.0F));
        pv->input_buffer_len = fftsize + 2 /* to avoid rounding error */ +
                               extra;
    }
    if (! pv->absolute) {
        PVREALLOC(input_buffer, pv->input_buffer_len);
        // preload with fftsize/2 zeros so that the first sample of the
        // input will be at the center of the fft window
        pv->input_head = pv->input_buffer;
        ZERO(pv->input_buffer, fftsize / 2);
        pv->input_rear = pv->input_buffer + fftsize / 2;
    }

    // Output buffer length. It must hold an entire fft frame added at
    // an offset of up to about syn_hopsize, i.e. syn_hopsize + fftsize,
    // and it must hold one output block plus fftsize. Overall:
    // max(syn_hopsize, blocksize) + fftsize.
    pv->output_buffer_len =
            (long) (small_block ? pv->syn_hopsize : pv->blocksize) + fftsize;
    PVREALLOC(output_buffer, pv->output_buffer_len);
    D printf("pv_initialize: input_buffer_len %ld\n", pv->input_buffer_len);
    D printf(" output_buffer_len %ld\n", pv->output_buffer_len);
    D printf(" blocksize %d\n", pv->blocksize);
    D printf(" fftsize %d\n", pv->fftsize);
    D printf(" syn_hopsize %d\n", pv->syn_hopsize);
    pv->out_next = pv->output_buffer;
    pv->frame_next = pv->output_buffer;
    ZERO(pv->output_buffer, pv->output_buffer_len);

    // per-frame work areas
    PVREALLOC(ana_frame, fftsize);
    PVREALLOC(syn_frame, fftsize);
    PVREALLOC(mag, half_bins);
    // phase and previous phase, used in phase unwrapping
    PVREALLOC(ana_phase, half_bins);
    PVREALLOC(syn_phase, half_bins);
    PVREALLOC(pre_ana_phase, half_bins);
    PVREALLOC(pre_syn_phase, half_bins);
    // bin frequency, used in phase unwrapping
    PVREALLOC(bin_freq, half_bins);
    for (int bin = 0; bin <= fftsize / 2; bin++) {
        pv->bin_freq[bin] = (float) (TWOPI * bin / fftsize);
    }

    // get_effective_pos() maps from the beginning of the next block to
    // the corresponding input sample. Since the output hopsize is
    // fixed, the next output sample was at the center of the frame if
    // we go back by framesize / syn_hopsize frames, so that many queue
    // entries are needed. Pad by a couple to deal with rounding issues:
    pv->queue_length = fftsize / (pv->syn_hopsize * 2) + 2;
    if (!pv->absolute) {
        PVFREE(pos_buffer);
        pv->pos_buffer = (struct position *)
            (pv->malloc((pv->queue_length + 1) * sizeof(struct position)));
        pv->pos_buffer_head = pv->pos_buffer;
        pv->pos_buffer_rear = pv->pos_buffer;
    }

    // make sure tables are constructed before we start real-time
    // processing; a nonzero result from fftInit() is treated as failure
    int fft_error_sign = fftInit(pv->log2_fft);
    (void) fft_error_sign; // only inspected when assertions are enabled
    assert(!fft_error_sign);

    pv->phase = PV_START;
}
|
|
|
|
|
|
// pv_set_ana_window - install a copy of a client-supplied analysis window
//    window: array of fftsize floats; the data is copied, so the caller
//            keeps ownership of its array. A NULL window is ignored.
//
// The copy is made BEFORE the previous window is released, so it is
// safe to pass the vocoder's current window, and if the allocation
// fails the previous window is left in place (there is no way to
// report the failure from this void function).
//
void pv_set_ana_window(Phase_vocoder x, float *window)
{
    PV *pv = (PV*)x;
    if (!window) {
        return; // nothing to copy
    }
    float *copy = (float *)pv->malloc(pv->fftsize * sizeof(*copy));
    if (!copy) {
        return; // out of memory: keep the existing window
    }
    memcpy(copy, window, pv->fftsize * sizeof(*copy));
    PVFREE(ana_win);
    pv->ana_win = copy;
}
|
|
|
|
|
|
// pv_set_syn_window - install a copy of a client-supplied synthesis window
//    window: array of fftsize floats; the data is copied, so the caller
//            keeps ownership of its array. A NULL window is ignored.
//
// The copy is made BEFORE the previous window is released, so it is
// safe to pass the vocoder's current window, and if the allocation
// fails the previous window is left in place (there is no way to
// report the failure from this void function).
//
void pv_set_syn_window(Phase_vocoder x, float *window)
{
    PV *pv = (PV*)x;
    if (!window) {
        return; // nothing to copy
    }
    float *copy = (float *)pv->malloc(pv->fftsize * sizeof(*copy));
    if (!copy) {
        return; // out of memory: keep the existing window
    }
    memcpy(copy, window, pv->fftsize * sizeof(*copy));
    PVFREE(syn_win);
    pv->syn_win = copy;
}
|
|
|
|
|
|
// pv_set_mode - select the processing mode
//    mode: 0, 1 or 2; any other value is ignored and the current mode
//          is kept
//
void pv_set_mode(Phase_vocoder x, int mode)
{
    if (mode < 0 || mode > 2) {
        return; // out of range: leave the mode unchanged
    }
    ((PV *)x)->mode = mode;
}
|
|
|
|
|
|
// pv_window - build a normalized window of fftsize samples
//    window_type: function giving the window value for a position
//                 i / fftsize, for i = 0 .. fftsize-1
//    returns: a newly allocated array of fftsize floats obtained from
//             the vocoder's malloc function (the caller owns it), or
//             NULL if the allocation failed
//
// Normalization: every sample is divided by sqrt(COLA_factor), where
// COLA_factor = (sum of squared window samples) / syn_hopsize.
//
float *pv_window(Phase_vocoder x, float (*window_type)(double x))
{
    PV *pv = (PV *)x;
    int window_length = pv->fftsize;
    float *window = (float *)pv->malloc(window_length * sizeof(*window));
    if (!window) {
        return NULL; // out of memory
    }

    // sample the window and accumulate the sum of squares (in float)
    float sum_window_square = 0;
    for (int i = 0; i < window_length; i++) {
        window[i] = window_type((double)i / window_length);
        sum_window_square += window[i] * window[i];
    }

    float COLA_factor = sum_window_square / pv->syn_hopsize;
    // the divisor is the same for every sample, so compute it once;
    // the division itself is still carried out in double
    double norm = sqrt(COLA_factor);
    for (int i = 0; i < window_length; i++) {
        window[i] = (float) (window[i] / norm);
    }
    return window;
}
|
|
|
|
|
|
// pv_get_input_count - how many input samples are needed for the next
// block of output?
//    returns: the number of new input samples required to produce
//             blocksize output samples, or 0 when enough output has
//             already been computed
//
// Must be called in the PV_START phase (checked with assert). As side
// effects it advances input_head by the analysis hopsize (except for
// the very first frame), shifts the input and/or output buffer toward
// its beginning when more room is needed, records frames_to_compute
// and expected_input, and moves the vocoder to the PV_GOT_COUNT phase.
//
int pv_get_input_count(Phase_vocoder x)
{
    PV *pv = (PV*)x;

    // analysis hopsize follows from the ratio, limited to the maximum
    int ana_hopsize = lroundf((pv->syn_hopsize) * (pv->ratio));
    if (ana_hopsize > pv->max_ana_hopsize) {
        ana_hopsize = pv->max_ana_hopsize;
    }

    assert(pv->phase == PV_START);

    // To produce blocksize, how many samples do we need? The next
    // sample to output is at out_next, and the next frame will be added
    // at frame_next, so frame_next - out_next samples are already
    // completely computed:
    int need = pv->blocksize - (int)(pv->frame_next - pv->out_next);
    // need is now the number of output samples still missing.
    // How many fft frames will be required? Round up by adding
    // hopsize-1:
    int frames = (need + pv->syn_hopsize - 1) / pv->syn_hopsize;

    if (frames <= 0) {
        // enough output is already available: no frames, no input
        pv->frames_to_compute = 0;
        pv->phase = PV_GOT_COUNT;
        pv->expected_input = 0;
        return 0;
    }

    // Advance by one analysis hop, except on the first time, where the
    // first sample is always put in the middle of the frame:
    if (!pv->first_time) {
        pv->input_head += ana_hopsize;
    }
    // We need framesize + hopsize * (frames - 1) input samples in order
    // to compute frames overlapping fft frames of size framesize ...
    need = pv->fftsize + ana_hopsize * (frames - 1);
    // ... minus the input samples we already have:
    long buffered = (long) (pv->input_rear - pv->input_head);
    need -= buffered;

    // See if we have room for need samples in the input buffer:
    if (pv->input_rear + need > pv->input_buffer + pv->input_buffer_len) {
        // not enough room. Shift the input buffer to make space.
        long in_shift = (long) (pv->input_head - pv->input_buffer);
        memmove(pv->input_buffer, pv->input_head,
                (pv->input_rear - pv->input_head) *
                sizeof(*(pv->input_buffer)));
        pv->input_head -= in_shift;
        pv->input_rear -= in_shift;
        D printf(" after input shift by %ld, head at %ld\n",
                 in_shift, (long) (pv->input_head - pv->input_buffer));
    }
    // make sure our assumptions are true and we now have space:
    assert(pv->input_rear + need <=
           pv->input_buffer + pv->input_buffer_len);

    // See if we have room in the output_buffer: the last sample will
    // be at frame_next + (frames - 1) * hopsize + fftsize
    float *last_output = pv->frame_next +
                         (frames - 1) * pv->syn_hopsize + pv->fftsize;
    if (last_output > pv->output_buffer + pv->output_buffer_len) {
        // not enough room. Shift the output buffer to make space.
        long out_shift = (long) (pv->out_next - pv->output_buffer);
        memmove(pv->output_buffer, pv->out_next,
                (pv->fftsize - pv->syn_hopsize) *
                sizeof(*(pv->output_buffer)));
        pv->frame_next -= out_shift;
        pv->out_next -= out_shift;
    }

    pv->frames_to_compute = frames;
    pv->phase = PV_GOT_COUNT;
    pv->expected_input = need;
    return need;
}
|
|
|
|
#pragma warning(disable: 4715 4068) // return type and unknown pragma
|
|
#pragma clang diagnostic ignored "-Wreturn-type"
|
|
double pv_get_effective_pos(Phase_vocoder x)
|
|
{
|
|
PV *pv = (PV*)x;
|
|
assert(pv->phase == PV_START);
|
|
|
|
// Find the appropriate position struct for the computation of
|
|
// effective audio position. We are given pv->output_total, the
|
|
// sample count at which we want the equivalent input sample
|
|
// count. We want to interpolate between two queue entries, so
|
|
// we'll search the queue until we have an entry that is greater
|
|
// than output_total. Then we set head to the previous entry
|
|
// because it will make future searches go faster.
|
|
|
|
struct position *pos_find = pv->pos_buffer_head;
|
|
struct position *pos_find_prev = NULL;
|
|
while (pos_find != pv->pos_buffer_rear &&
|
|
pos_find->syn_pos <= pv->output_total) {
|
|
pos_find_prev = pos_find;
|
|
pos_find++;
|
|
if (pos_find == pv->pos_buffer + pv->queue_length) {
|
|
pos_find = pv->pos_buffer; // wrap
|
|
}
|
|
}
|
|
// if pos_find and pos_find_prev both point to something, we
|
|
// can interpolate:
|
|
if (pos_find != pv->pos_buffer_rear && pos_find_prev) {
|
|
// we can drop old positions from queue now:
|
|
pv->pos_buffer_head = pos_find_prev;
|
|
// interpolate
|
|
long output_step = pos_find->syn_pos - pos_find_prev->syn_pos;
|
|
long input_step = pos_find->ana_pos - pos_find_prev->ana_pos;
|
|
return pos_find_prev->ana_pos + input_step *
|
|
(double)(pv->output_total - pos_find_prev->syn_pos) /
|
|
(double)output_step;
|
|
// if there's nothing in the queue, then we must be starting.
|
|
// after the first frame there are TWO entries
|
|
} else if (pos_find_prev == NULL) {
|
|
// if any of these fail, maybe the queue_length is too small
|
|
// and we dropped some history too early
|
|
assert(pos_find == pv->pos_buffer_rear);
|
|
assert(pv->first_time);
|
|
assert(pv->output_total == 0);
|
|
return -(pv->ratio * pv->fftsize / 2.0);
|
|
} // I can't think of any other case.
|
|
assert(FALSE);
|
|
}
|
|
|
|
|
|
// Send samples to phase vocoder. size should match the number of
|
|
// samples computed by get_input_count.
|
|
//
|
|
void pv_put_input(Phase_vocoder x, int size, float *samples)
|
|
// 'samples' points to samples to be sent each time
|
|
{
|
|
PV *pv = (PV *)x;
|
|
assert(pv->phase == PV_GOT_COUNT);
|
|
// size must agree with the value computed by pv_get_input_count:
|
|
assert(pv->expected_input == size);
|
|
D printf("pv_put_input: size %d, %g at %ld\n",
|
|
size, *samples, (long) (pv->input_rear - pv->input_buffer));
|
|
if (size > 0) {
|
|
memcpy(pv->input_rear, samples, size * sizeof(*pv->input_rear));
|
|
pv->input_rear += size;
|
|
pv->input_total += size;
|
|
}
|
|
pv->phase = PV_GOT_INPUT;
|
|
}
|
|
|
|
|
|
/* compute_one_frame -- convert one windowed analysis frame into a
 * synthesis frame and overlap-add it into the output buffer
 *
 * On entry pv->ana_frame holds fftsize windowed input samples.
 * ana_hopsize is the analysis hop (in samples) used for phase
 * unwrapping. Unless SKIP_PHASE_ADJUST is defined, the frame is
 * transformed (rffts), split into magnitude and phase for bins
 * 0 .. fftsize/2, synthesis phases are chosen according to
 * pv->first_time and pv->mode, and the spectrum is transformed back
 * (riffts) into pv->syn_frame.
 *
 * The synthesis frame is multiplied by pv->syn_win and combined with
 * the output at pv->frame_next: the first fftsize - syn_hopsize
 * samples are added to what is already there, the last syn_hopsize
 * samples are stored. pv->frame_next then advances by syn_hopsize.
 * The caller must guarantee that fftsize samples fit at frame_next.
 */
void compute_one_frame(PV *pv, int ana_hopsize)
{
    float *syn_frame = pv->syn_frame;
    float *ana_frame = pv->ana_frame;
    int fftsize = pv->fftsize;
    int log2_fft = pv->log2_fft;
    float *mag = pv->mag;
    float *ana_phase = pv->ana_phase;
    float *syn_phase = pv->syn_phase;
    float *syn_win = pv->syn_win;
    float *frame_next = pv->frame_next;
    int syn_hopsize = pv->syn_hopsize;
    float *pre_ana_phase = pv->pre_ana_phase;
    float *pre_syn_phase = pv->pre_syn_phase;
    float *bin_freq = pv->bin_freq;
    int i;
//#define SKIP_PHASE_ADJUST
#ifdef SKIP_PHASE_ADJUST
    // for debugging we just copy the windowed input to the output
    // without adjusting phases.
    memcpy(syn_frame, ana_frame, fftsize * sizeof(*syn_frame));
#else
    /*DBG
    long zeros = pv->output_total + frame_next - pv->out_next;
    write_pv_frame(zeros, ana_frame, fftsize, "pvsyn");
    DBG*/
    OneDimensionFFTshift(ana_frame, fftsize); // FFTshift
    fftInit(log2_fft);
    rffts(ana_frame, log2_fft, 1);

    /* get magnitude and phase */
    // bins 0 and fftsize/2 are taken from ana_frame[0] and ana_frame[1]
    // and are given phase 0
    mag[0] = ana_frame[0];
    ana_phase[0] = 0;
    mag[fftsize / 2] = ana_frame[1];
    ana_phase[fftsize / 2] = 0;
    for (i = 1; i < fftsize / 2; i++) {
        float real = ana_frame[2 * i];
        float imag = ana_frame[2 * i + 1];
        mag[i] = (float)sqrt(real * real + imag * imag);
        ana_phase[i] = (float)atan2(imag, real);
    }
#ifdef PV_SINE_TEST
    if (pvst_offset == -1000) pvst_offset = -24;
    else pvst_offset += ana_hopsize;
    // phase of the sine should be (offset / 64) * TWO_PI,
    // but when sine phase is 0, atan2(0, -1) is -pi/2 so we have
    // atan2_phase = sine_phase - pi/2
    // adding 3 pi/2 instead of subtracting pi/2 to make fmod result positive
    double est_atan2_phase = fmod((pvst_offset / 64.0) * TWOPI + (3 * M_PI_2), TWOPI);
    if (est_atan2_phase > M_PI) est_atan2_phase -= TWOPI;
    // the integer arguments are cast so they match the %ld conversions
    printf("offset %ld hop %ld ph[8] %5f est.ph %5f real %5f imag %5f\n",
           (long) pvst_offset, (long) ana_hopsize, ana_phase[8],
           est_atan2_phase, ana_frame[16], ana_frame[17]);
#endif
    /* phase unwrapping & set synthesis phase */
    if (pv->first_time) {
        D printf("phasevocoder fftsize %d hopsize %d\n",
                 fftsize, syn_hopsize);
        // no history yet: synthesis phases start as the analysis phases
        memcpy(syn_phase, ana_phase,
               ((fftsize / 2) + 1) * sizeof(*syn_phase));
    } else if (pv->mode == PV_MODE_PHASEFIX) {
        // we'll start each iteration with prev_min_x set to the lowest bin
        // that will be assigned to the peak at prev_peak_x. We'll find the
        // next_min_x and the following next_peak_x. Update the phases.
        //
        // The magnitude variables below are given defined starting values
        // because some paths (no peak found, or the end-of-spectrum case)
        // would otherwise read them before any assignment.
        int prev_peak_x = 0;        // index of previous peak
        float prev_peak_mag = mag[0]; // magnitude of previous peak
        int prev_min_x = 0;         // index of previous local minimum
        int next_peak_x;   // index of peak between prev_min_x and next_min_x
        float next_peak_mag = 0;    // magnitude of next peak
        int next_min_x;             // index of next minimum (after peak_x)
        float last_mag = mag[0];    // used in search for peaks
        float this_mag = mag[1];
        float next_mag = this_mag;
        // decide if we're starting on a peak or a minimum:
        if (last_mag <= this_mag) { // starting on a minimum
            // find peak
            for (i = 1; i < fftsize / 2; i++) {
                next_mag = mag[i + 1]; // invariant: last_mag <= this_mag
                if (this_mag > next_mag) { // found peak
                    prev_peak_x = i;
                    prev_peak_mag = this_mag;
                    break; // invariant: this_mag > next_mag
                }
                // invariant: this_mag <= next_mag
                last_mag = this_mag;
                this_mag = next_mag; // invariant: last_mag <= this_mag
            }
            if (i >= fftsize / 2) {
                // no interior peak: treat the last position reached as
                // the peak; this_mag is the magnitude at that position
                prev_peak_x = i;
                prev_peak_mag = this_mag;
            }
        } else { // set up to start loop
            next_mag = this_mag; // invariant: last_mag > this_mag
            this_mag = last_mag; // invariant: this_mag > next_mag
            prev_peak_mag = last_mag;
        }
        while (prev_min_x <= fftsize / 2) {
            // invariant: prev_min_x is previous local minimum or 0
            //     prev_peak_x is first local peak after prev_min_x (or 0)
            //     last_mag is mag at prev_peak_x - 1
            //     this_mag is mag at prev_peak_x
            //     next_mag is mag at prev_peak_x + 1
            // find next minimum
            // Note: prev_peak_x might be fftsize/2, so i might be fftsize/2 + 1
            for (i = prev_peak_x + 1; i < fftsize / 2; i++) {
                last_mag = this_mag; // invariant: this_mag > next_mag
                this_mag = next_mag; // invariant: last_mag > this_mag
                next_mag = mag[i + 1];
                if (this_mag <= next_mag) { // found minimum
                    // here, last_mag at i-1, this_mag at i, next_mag at i+1
                    // and next_min_x == i
                    break;
                } // loop invariant: this_mag > next_mag
            }
            // invariant: this_mag <= next_mag || i == fftsize/2
            if (i >= fftsize / 2) { // special case at end of spectrum
                // no minimum found
                next_min_x = fftsize / 2 + 1;
            } else {
                next_min_x = i; // the minimum found by the search above
            } // invariant: this_mag <= next_mag || i == fftsize/2
            // search for second peak;
            for (i = next_min_x + 1; i < fftsize / 2; i++) {
                last_mag = this_mag; // invariant: this_mag <= next_mag
                this_mag = next_mag; // invariant: last_mag <= this_mag
                next_mag = mag[i + 1];
                if (this_mag > next_mag) { // found peak
                    // here, last_mag at i-1, this_mag at i, next_mag at i+1
                    // and next_peak_x == i
                    break;
                } // loop invariant: this_mag <= next_mag
            }
            next_peak_x = i;

            // special case if we're at the end:
            if (i >= fftsize / 2) {
                if (next_mag < this_mag) {
                    next_peak_x = fftsize / 2 + 1;
                } else {
                    next_peak_x = fftsize / 2; // this may not be necessary
                    next_peak_mag = mag[next_peak_x];
                }
            } else {
                next_peak_mag = mag[i];
            }
            // now we have prev_min, prev_peak, next_min, and next_peak. We
            // want bins from prev_min to next_min to get the phase shift of
            // prev_peak. First decide if next_min gets assigned to prev_peak
            // or next_peak. Assign to the closest peak, but break ties by
            // picking the largest peak.
            if (next_min_x - prev_peak_x < next_peak_x - next_min_x) {
                // closer to prev_peak, so include min with it
                next_min_x++;
            } else if ((next_min_x - prev_peak_x == next_peak_x - next_min_x) &&
                       // equidistant so see if prev_peak_mag > next_peak_mag
                       (prev_peak_mag > next_peak_mag)) {
                next_min_x++;
            }
            // Now we want to adjust phases of prev_min_x, prev_min_x + 1,
            // prev_min_x + 2, ..., next_min_x - 1.
            //
            // Assign the new phases according to the peak.
            // phase_increment starts as the difference between the phase
            // advance actually observed at the peak and the advance the
            // bin's own frequency would give over ana_hopsize samples.
            // Used in phase unwrapping:
            int j = prev_peak_x; // just to make more concise notation

            double phase_increment = ana_phase[j] - pre_ana_phase[j] -
                                     bin_freq[j] * ana_hopsize;
            // need to get phase_increment between -M_PI and +M_PI.
            // Algorithm: add M_PI, get phase_increment between 0 and TWO_PI,
            // then subtract M_PI:
            phase_increment = fmod(phase_increment + M_PI, TWOPI);
            if (phase_increment < 0)
                phase_increment += TWOPI;
            phase_increment -= M_PI;
            // estimated frequency from phase unwrapping
            /*DBG
            if (j == 8)
                printf("phase_increment %g hopsize %d\n", phase_increment, ana_hopsize);
            DBG*/
            float estimate_freq = (float) (phase_increment / ana_hopsize +
                                           bin_freq[j]);
            // get synthesis phase adjustment
            phase_increment = pre_syn_phase[j] +
                              syn_hopsize * estimate_freq - ana_phase[j];
            /*DBG
            if (j == 8)
                printf("phase_increment at %d is %g, freq %g ", j, phase_increment,
                       estimate_freq);
            DBG*/
            // update the range of bins:
            for (i = prev_min_x; i < next_min_x; i++) {
                syn_phase[i] = fmodf((float) (ana_phase[i] + phase_increment),
                                     (float) TWOPI);
            }
            /*DBG
            if (j == 8) printf("syn_phase[8] %g\n", syn_phase[8]);
            DBG*/
            // now get ready for the next iteration
            prev_min_x = next_min_x;
            prev_peak_x = next_peak_x;
            prev_peak_mag = next_peak_mag;

        }
    } else if (pv->mode == PV_MODE_STANDARD) {
        for (i = 0; i <= fftsize / 2; i++) {
            // difference between the phase advance actually observed in
            // this bin and the advance the bin's own frequency would
            // give over ana_hopsize samples. Used in phase unwrapping:
            double phase_increment = ana_phase[i] - pre_ana_phase[i] -
                                     bin_freq[i] * ana_hopsize;
            // need to get phase_increment between -M_PI and +M_PI.
            // Algorithm: add M_PI, get phase_increment between 0 and TWO_PI,
            // then subtract M_PI:
            phase_increment = fmod(phase_increment + M_PI, TWOPI);
            if (phase_increment < 0)
                phase_increment += TWOPI;
            phase_increment -= M_PI;

            // estimated frequency from phase unwrapping
            float estimate_freq = (float) (phase_increment / ana_hopsize +
                                           bin_freq[i]);
            // set synthesis phase
            syn_phase[i] = fmodf((float) (pre_syn_phase[i] +
                                          syn_hopsize * estimate_freq),
                                 (float) TWOPI);
        }
    } else if (pv->mode == PV_MODE_ROBOVOICE) {
        ; // syn_phase[] is unmodified, i.e. constant
    } else {
        assert(FALSE); // bad mode value
    }
    for (i = 0; i < fftsize / 2; i++) {
        // record phases
        pre_ana_phase[i] = ana_phase[i];
        pre_syn_phase[i] = syn_phase[i];

        // update realpart and imagpart
        syn_frame[i * 2] = (float) (mag[i] * cos(syn_phase[i]));
        syn_frame[i * 2 + 1] = (float) (mag[i] * sin(syn_phase[i]));
    }
    // here i == fftsize / 2: record the last bin; its value is stored in
    // syn_frame[1], overwriting the value the loop stored there for bin 0
    pre_ana_phase[i] = ana_phase[i];
    pre_syn_phase[i] = syn_phase[i];
    // update realpart and imagpart
    syn_frame[1] = (float) (mag[i] * cos(syn_phase[i]));
    // inverse FFT
    riffts(syn_frame, log2_fft, 1);

    // fftshift
    OneDimensionFFTshift(syn_frame, fftsize);
#endif // SKIP_PHASE_ADJUST
    D printf(" mid syn_frame->%g\n", syn_frame[pv->fftsize / 2]);
    //D printf(" frame offset %ld\n", frame_next - pv->output_buffer);
    // window the frame and then add it to the output buffer
    // assume here that there is room to add in syn_frame
    D printf(" add to frame_next: %ld\n",
             (long) (pv->frame_next - pv->output_buffer));
    /*DBG
    float tmp_frame[4096];
    for (int i = 0; i < fftsize; i++) {
        tmp_frame[i] = syn_win[i] * syn_frame[i];
    }
    write_pv_frame(zeros, syn_win, fftsize, "pvsyn");
    write_pv_frame(zeros, syn_frame, fftsize, "pvsyn");
    write_pv_frame(zeros, tmp_frame, fftsize, "pvsyn");
    DBG*/
    // the first sum_count samples are added to what is already in the
    // output buffer; the remaining syn_hopsize samples are stored
    int sum_count = fftsize - syn_hopsize;
    for (i = 0; i < sum_count; i++) {
        frame_next[i] += syn_win[i] * syn_frame[i];
    }
    for (/* continue from i */; i < fftsize; i++) {
        DD assert(frame_next[i] == 0);
        frame_next[i] = syn_win[i] * syn_frame[i];
    }
    frame_next += syn_hopsize;
    pv->frame_next = frame_next;
}
|
|
|
|
|
|
/* update_position_queue -- record where the frame just computed sits
 * in the input and in the output
 *
 * ana_center points into the input buffer at the center of the
 * analysis frame. A (ana_pos, syn_pos) pair is appended at
 * pos_buffer_rear; on the very first frame an extra starting pair
 * (round(-ratio * fftsize / 2), 0) is appended before it. The queue
 * is circular: rear wraps at pos_buffer + queue_length, and if rear
 * catches up with head, the oldest entry is dropped.
 *
 * NOTE(review): the extra first-frame entry advances rear without a
 * wrap test, so this appears to rely on queue_length being large
 * enough for rear not to reach the end there -- confirm where the
 * queue is allocated.
 */
void update_position_queue(PV *pv, float *ana_center)
{
    struct position *queue_end = pv->pos_buffer + pv->queue_length;

    if (pv->first_time) {
        // special starting correspondence
        struct position *start = pv->pos_buffer_rear;
        start->ana_pos = lroundf(-pv->ratio * pv->fftsize / 2);
        start->syn_pos = 0;
        pv->pos_buffer_rear = start + 1;
    }

    struct position *entry = pv->pos_buffer_rear;
    // input_total corresponds to input_rear, so step back from there
    // to the center of the analysis window
    entry->ana_pos = pv->input_total - (long) (pv->input_rear - ana_center);
    // output_total corresponds to out_next; the center of the output
    // window is at frame_next - syn_hopsize + fftsize / 2
    entry->syn_pos = pv->output_total +
                     (long) ((pv->frame_next - pv->out_next) -
                             pv->syn_hopsize + pv->fftsize / 2);

    // advance rear past the new entry, wrapping at the end of the array
    entry++;
    if (entry == queue_end) {
        entry = pv->pos_buffer;
    }
    pv->pos_buffer_rear = entry;

    // queue full: discard the oldest entry by advancing head
    if (pv->pos_buffer_head == entry) {
        struct position *oldest = pv->pos_buffer_head + 1;
        if (oldest == queue_end) {
            oldest = pv->pos_buffer;
        }
        pv->pos_buffer_head = oldest;
    }
}
|
|
|
|
|
|
/* finish_output -- hand the next block of output to the caller
 *
 * Requires at least pv->blocksize finished samples between out_next
 * and frame_next (asserted). Returns a pointer to the block inside
 * pv's output buffer, advances out_next and output_total by blocksize,
 * and puts pv back in phase PV_START.
 */
float *finish_output(PV *pv)
{
    float *ready = pv->out_next; // the block being delivered
    assert(pv->frame_next - ready >= pv->blocksize);
    // the following block starts right after this one
    pv->out_next += pv->blocksize;
    pv->output_total += pv->blocksize;
    pv->phase = PV_START;
    D printf(" return offset %ld = %g\n",
             (long) (pv->out_next - pv->output_buffer), *(pv->out_next));
    /* DEBUG: To produce a 32767-sample-long sawtooth from 0 to 1 (roughly)
     * as output, uncomment the following loop. This might be the first step
     * in debugging. If you do not get a smoothly increasing ramp for 32K
     * samples, then you are not handling the output of cmupv properly.
     */
    /* for (int i = 0; i < pv->blocksize; i++) {
     *     ready[i] = ((pv->output_total - pv->blocksize + i) % 32767) / 32768.0;
     * }
     */
    return ready;
}
|
|
|
|
|
|
/* pv_get_output -- compute and return the next block of output
 *
 * Must be called in phase PV_GOT_INPUT, i.e. after pv_put_input().
 * Computes pv->frames_to_compute frames: each one windows fftsize
 * input samples starting at the current input position, runs
 * compute_one_frame() and records the frame in the position queue.
 * Returns the block produced by finish_output().
 */
float *pv_get_output(Phase_vocoder x)
{
    PV *pv = (PV *) x;
    assert(pv->phase == PV_GOT_INPUT);

    int fftsize = pv->fftsize;
    int frames_to_compute = pv->frames_to_compute;
    float *ana_win = pv->ana_win;
    float *ana_frame = pv->ana_frame;
    float ratio = pv->ratio;

    int ana_hopsize = lroundf(pv->syn_hopsize * ratio);
    if (ana_hopsize > pv->max_ana_hopsize) {
        ana_hopsize = pv->max_ana_hopsize;
    }

    // compute frames and add them to the output_buffer until there
    // are blocksize samples ready to deliver
    D printf("pv_get_output: frames_to_compute %d\n", frames_to_compute);
    int last_frame = frames_to_compute - 1;
    float *frame_start = pv->input_head;
    int frame;
    for (frame = 0; frame < frames_to_compute; frame++) {
        // another frame is only needed while the block is incomplete
        assert(pv->frame_next - pv->out_next < pv->blocksize);
        int k;
        for (k = 0; k < fftsize; k++) { // get and window the buffer
            ana_frame[k] = frame_start[k] * ana_win[k];
        }
        float *ana_center = frame_start + fftsize / 2;
        D printf(" mid ana_frame->%g at %ld\n", *ana_center,
                 (long) (ana_center - pv->input_buffer));
        if (frame == last_frame) {
            // On the last frame, publish the input position without
            // adding the hop: ratio might change before the next call,
            // and pv_get_input_count() adds ana_hopsize to set up the
            // next analysis frame location.
            pv->input_head = frame_start;
        } else {
            frame_start += ana_hopsize; // get ready for next iteration
        }
        compute_one_frame(pv, ana_hopsize);
        update_position_queue(pv, ana_center);
        // first_time is cleared here rather than in
        // update_position_queue(), where it is tested, because
        // pv_get_output2() uses it too and does not call
        // update_position_queue()
        pv->first_time = FALSE;
    }
    return finish_output(pv);
}
|
|
|
|
|
|
/* pv_get_output2 -- callback-driven variant of pv_get_output
 *
 * Must be called in phase PV_START. While fewer than blocksize
 * finished samples are buffered, asks pv->callback to fill
 * pv->ana_frame for a given output position, windows that frame and
 * runs compute_one_frame() using the hop size the callback returned.
 * The output buffer is shifted toward its start when the next frame
 * would not fit. Returns the block produced by finish_output().
 */
float *pv_get_output2(Phase_vocoder x)
{
    PV *pv = (PV *) x;
    assert(pv->phase == PV_START);

    long blocksize = pv->blocksize;
    int fftsize = pv->fftsize;
    float *out_next = pv->out_next;
    float *buf = pv->output_buffer;
    long buf_len = pv->output_buffer_len;
    float *ana_frame = pv->ana_frame;
    float *ana_win = pv->ana_win;

    D printf("pv_get_output2: blocksize %ld frame_next %ld "
             "out_next %ld buffer offset %ld\n",
             blocksize, (long) (pv->frame_next - buf),
             (long) (out_next - buf),
             (long) (pv->output_total - (out_next - buf)));

    // The next sample to output is at out_next and the next frame will
    // be added at frame_next, so frame_next - out_next samples are
    // already finished. Keep computing frames until that covers a block.
    while ((pv->frame_next - out_next) < blocksize) {
        // output position passed to the callback for this frame
        long out_cnt = (long) (pv->output_total +
                               (pv->frame_next - out_next) + fftsize / 2);
        // If the frame would run past the end of the output buffer, shift
        // the samples down. This is done here, only when a frame is
        // actually needed, to avoid extra work: sometimes the samples
        // are already in the buffer and no shift is required.
        if (pv->frame_next + fftsize > buf + buf_len) {
            long shift = (long) (out_next - buf);
            D printf("shift output by %ld\n", shift);
            memmove(buf, out_next, (buf_len - shift) * sizeof(*buf));
            /* for debugging, fill the end with zero. When we write (rather
               than add) to the buffer, assert that we're over-writing zeros */
            DD ZERO(buf + buf_len - shift, shift);
            out_next = buf;
            pv->out_next = buf;
            pv->frame_next -= shift;
        }
        int ana_hopsize = (*pv->callback)(out_cnt, ana_frame,
                                          fftsize, pv->rock);
        /* DEBUG - To check input, the following commented code will
           write each analysis frame as a file. The analysis frame is
           prefixed with zeros so that it will be placed at the right
           time, but this generates N^2 samples, so only the first 20
           frames are written */
        /*DBG
        write_pv_frame(out_cnt - fftsize / 2, ana_frame, fftsize, "pvana");
        DBG*/
        int k;
        for (k = 0; k < fftsize; k++) {
            ana_frame[k] *= ana_win[k];
        }
        compute_one_frame(pv, ana_hopsize);
        pv->first_time = FALSE;
        D printf("pv_get_output2: blocksize %ld frame_next %ld "
                 "out_next %ld buffer offset %ld\n",
                 blocksize, (long) (pv->frame_next - buf),
                 (long) (out_next - buf),
                 (long) (pv->output_total - (out_next - buf)));
    }
    D printf("pv_get_output2 returning at offset %ld abs %ld\n",
             (long) (pv->out_next - pv->output_buffer), pv->output_total);
    /*DBG out_next position is output_total, so if we subtract out_next - output_buffer,
      we get the absolute position of the output_buffer
    write_pv_frame(pv->output_total - (pv->out_next - pv->output_buffer),
                   pv->output_buffer, pv->output_buffer_len, "pvbuf");
    DBG*/
    return finish_output(pv);
}
|