/* convolve.c -- implements (non-"fast") convolution */ /* * Note: this code is mostly generated by translate.lsp (see convole.tran * in the tran directory), but it has been modified by hand to extend the * stop time to include the "tail" of the convolution beyond the length * of the first parameter. */ /* Original convolve.c modified to do fast convolution. Here are some * notes: * The first arg is arbitrary length. The second arg is the impulse * response, which is converted into a table. The FFT size will be * limited to 64K, which allows convolution with up to 32K samples. * For longer impulse responses, we'll have to do convolutions one * 32K block at a time. I considered just limiting the convolution * size and handling longer impulse responses in Nyquist XLISP code, * but that would require taking FFT's of each input block multiple * times. Here, we save the FFT's and reuse them, which should gain * a factor of 2 in speed (we still have to inverse FFT each block * after multiplication, which should take 1/2 the time of doing * FFT/inverse-FFT on each block). * * The fast convolution works like this: * inputs are x_snd and h_snd. * Compute the length of h_snd in samples. * Set fft_size = MAX_FFT_SIZE * If length <= MAX_FFT_SIZE / 4 then * set fft_size = (round length to power of 2) * 2 * set N = fft_size/2 * Set h_len = (length rounded up to multiple of fft_size/2) * 2 * Let L = h_len/ fft_size * Allocate H of h_len floats * Iterate over i from 0 to L-1: * Copy ht with zero fill into H[i] of size fft_size, * where each H[i] of size fft_size is filled with * fft_size/2 samples (except for the last H[i]) * Compute FFT of H[i] in place (FFT size is fft_size) * Allocate X of h_len floats. This represents the history * of x_snd, which is initially all zero, so the FFT, X is all zero * Allocate output buffers Y and R, each of size fft_size * Iterate over j (i.e. run this to generate MAX_CONVOLVE_LEN * samples; then j = (j + 1) mod L. * Copy 2nd half of R to first half and zero the 2nd half. * Note: the first time does nothing because R is initially * filled with zeros * Copy fft_size/2 samples of x_snd into X[j], * where X[j] is of size fft_size and filled with * N samples (except when x_snd terminates) * Zero fill X[j] * Compute FFT of X[j] in place. * Iterate k = 0 to L-1 * Multiply X[(j-k) mod L] by H[k] (result goes into Y). * Compute IFFT of Y in place. Y is now time domain convolution * of two blocks of samples. * Add Y to R. * Now N samples of R can be output. * For simplicity, we'll keep processing x_snd input even after x_snd * terminates. This will avoid special cases where we do not need all * of X[j] at the end of the convolution. * * Length of output is length of x input + length of h */ // You can turn on debugging output with: #define D if (1) #define D if (0) #define MAX_IR_LEN 4000000 /* maximum impulse response length */ #define MAX_LOG_FFT_SIZE 16 /* maximum fft size for convolution */ //#define MAX_LOG_FFT_SIZE 4 /* maximum fft size for convolution */ #define _USE_MATH_DEFINES 1 /* for Visual C++ to get M_LN2 */ #include #include #include "stdio.h" #ifndef mips #include "stdlib.h" #endif #include "xlisp.h" #include "sound.h" #include "samples.h" #include "falloc.h" #include "cext.h" #include "fftlib.h" #include "fftext.h" #include "convolve.h" void convolve_free(snd_susp_type a_susp); typedef struct convolve_susp_struct { snd_susp_node susp; int64_t terminate_cnt; boolean know_end_of_x; boolean logically_stopped; sound_type x_snd; int x_snd_cnt; sample_block_values_type x_snd_ptr; sample_type *X; // the FFTs of x_snd int j; // which block are we processing? 0 <= j < L sample_type *H; // the FFTs of h_snd sample_type *Y; // product of X*H where we inverse FFT int h_snd_len; // true length of h_snd in samples int N; // length of convolution, FFTs are of size 2*N int M; // log2 of 2*N, the FFT size int L; // number of blocks: h_len / (2*N) sample_type *R; // result buffer where output is summed sample_type *R_current; // pointer to next sample to output } convolve_susp_node, *convolve_susp_type; /* void h_reverse(sample_type *h, long len) { sample_type temp; int i; for (i = 0; i < len; i++) { temp = h[i]; h[i] = h[len - 1]; h[len - 1] = temp; len--; } } */ void convolve_s_fetch(snd_susp_type a_susp, snd_list_type snd_list) { convolve_susp_type susp = (convolve_susp_type) a_susp; int cnt = 0; /* how many samples computed */ int togo; int n; sample_block_type out; register sample_block_values_type out_ptr; register sample_block_values_type out_ptr_reg; sample_type *R = susp->R; sample_type *R_current; int N = susp->N; falloc_sample_block(out, "convolve_s_fetch"); out_ptr = out->samples; snd_list->block = out; while (cnt < max_sample_block_len) { /* outer loop */ /* first compute how many samples to generate in inner loop: */ /* don't overflow the output sample block: */ togo = max_sample_block_len - cnt; /* if we need output samples, generate them here */ D printf("test R_current at offset %td\n", susp->R_current - R); if (susp->R_current >= R + N) { // true when we output half of R int i = 0; int k; sample_type *Xj = susp->X + susp->j * N * 2; sample_type *H = susp->H; sample_type *Y = susp->Y; int to_copy; /* Shift R, zero fill: */ memcpy(R, R + N, N * sizeof(*R)); memset(R + N, 0, N * sizeof(*R)); /* Copy N samples of x_snd into Xj and zero fill to size 2N */ D printf("Copying N samples of x_snd into Xj at offset %td\n", Xj - susp->X); while (i < N) { if (susp->x_snd_cnt == 0) { susp_get_samples(x_snd, x_snd_ptr, x_snd_cnt); if (susp->x_snd->logical_stop_cnt == susp->x_snd->current - susp->x_snd_cnt) { min_cnt(&susp->susp.log_stop_cnt, susp->x_snd, (snd_susp_type) susp, susp->x_snd_cnt); } } /* This code is not standard. Since we extend the terminate * count by susp->h_snd_len, the "standard" call to min_cnt() * results in extending the terminate time forever. Instead, * we make this code run once only by setting know_end_of_x. */ if (!susp->know_end_of_x && susp->x_snd_ptr == zero_block->samples) { susp->terminate_cnt = susp->x_snd->current - susp->x_snd_cnt; /* extend the output to include impulse response */ susp->terminate_cnt += susp->h_snd_len; susp->know_end_of_x = TRUE; } /* copy no more than the remaining space and no more than * the amount remaining in the block */ to_copy = min(N - i, susp->x_snd_cnt); memcpy(Xj + i, susp->x_snd_ptr, to_copy * sizeof(*susp->x_snd_ptr)); susp->x_snd_ptr += to_copy; susp->x_snd_cnt -= to_copy; i += to_copy; } /* zero fill to size 2N */ memset(Xj + N, 0, N * sizeof(Xj[0])); D { printf("Xj at offset %td: ", Xj - susp->X); printf(" %d samples ", susp->N * 2); float big = 0.0; for (i = 0; i < susp->N * 2; i++) { // printf("%g ", Xj[i]); big = max(big, fabs(Xj[i])); } printf("MAX: %g\n", big); } /* Compute FFT of Xj in place */ fftInit(susp->M); rffts(Xj, susp->M, 1); /* convolve pairs of blocks and sum into Y */ memset(Y, 0, N * sizeof(*Y)); /* initialize sum to zero */ for (k = 0; k < susp->L; k++) { /* Multiply Xj by H (result goes into X) */ sample_type *X = susp->X + ((susp->L + susp->j - k) % susp->L) * N * 2; rspectprod(X, H + k * N * 2, Y, N * 2); /* Compute IFFT of Y in place */ riffts(Y, susp->M, 1); /* R += Y */ D { printf("Output block %d, X offset %td: ", k, X - susp->X); printf(" %d samples ", 2 * N); float big = 0.0; for (i = 0; i < 2 * N; i++) { big = max(big, fabs(Y[i])); } printf("MAX: %g\n", big); } for (i = 0; i < 2 * N; i++) { R[i] += Y[i]; } } /* now N samples of R can be output */ susp->R_current = R; D printf("R: %d samples ", susp->N); D { float big = 0.0; for (i = 0; i < susp->N; i++) { // printf("%g ", R[i]); big = max(big, fabs(R[i])); } printf("MAX: %g\n", big); } susp->j = (susp->j + 1) % susp->L; } /* compute togo, the number of samples to "compute" */ /* can't use more than what's left in R. R_current is the next sample of R, so what's left is N - (R - R_current) */ R_current = susp->R_current; togo = (int) min(togo, N - (R_current - R)); /* don't run past terminate time */ if (susp->terminate_cnt != UNKNOWN && susp->terminate_cnt <= susp->susp.current + cnt + togo) { togo = (int) (susp->terminate_cnt - (susp->susp.current + cnt)); if (togo == 0) break; } /* don't run past logical stop time */ if (!susp->logically_stopped && susp->susp.log_stop_cnt != UNKNOWN && susp->susp.log_stop_cnt <= susp->susp.current + cnt + togo) { togo = (int) (susp->susp.log_stop_cnt - (susp->susp.current + cnt)); D printf("susp->susp.log_stop_cnt is set to %" PRId64 "\n", susp->susp.log_stop_cnt); if (togo == 0) break; } n = togo; out_ptr_reg = out_ptr; if (n) do { /* the inner sample computation loop */ *out_ptr_reg++ = (sample_type) *R_current++; } while (--n); /* inner loop */ /* using R_current is a bad idea on RS/6000: */ susp->R_current += togo; out_ptr += togo; cnt += togo; } /* outer loop */ /* test for termination */ if (togo == 0 && cnt == 0) { snd_list_terminate(snd_list); } else { snd_list->block_len = cnt; susp->susp.current += cnt; } /* test for logical stop */ if (susp->logically_stopped) { snd_list->logically_stopped = true; } else if (susp->susp.log_stop_cnt == susp->susp.current) { susp->logically_stopped = true; } } /* convolve_s_fetch */ void convolve_toss_fetch(snd_susp_type a_susp, snd_list_type snd_list) { convolve_susp_type susp = (convolve_susp_type) a_susp; time_type final_time = susp->susp.t0; long n; /* fetch samples from x_snd up to final_time for this block of zeros */ while ((ROUNDBIG((final_time - susp->x_snd->t0) * susp->x_snd->sr)) >= susp->x_snd->current) susp_get_samples(x_snd, x_snd_ptr, x_snd_cnt); /* convert to normal processing when we hit final_count */ /* we want each signal positioned at final_time */ n = (long) ROUNDBIG((final_time - susp->x_snd->t0) * susp->x_snd->sr - (susp->x_snd->current - susp->x_snd_cnt)); susp->x_snd_ptr += n; susp_took(x_snd_cnt, n); susp->susp.fetch = susp->susp.keep_fetch; (*(susp->susp.fetch))(a_susp, snd_list); } void convolve_mark(snd_susp_type a_susp) { convolve_susp_type susp = (convolve_susp_type) a_susp; sound_xlmark(susp->x_snd); } void convolve_free(snd_susp_type a_susp) { convolve_susp_type susp = (convolve_susp_type) a_susp; free(susp->R); free(susp->X); free(susp->Y); free(susp->H); sound_unref(susp->x_snd); ffree_generic(susp, sizeof(convolve_susp_node), "convolve_free"); } void convolve_print_tree(snd_susp_type a_susp, int n) { convolve_susp_type susp = (convolve_susp_type) a_susp; indent(n); stdputstr("x_snd:"); sound_print_tree_1(susp->x_snd, n); } void fill_with_samples(sample_type *x, sound_type s, long n) { /* this is based on snd_fetch in samples.c */ #define CNT extra[1] #define INDEX extra[2] #define FIELDS 3 #define SAMPLES list->block->samples int i; for (i = 0; i < n; i++) { if (!s->extra) { /* this is the first call, so fix up s */ s->extra = (int64_t *) malloc(sizeof(s->extra[0]) * FIELDS); s->extra[0] = sizeof(s->extra[0]) * FIELDS; s->CNT = s->INDEX = 0; } int icnt = (int) s->CNT; /* need this to be int type */ assert(icnt >= 0); if (icnt == s->INDEX) { sound_get_next(s, &icnt); assert(icnt >= 0); s->CNT = icnt; /* save the count back into s->extra */ s->INDEX = 0; } x[i] = s->SAMPLES[s->INDEX++] * s->scale; assert(x[i] < 2); } D { float big = 0.0; for (i = 0; i < n; i++) { big = max(big, fabs(x[i])); assert(big < 2); } printf("fill_with_samples n %ld scale %g max %g\n", n, s->scale, big); } } sound_type snd_make_convolve(sound_type x_snd, sound_type h_snd) { register convolve_susp_type susp; rate_type sr = x_snd->sr; time_type t0 = x_snd->t0; sample_type scale_factor = 1.0F; time_type t0_min = t0; int64_t h_len; int i; // assume fft_size is maximal. We fix this later if it is wrong long fft_size = 1 << MAX_LOG_FFT_SIZE; if (sr != h_snd->sr) { xlfail("convolve requires both inputs to have the same sample rates"); } falloc_generic(susp, convolve_susp_node, "snd_make_convolve"); /* compute the length of h_snd in samples */ h_len = snd_length(h_snd, MAX_IR_LEN + 1); if (h_len > MAX_IR_LEN) { char emsg[100]; sprintf(emsg, "convolve maximum impulse length is %d", MAX_IR_LEN); xlfail(emsg); } /* len is the impulse response length; * the FFT size is at least double that */ if (h_len <= fft_size / 4) { /* compute log-base-2(h_len): */; double log_len = log((double) h_len) / M_LN2; int log_len_int = (int) log_len; if (log_len_int != log_len) log_len_int++; /* round up to power of 2 */ susp->M = log_len_int + 1; } else { susp->M = MAX_LOG_FFT_SIZE; } fft_size = (1 << susp->M); D printf("fft_size %ld\n", fft_size); susp->N = fft_size / 2; // round h_len up to multiple of susp->N and multiply by 2 susp->h_snd_len = (int) h_len; h_len = ((h_len + susp->N - 1) / susp->N) * susp->N * 2; susp->L = (int) (h_len / fft_size); // allocate memory susp->H = (sample_type *) calloc((size_t) h_len, sizeof(susp->H[0])); if (!susp->H) { xlfail("memory allocation failure in convolve"); } for (i = 0; i < susp->L; i++) { /* copy fft_size/2 samples into each H[i] */ fill_with_samples(susp->H + i * susp->N * 2, h_snd, susp->N); } for (i = 0; i < susp->L; i++) { int j; float *H = susp->H + i * susp->N * 2; D { printf("H_%d at %td: ", i, H - susp->H); printf("%d samples ", susp->N * 2); float big = 0.0; for (j = 0; j < susp->N * 2; j++) { big = max(big, fabs(H[j])); assert(big < 2); // printf("%g ", H[j]); } printf("big %g\n", big); } } sound_unref(h_snd); h_snd = NULL; /* remaining N samples are already zero-filled */ if (fftInit(susp->M)) { free(susp->H); xlfail("fft initialization error in convolve"); } /* take the FFT of each block of the impulse response */ for (i = 0; i < susp->L; i++) { rffts(susp->H + i * susp->N * 2, susp->M, 1); } susp->X = (sample_type *) calloc((size_t) h_len, sizeof(susp->X[0])); susp->R = (sample_type *) calloc(fft_size, sizeof(susp->R[0])); susp->Y = (sample_type *) calloc(fft_size, sizeof(susp->Y[0])); if (!susp->X || !susp->R || !susp->Y) { free(susp->H); if (susp->X) free(susp->X); if (susp->R) free(susp->R); if (susp->Y) free(susp->Y); xlfail("memory allocation failed in convolve"); } susp->R_current = susp->R + susp->N; susp->susp.fetch = &convolve_s_fetch; susp->terminate_cnt = UNKNOWN; susp->know_end_of_x = FALSE; /* handle unequal start times, if any */ if (t0 < x_snd->t0) sound_prepend_zeros(x_snd, t0); /* minimum start time over all inputs: */ t0_min = min(x_snd->t0, t0); /* how many samples to toss before t0: */ susp->susp.toss_cnt = (long) ((t0 - t0_min) * sr + 0.5); if (susp->susp.toss_cnt > 0) { susp->susp.keep_fetch = susp->susp.fetch; susp->susp.fetch = convolve_toss_fetch; } /* initialize susp state */ susp->susp.free = convolve_free; susp->susp.sr = sr; susp->susp.t0 = t0; susp->susp.mark = convolve_mark; susp->susp.print_tree = convolve_print_tree; susp->susp.name = "convolve"; susp->logically_stopped = false; susp->susp.log_stop_cnt = logical_stop_cnt_cvt(x_snd); susp->susp.current = 0; susp->x_snd = x_snd; susp->x_snd_cnt = 0; susp->j = 0; return sound_create((snd_susp_type)susp, t0, sr, scale_factor); } sound_type snd_convolve(sound_type x_snd, sound_type h_snd) { sound_type x_snd_copy = sound_copy(x_snd); sound_type h_snd_copy = sound_copy(h_snd); return snd_make_convolve(x_snd_copy, h_snd_copy); }