From 303faaa15bf4825fa504e237e1afcc3160f23afc Mon Sep 17 00:00:00 2001 From: Leland Lucius Date: Mon, 4 May 2015 12:42:27 -0500 Subject: [PATCH] Replace convolve with previous version --- lib-src/audacity-patches.txt | 1 + lib-src/libnyquist/nyquist/nyqsrc/convolve.c | 317 ++++++++-------- lib-src/libnyquist/revert-convolve.patch | 366 +++++++++++++++++++ 3 files changed, 514 insertions(+), 170 deletions(-) create mode 100644 lib-src/libnyquist/revert-convolve.patch diff --git a/lib-src/audacity-patches.txt b/lib-src/audacity-patches.txt index 7200445a3..f4f95d39d 100644 --- a/lib-src/audacity-patches.txt +++ b/lib-src/audacity-patches.txt @@ -59,6 +59,7 @@ Version in Audacity SVN: 3.09 nyquist.patch Fix for a couple of memory leaks xlisp.patch Fix build in Windows sound.patch Fix build when using VS2013 (log2 is included in VS2013) +revert-convolve.patch Replaces broken convolve with previous version libogg ------ diff --git a/lib-src/libnyquist/nyquist/nyqsrc/convolve.c b/lib-src/libnyquist/nyquist/nyqsrc/convolve.c index fc287d2c0..e234171c9 100644 --- a/lib-src/libnyquist/nyquist/nyqsrc/convolve.c +++ b/lib-src/libnyquist/nyquist/nyqsrc/convolve.c @@ -6,34 +6,6 @@ * of the first parameter. */ -/* Original convolve.c modified to do fast convolution. Here are some - * notes: - * The first arg is arbitrary length. The second arg is the impulse - * response, which is converted into a table. Tables have limited maximum - * size, which is good because we're going to use a single FFT for the - * whole impulse response. - * - * The fast convolution works like this: - * inputs are x_snd and h_snd. - * Make h_snd into a table ht of size N, where N is a power of 2. - * Copy ht with zero fill into H of size 2N. - * Compute FFT of H in place. - * Iterate: - * Copy N samples of x_snd into X and zero fill to size 2N. - * Compute FFT of X in place. - * Multiply X by H (result goes into X). - * Compute IFFT of X in place - * Add X to R. - * Now N samples of R can be output. - * Copy 2nd half of R to first half and zero the 2nd half. - * (this is actually done first, and the first time does - * nothing because R is initially filled with zeros) - * - * Length of output is length of x input + length of h - */ - -#define _USE_MATH_DEFINES 1 /* for Visual C++ to get M_LN2 */ -#include #include "stdio.h" #ifndef mips #include "stdlib.h" @@ -43,8 +15,6 @@ #include "falloc.h" #include "cext.h" -#include "fftlib.h" -#include "fftext.h" #include "convolve.h" void convolve_free(); @@ -58,13 +28,13 @@ typedef struct convolve_susp_struct { long x_snd_cnt; sample_block_values_type x_snd_ptr; - sample_type *H; // the FFT of h_snd - int h_len; // true length of H - int N; // length of block, FFTs are of size 2*N - int M; // log2 of 2*N, the FFT size - sample_type *X; - sample_type *R; // result buffer where output is summed - sample_type *R_current; + table_type table; + sample_type *h_buf; + double length_of_h; + long h_len; + long x_buf_len; + sample_type *x_buffer_pointer; + sample_type *x_buffer_current; } convolve_susp_node, *convolve_susp_type; @@ -82,9 +52,8 @@ void h_reverse(sample_type *h, long len) } -void convolve_s_fetch(snd_susp_type a_susp, snd_list_type snd_list) +void convolve_s_fetch(register convolve_susp_type susp, snd_list_type snd_list) { - convolve_susp_type susp = (convolve_susp_type) a_susp; int cnt = 0; /* how many samples computed */ int togo; int n; @@ -93,118 +62,152 @@ void convolve_s_fetch(snd_susp_type a_susp, snd_list_type snd_list) register sample_block_values_type out_ptr_reg; - sample_type *R = susp->R; - sample_type *R_current; - int N = susp->N; + register sample_type * h_buf_reg; + register long h_len_reg; + register long x_buf_len_reg; + register sample_type * x_buffer_pointer_reg; + register sample_type * x_buffer_current_reg; + register sample_type x_snd_scale_reg = susp->x_snd->scale; + register sample_block_values_type x_snd_ptr_reg; falloc_sample_block(out, "convolve_s_fetch"); out_ptr = out->samples; snd_list->block = out; while (cnt < max_sample_block_len) { /* outer loop */ - /* first compute how many samples to generate in inner loop: */ - /* don't overflow the output sample block: */ - togo = max_sample_block_len - cnt; - /* if we need output samples, generate them here */ - if (susp->R_current >= R + N) { - /* Copy N samples of x_snd into X and zero fill to size 2N */ - int i = 0; - sample_type *X = susp->X; - sample_type *H = susp->H; - int to_copy; - while (i < N) { - if (susp->x_snd_cnt == 0) { - susp_get_samples(x_snd, x_snd_ptr, x_snd_cnt); - if (susp->x_snd->logical_stop_cnt == - susp->x_snd->current - susp->x_snd_cnt) { - min_cnt(&susp->susp.log_stop_cnt, susp->x_snd, - (snd_susp_type) susp, susp->x_snd_cnt); - } - } - if (susp->x_snd_ptr == zero_block->samples) { - min_cnt(&susp->terminate_cnt, susp->x_snd, - (snd_susp_type) susp, susp->x_snd_cnt); - /* extend the output to include impulse response */ - susp->terminate_cnt += susp->h_len; - } - /* copy no more than the remaining space and no more than - * the amount remaining in the block + /* first compute how many samples to generate in inner loop: */ + /* don't overflow the output sample block: */ + togo = max_sample_block_len - cnt; + + /* don't run past the x_snd input sample block: */ + /* based on susp_check_term_log_samples, but offset by h_len */ + + /* THIS IS EXPANDED BELOW + * susp_check_term_log_samples(x_snd, x_snd_ptr, x_snd_cnt); + */ + if (susp->x_snd_cnt == 0) { + susp_get_samples(x_snd, x_snd_ptr, x_snd_cnt); + + /* THIS IS EXPANDED BELOW + *logical_stop_test(x_snd, susp->x_snd_cnt); + */ + if (susp->x_snd->logical_stop_cnt == + susp->x_snd->current - susp->x_snd_cnt) { + min_cnt(&susp->susp.log_stop_cnt, susp->x_snd, + (snd_susp_type) susp, susp->x_snd_cnt); + } + + /* THIS IS EXPANDED BELOW + * terminate_test(x_snd_ptr, x_snd, susp->x_snd_cnt); + */ + if (susp->x_snd_ptr == zero_block->samples) { + /* ### modify this to terminate at an offset of (susp->h_len) */ + /* Note: in the min_cnt function, susp->x_snd_cnt is *subtracted* + * from susp->x_snd->current to form the terminate time, so to + * increase the time, we need to *subtract* susp->h_len, which + * due to the double negative, *adds* susp->h_len to the ultimate + * terminate time calculation. */ - to_copy = min(N - i, susp->x_snd_cnt); - memcpy(X + i, susp->x_snd_ptr, - to_copy * sizeof(*susp->x_snd_ptr)); - susp->x_snd_ptr += to_copy; - susp->x_snd_cnt -= to_copy; - i += to_copy; + min_cnt(&susp->terminate_cnt, susp->x_snd, + (snd_susp_type) susp, susp->x_snd_cnt - susp->h_len); + } + } + + + togo = min(togo, susp->x_snd_cnt); + + /* don't run past terminate time */ + if (susp->terminate_cnt != UNKNOWN && + susp->terminate_cnt <= susp->susp.current + cnt + togo) { + togo = susp->terminate_cnt - (susp->susp.current + cnt); + if (togo == 0) break; + } + + + /* don't run past logical stop time */ + if (!susp->logically_stopped && susp->susp.log_stop_cnt != UNKNOWN) { + int to_stop = susp->susp.log_stop_cnt - (susp->susp.current + cnt); + /* break if to_stop == 0 (we're at the logical stop) + * AND cnt > 0 (we're not at the beginning of the + * output block). + */ + if (to_stop < togo) { + if (to_stop == 0) { + if (cnt) { + togo = 0; + break; + } else /* keep togo as is: since cnt == 0, we + * can set the logical stop flag on this + * output block + */ + susp->logically_stopped = true; + } else /* limit togo so we can start a new + * block at the LST + */ + togo = to_stop; + } + } + + n = togo; + h_buf_reg = susp->h_buf; + h_len_reg = susp->h_len; + x_buf_len_reg = susp->x_buf_len; + x_buffer_pointer_reg = susp->x_buffer_pointer; + x_buffer_current_reg = susp->x_buffer_current; + x_snd_ptr_reg = susp->x_snd_ptr; + out_ptr_reg = out_ptr; + if (n) do { /* the inner sample computation loop */ + long i; double sum; + /* see if we've reached end of x_buffer */ + if ((x_buffer_pointer_reg + x_buf_len_reg) <= (x_buffer_current_reg + h_len_reg)) { + /* shift x_buffer from current back to base */ + for (i = 1; i < h_len_reg; i++) { + x_buffer_pointer_reg[i-1] = x_buffer_current_reg[i]; + } + /* this will be incremented back to x_buffer_pointer_reg below */ + x_buffer_current_reg = x_buffer_pointer_reg - 1; } - /* zero fill to size 2N */ - memset(X + N, 0, N * sizeof(X[0])); - /* Compute FFT of X in place */ - fftInit(susp->M); - rffts(X, susp->M, 1); - /* Multiply X by H (result goes into X) */ - rspectprod(X, H, X, N * 2); - /* Compute IFFT of X in place */ - riffts(X, susp->M, 1); - /* Shift R, zero fill, add X, all in one loop */ - for (i = 0; i < N; i++) { - R[i] = R[i + N] + X[i]; - R[i + N] = X[i + N]; + + x_buffer_current_reg++; + + x_buffer_current_reg[h_len_reg - 1] = (x_snd_scale_reg * *x_snd_ptr_reg++); + + sum = 0.0; + for (i = 0; i < h_len_reg; i++) { + sum += x_buffer_current_reg[i] * h_buf_reg[i]; } - /* now N samples of R can be output */ - susp->R_current = R; - } - /* compute togo, the number of samples to "compute" */ - /* can't use more than what's left in R. R_current is - the next sample of R, so what's left is N - (R - R_current) */ - R_current = susp->R_current; - togo = min(togo, N - (R_current - R)); - /* don't run past terminate time */ - if (susp->terminate_cnt != UNKNOWN && - susp->terminate_cnt <= susp->susp.current + cnt + togo) { - togo = susp->terminate_cnt - (susp->susp.current + cnt); - if (togo == 0) break; - } + *out_ptr_reg++ = (sample_type) sum; + } while (--n); /* inner loop */ - /* don't run past logical stop time */ - if (!susp->logically_stopped && - susp->susp.log_stop_cnt != UNKNOWN && - susp->susp.log_stop_cnt <= susp->susp.current + cnt + togo) { - togo = susp->susp.log_stop_cnt - (susp->susp.current + cnt); - if (togo == 0) break; - } - - n = togo; - out_ptr_reg = out_ptr; - if (n) do { /* the inner sample computation loop */ - *out_ptr_reg++ = (sample_type) *R_current++; - } while (--n); /* inner loop */ - - /* using R_current is a bad idea on RS/6000: */ - susp->R_current += togo; - out_ptr += togo; - cnt += togo; + susp->x_buffer_pointer = x_buffer_pointer_reg; + susp->x_buffer_current = x_buffer_current_reg; + /* using x_snd_ptr_reg is a bad idea on RS/6000: */ + susp->x_snd_ptr += togo; + out_ptr += togo; + susp_took(x_snd_cnt, togo); + cnt += togo; } /* outer loop */ /* test for termination */ if (togo == 0 && cnt == 0) { - snd_list_terminate(snd_list); + snd_list_terminate(snd_list); } else { - snd_list->block_len = cnt; - susp->susp.current += cnt; + snd_list->block_len = cnt; + susp->susp.current += cnt; } /* test for logical stop */ if (susp->logically_stopped) { - snd_list->logically_stopped = true; + snd_list->logically_stopped = true; } else if (susp->susp.log_stop_cnt == susp->susp.current) { - susp->logically_stopped = true; + susp->logically_stopped = true; } } /* convolve_s_fetch */ -void convolve_toss_fetch(snd_susp_type a_susp, snd_list_type snd_list) +void convolve_toss_fetch(susp, snd_list) + register convolve_susp_type susp; + snd_list_type snd_list; { - convolve_susp_type susp = (convolve_susp_type) susp; time_type final_time = susp->susp.t0; long n; @@ -215,40 +218,36 @@ void convolve_toss_fetch(snd_susp_type a_susp, snd_list_type snd_list) /* convert to normal processing when we hit final_count */ /* we want each signal positioned at final_time */ n = round((final_time - susp->x_snd->t0) * susp->x_snd->sr - - (susp->x_snd->current - susp->x_snd_cnt)); + (susp->x_snd->current - susp->x_snd_cnt)); susp->x_snd_ptr += n; susp_took(x_snd_cnt, n); susp->susp.fetch = susp->susp.keep_fetch; - (*(susp->susp.fetch))(a_susp, snd_list); + (*(susp->susp.fetch))(susp, snd_list); } -void convolve_mark(snd_susp_type a_susp) +void convolve_mark(convolve_susp_type susp) { - convolve_susp_type susp = (convolve_susp_type) a_susp; sound_xlmark(susp->x_snd); } -void convolve_free(snd_susp_type a_susp) +void convolve_free(convolve_susp_type susp) { - convolve_susp_type susp = (convolve_susp_type) a_susp; - free(susp->R); - free(susp->X); - free(susp->H); - sound_unref(susp->x_snd); + table_unref(susp->table); + free(susp->x_buffer_pointer); sound_unref(susp->x_snd); ffree_generic(susp, sizeof(convolve_susp_node), "convolve_free"); } -void convolve_print_tree(snd_susp_type a_susp, int n) +void convolve_print_tree(convolve_susp_type susp, int n) { - convolve_susp_type susp = (convolve_susp_type) a_susp; indent(n); stdputstr("x_snd:"); sound_print_tree_1(susp->x_snd, n); } + sound_type snd_make_convolve(sound_type x_snd, sound_type h_snd) { register convolve_susp_type susp; @@ -256,38 +255,16 @@ sound_type snd_make_convolve(sound_type x_snd, sound_type h_snd) time_type t0 = x_snd->t0; sample_type scale_factor = 1.0F; time_type t0_min = t0; - table_type table; - double log_len; falloc_generic(susp, convolve_susp_node, "snd_make_convolve"); - table = sound_to_table(h_snd); - susp->h_len = table->length; - log_len = log(table->length) / M_LN2; /* compute log-base-2(length) */ - susp->M = (int) log_len; - if (susp->M != log_len) susp->M++; /* round up */ - susp->N = 1 << susp->M; /* size of data blocks */ - susp->M++; /* M = log2(2 * N) */ - susp->H = (sample_type *) calloc(2 * susp->N, sizeof(susp->H[0])); - if (!susp->H) { - xlabort("memory allocation failure in convolve"); - } - memcpy(susp->H, table->samples, sizeof(susp->H[0]) * susp->N); - table_unref(table); /* don't need table now */ - /* remaining N samples are already zero-filled */ - if (fftInit(susp->M)) { - free(susp->H); - xlabort("fft initialization error in convolve"); - } - rffts(susp->H, susp->M, 1); - susp->X = (sample_type *) calloc(2 * susp->N, sizeof(susp->X[0])); - susp->R = (sample_type *) calloc(2 * susp->N, sizeof(susp->R[0])); - if (!susp->X || !susp->R) { - free(susp->H); - if (susp->X) free(susp->X); - if (susp->R) free(susp->R); - xlabort("memory allocation failed in convolve"); - } - susp->R_current = susp->R + susp->N; - susp->susp.fetch = &convolve_s_fetch; + susp->table = sound_to_table(h_snd); + susp->h_buf = susp->table->samples; + susp->length_of_h = susp->table->length; + susp->h_len = (long) susp->length_of_h; + h_reverse(susp->h_buf, susp->h_len); + susp->x_buf_len = 2 * susp->h_len; + susp->x_buffer_pointer = calloc((2 * (susp->h_len)), sizeof(float)); + susp->x_buffer_current = susp->x_buffer_pointer; + susp->susp.fetch = convolve_s_fetch; susp->terminate_cnt = UNKNOWN; /* handle unequal start times, if any */ if (t0 < x_snd->t0) sound_prepend_zeros(x_snd, t0); diff --git a/lib-src/libnyquist/revert-convolve.patch b/lib-src/libnyquist/revert-convolve.patch new file mode 100644 index 000000000..a7ccedf54 --- /dev/null +++ b/lib-src/libnyquist/revert-convolve.patch @@ -0,0 +1,366 @@ +--- orig/nyquist/nyqsrc/convolve.c 2015-05-04 12:41:01.497976900 -0500 ++++ nyquist/nyqsrc/convolve.c 2015-05-04 12:40:32.047737200 -0500 +@@ -6,34 +6,6 @@ + * of the first parameter. + */ + +-/* Original convolve.c modified to do fast convolution. Here are some +- * notes: +- * The first arg is arbitrary length. The second arg is the impulse +- * response, which is converted into a table. Tables have limited maximum +- * size, which is good because we're going to use a single FFT for the +- * whole impulse response. +- * +- * The fast convolution works like this: +- * inputs are x_snd and h_snd. +- * Make h_snd into a table ht of size N, where N is a power of 2. +- * Copy ht with zero fill into H of size 2N. +- * Compute FFT of H in place. +- * Iterate: +- * Copy N samples of x_snd into X and zero fill to size 2N. +- * Compute FFT of X in place. +- * Multiply X by H (result goes into X). +- * Compute IFFT of X in place +- * Add X to R. +- * Now N samples of R can be output. +- * Copy 2nd half of R to first half and zero the 2nd half. +- * (this is actually done first, and the first time does +- * nothing because R is initially filled with zeros) +- * +- * Length of output is length of x input + length of h +- */ +- +-#define _USE_MATH_DEFINES 1 /* for Visual C++ to get M_LN2 */ +-#include + #include "stdio.h" + #ifndef mips + #include "stdlib.h" +@@ -43,8 +15,6 @@ + + #include "falloc.h" + #include "cext.h" +-#include "fftlib.h" +-#include "fftext.h" + #include "convolve.h" + + void convolve_free(); +@@ -58,13 +28,13 @@ + long x_snd_cnt; + sample_block_values_type x_snd_ptr; + +- sample_type *H; // the FFT of h_snd +- int h_len; // true length of H +- int N; // length of block, FFTs are of size 2*N +- int M; // log2 of 2*N, the FFT size +- sample_type *X; +- sample_type *R; // result buffer where output is summed +- sample_type *R_current; ++ table_type table; ++ sample_type *h_buf; ++ double length_of_h; ++ long h_len; ++ long x_buf_len; ++ sample_type *x_buffer_pointer; ++ sample_type *x_buffer_current; + } convolve_susp_node, *convolve_susp_type; + + +@@ -82,9 +52,8 @@ + } + + +-void convolve_s_fetch(snd_susp_type a_susp, snd_list_type snd_list) ++void convolve_s_fetch(register convolve_susp_type susp, snd_list_type snd_list) + { +- convolve_susp_type susp = (convolve_susp_type) a_susp; + int cnt = 0; /* how many samples computed */ + int togo; + int n; +@@ -93,9 +62,13 @@ + + register sample_block_values_type out_ptr_reg; + +- sample_type *R = susp->R; +- sample_type *R_current; +- int N = susp->N; ++ register sample_type * h_buf_reg; ++ register long h_len_reg; ++ register long x_buf_len_reg; ++ register sample_type * x_buffer_pointer_reg; ++ register sample_type * x_buffer_current_reg; ++ register sample_type x_snd_scale_reg = susp->x_snd->scale; ++ register sample_block_values_type x_snd_ptr_reg; + falloc_sample_block(out, "convolve_s_fetch"); + out_ptr = out->samples; + snd_list->block = out; +@@ -104,60 +77,43 @@ + /* first compute how many samples to generate in inner loop: */ + /* don't overflow the output sample block: */ + togo = max_sample_block_len - cnt; +- /* if we need output samples, generate them here */ +- if (susp->R_current >= R + N) { +- /* Copy N samples of x_snd into X and zero fill to size 2N */ +- int i = 0; +- sample_type *X = susp->X; +- sample_type *H = susp->H; +- int to_copy; +- while (i < N) { ++ ++ /* don't run past the x_snd input sample block: */ ++ /* based on susp_check_term_log_samples, but offset by h_len */ ++ ++ /* THIS IS EXPANDED BELOW ++ * susp_check_term_log_samples(x_snd, x_snd_ptr, x_snd_cnt); ++ */ + if (susp->x_snd_cnt == 0) { + susp_get_samples(x_snd, x_snd_ptr, x_snd_cnt); ++ ++ /* THIS IS EXPANDED BELOW ++ *logical_stop_test(x_snd, susp->x_snd_cnt); ++ */ + if (susp->x_snd->logical_stop_cnt == + susp->x_snd->current - susp->x_snd_cnt) { + min_cnt(&susp->susp.log_stop_cnt, susp->x_snd, + (snd_susp_type) susp, susp->x_snd_cnt); + } +- } ++ ++ /* THIS IS EXPANDED BELOW ++ * terminate_test(x_snd_ptr, x_snd, susp->x_snd_cnt); ++ */ + if (susp->x_snd_ptr == zero_block->samples) { ++ /* ### modify this to terminate at an offset of (susp->h_len) */ ++ /* Note: in the min_cnt function, susp->x_snd_cnt is *subtracted* ++ * from susp->x_snd->current to form the terminate time, so to ++ * increase the time, we need to *subtract* susp->h_len, which ++ * due to the double negative, *adds* susp->h_len to the ultimate ++ * terminate time calculation. ++ */ + min_cnt(&susp->terminate_cnt, susp->x_snd, +- (snd_susp_type) susp, susp->x_snd_cnt); +- /* extend the output to include impulse response */ +- susp->terminate_cnt += susp->h_len; ++ (snd_susp_type) susp, susp->x_snd_cnt - susp->h_len); + } +- /* copy no more than the remaining space and no more than +- * the amount remaining in the block +- */ +- to_copy = min(N - i, susp->x_snd_cnt); +- memcpy(X + i, susp->x_snd_ptr, +- to_copy * sizeof(*susp->x_snd_ptr)); +- susp->x_snd_ptr += to_copy; +- susp->x_snd_cnt -= to_copy; +- i += to_copy; +- } +- /* zero fill to size 2N */ +- memset(X + N, 0, N * sizeof(X[0])); +- /* Compute FFT of X in place */ +- fftInit(susp->M); +- rffts(X, susp->M, 1); +- /* Multiply X by H (result goes into X) */ +- rspectprod(X, H, X, N * 2); +- /* Compute IFFT of X in place */ +- riffts(X, susp->M, 1); +- /* Shift R, zero fill, add X, all in one loop */ +- for (i = 0; i < N; i++) { +- R[i] = R[i + N] + X[i]; +- R[i + N] = X[i + N]; +- } +- /* now N samples of R can be output */ +- susp->R_current = R; +- } +- /* compute togo, the number of samples to "compute" */ +- /* can't use more than what's left in R. R_current is +- the next sample of R, so what's left is N - (R - R_current) */ +- R_current = susp->R_current; +- togo = min(togo, N - (R_current - R)); ++ } ++ ++ ++ togo = min(togo, susp->x_snd_cnt); + + /* don't run past terminate time */ + if (susp->terminate_cnt != UNKNOWN && +@@ -166,23 +122,69 @@ + if (togo == 0) break; + } + ++ + /* don't run past logical stop time */ +- if (!susp->logically_stopped && +- susp->susp.log_stop_cnt != UNKNOWN && +- susp->susp.log_stop_cnt <= susp->susp.current + cnt + togo) { +- togo = susp->susp.log_stop_cnt - (susp->susp.current + cnt); +- if (togo == 0) break; ++ if (!susp->logically_stopped && susp->susp.log_stop_cnt != UNKNOWN) { ++ int to_stop = susp->susp.log_stop_cnt - (susp->susp.current + cnt); ++ /* break if to_stop == 0 (we're at the logical stop) ++ * AND cnt > 0 (we're not at the beginning of the ++ * output block). ++ */ ++ if (to_stop < togo) { ++ if (to_stop == 0) { ++ if (cnt) { ++ togo = 0; ++ break; ++ } else /* keep togo as is: since cnt == 0, we ++ * can set the logical stop flag on this ++ * output block ++ */ ++ susp->logically_stopped = true; ++ } else /* limit togo so we can start a new ++ * block at the LST ++ */ ++ togo = to_stop; ++ } + } + + n = togo; ++ h_buf_reg = susp->h_buf; ++ h_len_reg = susp->h_len; ++ x_buf_len_reg = susp->x_buf_len; ++ x_buffer_pointer_reg = susp->x_buffer_pointer; ++ x_buffer_current_reg = susp->x_buffer_current; ++ x_snd_ptr_reg = susp->x_snd_ptr; + out_ptr_reg = out_ptr; + if (n) do { /* the inner sample computation loop */ +- *out_ptr_reg++ = (sample_type) *R_current++; ++ long i; double sum; ++ /* see if we've reached end of x_buffer */ ++ if ((x_buffer_pointer_reg + x_buf_len_reg) <= (x_buffer_current_reg + h_len_reg)) { ++ /* shift x_buffer from current back to base */ ++ for (i = 1; i < h_len_reg; i++) { ++ x_buffer_pointer_reg[i-1] = x_buffer_current_reg[i]; ++ } ++ /* this will be incremented back to x_buffer_pointer_reg below */ ++ x_buffer_current_reg = x_buffer_pointer_reg - 1; ++ } ++ ++ x_buffer_current_reg++; ++ ++ x_buffer_current_reg[h_len_reg - 1] = (x_snd_scale_reg * *x_snd_ptr_reg++); ++ ++ sum = 0.0; ++ for (i = 0; i < h_len_reg; i++) { ++ sum += x_buffer_current_reg[i] * h_buf_reg[i]; ++ } ++ ++ *out_ptr_reg++ = (sample_type) sum; + } while (--n); /* inner loop */ + +- /* using R_current is a bad idea on RS/6000: */ +- susp->R_current += togo; ++ susp->x_buffer_pointer = x_buffer_pointer_reg; ++ susp->x_buffer_current = x_buffer_current_reg; ++ /* using x_snd_ptr_reg is a bad idea on RS/6000: */ ++ susp->x_snd_ptr += togo; + out_ptr += togo; ++ susp_took(x_snd_cnt, togo); + cnt += togo; + } /* outer loop */ + +@@ -202,9 +204,10 @@ + } /* convolve_s_fetch */ + + +-void convolve_toss_fetch(snd_susp_type a_susp, snd_list_type snd_list) ++void convolve_toss_fetch(susp, snd_list) ++ register convolve_susp_type susp; ++ snd_list_type snd_list; + { +- convolve_susp_type susp = (convolve_susp_type) susp; + time_type final_time = susp->susp.t0; + long n; + +@@ -219,36 +222,32 @@ + susp->x_snd_ptr += n; + susp_took(x_snd_cnt, n); + susp->susp.fetch = susp->susp.keep_fetch; +- (*(susp->susp.fetch))(a_susp, snd_list); ++ (*(susp->susp.fetch))(susp, snd_list); + } + + +-void convolve_mark(snd_susp_type a_susp) ++void convolve_mark(convolve_susp_type susp) + { +- convolve_susp_type susp = (convolve_susp_type) a_susp; + sound_xlmark(susp->x_snd); + } + + +-void convolve_free(snd_susp_type a_susp) ++void convolve_free(convolve_susp_type susp) + { +- convolve_susp_type susp = (convolve_susp_type) a_susp; +- free(susp->R); +- free(susp->X); +- free(susp->H); +- sound_unref(susp->x_snd); ++ table_unref(susp->table); ++ free(susp->x_buffer_pointer); sound_unref(susp->x_snd); + ffree_generic(susp, sizeof(convolve_susp_node), "convolve_free"); + } + + +-void convolve_print_tree(snd_susp_type a_susp, int n) ++void convolve_print_tree(convolve_susp_type susp, int n) + { +- convolve_susp_type susp = (convolve_susp_type) a_susp; + indent(n); + stdputstr("x_snd:"); + sound_print_tree_1(susp->x_snd, n); + } + ++ + sound_type snd_make_convolve(sound_type x_snd, sound_type h_snd) + { + register convolve_susp_type susp; +@@ -256,38 +255,16 @@ + time_type t0 = x_snd->t0; + sample_type scale_factor = 1.0F; + time_type t0_min = t0; +- table_type table; +- double log_len; + falloc_generic(susp, convolve_susp_node, "snd_make_convolve"); +- table = sound_to_table(h_snd); +- susp->h_len = table->length; +- log_len = log(table->length) / M_LN2; /* compute log-base-2(length) */ +- susp->M = (int) log_len; +- if (susp->M != log_len) susp->M++; /* round up */ +- susp->N = 1 << susp->M; /* size of data blocks */ +- susp->M++; /* M = log2(2 * N) */ +- susp->H = (sample_type *) calloc(2 * susp->N, sizeof(susp->H[0])); +- if (!susp->H) { +- xlabort("memory allocation failure in convolve"); +- } +- memcpy(susp->H, table->samples, sizeof(susp->H[0]) * susp->N); +- table_unref(table); /* don't need table now */ +- /* remaining N samples are already zero-filled */ +- if (fftInit(susp->M)) { +- free(susp->H); +- xlabort("fft initialization error in convolve"); +- } +- rffts(susp->H, susp->M, 1); +- susp->X = (sample_type *) calloc(2 * susp->N, sizeof(susp->X[0])); +- susp->R = (sample_type *) calloc(2 * susp->N, sizeof(susp->R[0])); +- if (!susp->X || !susp->R) { +- free(susp->H); +- if (susp->X) free(susp->X); +- if (susp->R) free(susp->R); +- xlabort("memory allocation failed in convolve"); +- } +- susp->R_current = susp->R + susp->N; +- susp->susp.fetch = &convolve_s_fetch; ++ susp->table = sound_to_table(h_snd); ++ susp->h_buf = susp->table->samples; ++ susp->length_of_h = susp->table->length; ++ susp->h_len = (long) susp->length_of_h; ++ h_reverse(susp->h_buf, susp->h_len); ++ susp->x_buf_len = 2 * susp->h_len; ++ susp->x_buffer_pointer = calloc((2 * (susp->h_len)), sizeof(float)); ++ susp->x_buffer_current = susp->x_buffer_pointer; ++ susp->susp.fetch = convolve_s_fetch; + susp->terminate_cnt = UNKNOWN; + /* handle unequal start times, if any */ + if (t0 < x_snd->t0) sound_prepend_zeros(x_snd, t0);