1 /* libSoX effect: change tempo (and duration) or pitch (maintain duration)
2  * Copyright (c) 2007,8 robs@users.sourceforge.net
3  * Based on ideas from Olli Parviainen's SoundTouch Library.
4  *
5  * This library is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation; either version 2.1 of the License, or (at
8  * your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
13  * General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public License
16  * along with this library; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
18  */
19 
20 #include "sox_i.h"
21 #include "fifo.h"
22 #include <math.h>
23 
24 typedef struct {
25   /* Configuration parameters: */
26   size_t channels;
27   sox_bool quick_search; /* Whether to quick search or linear search */
28   double factor;         /* 1 for no change, < 1 for slower, > 1 for faster. */
29   size_t search;         /* Wide samples to search for best overlap position */
30   size_t segment;        /* Processing segment length in wide samples */
31   size_t overlap;        /* In wide samples */
32 
33   size_t process_size;   /* # input wide samples needed to process 1 segment */
34 
35   /* Buffers: */
36   fifo_t input_fifo;
37   float * overlap_buf;
38   fifo_t output_fifo;
39 
40   /* Counters: */
41   uint64_t samples_in;
42   uint64_t samples_out;
43   uint64_t segments_total;
44   uint64_t skip_total;
45 } tempo_t;
46 
47 /* Waveform Similarity by least squares; works across multi-channels */
difference(const float * a,const float * b,size_t length)48 static float difference(const float * a, const float * b, size_t length)
49 {
50   float diff = 0;
51   size_t i = 0;
52 
53   #define _ diff += sqr(a[i] - b[i]), ++i; /* Loop optimisation */
54   do {_ _ _ _ _ _ _ _} while (i < length); /* N.B. length ≡ 0 (mod 8) */
55   #undef _
56   return diff;
57 }
58 
59 /* Find where the two segments are most alike over the overlap period. */
tempo_best_overlap_position(tempo_t * t,float const * new_win)60 static size_t tempo_best_overlap_position(tempo_t * t, float const * new_win)
61 {
62   float * f = t->overlap_buf;
63   size_t j, best_pos, prev_best_pos = (t->search + 1) >> 1, step = 64;
64   size_t i = best_pos = t->quick_search? prev_best_pos : 0;
65   float diff, least_diff = difference(new_win + t->channels * i, f, t->channels * t->overlap);
66   int k = 0;
67 
68   if (t->quick_search) do { /* hierarchical search */
69     for (k = -1; k <= 1; k += 2) for (j = 1; j < 4 || step == 64; ++j) {
70       i = prev_best_pos + k * j * step;
71       if ((int)i < 0 || i >= t->search)
72         break;
73       diff = difference(new_win + t->channels * i, f, t->channels * t->overlap);
74       if (diff < least_diff)
75         least_diff = diff, best_pos = i;
76     }
77     prev_best_pos = best_pos;
78   } while (step >>= 2);
79   else for (i = 1; i < t->search; i++) { /* linear search */
80     diff = difference(new_win + t->channels * i, f, t->channels * t->overlap);
81     if (diff < least_diff)
82       least_diff = diff, best_pos = i;
83   }
84   return best_pos;
85 }
86 
tempo_overlap(tempo_t * t,const float * in1,const float * in2,float * output)87 static void tempo_overlap(
88     tempo_t * t, const float * in1, const float * in2, float * output)
89 {
90   size_t i, j, k = 0;
91   float fade_step = 1.0f / (float) t->overlap;
92 
93   for (i = 0; i < t->overlap; ++i) {
94     float fade_in  = fade_step * (float) i;
95     float fade_out = 1.0f - fade_in;
96     for (j = 0; j < t->channels; ++j, ++k)
97       output[k] = in1[k] * fade_out + in2[k] * fade_in;
98   }
99 }
100 
tempo_process(tempo_t * t)101 static void tempo_process(tempo_t * t)
102 {
103   while (fifo_occupancy(&t->input_fifo) >= t->process_size) {
104     size_t skip, offset;
105 
106     /* Copy or overlap the first bit to the output */
107     if (!t->segments_total) {
108       offset = t->search / 2;
109       fifo_write(&t->output_fifo, t->overlap, (float *) fifo_read_ptr(&t->input_fifo) + t->channels * offset);
110     } else {
111       offset = tempo_best_overlap_position(t, fifo_read_ptr(&t->input_fifo));
112       tempo_overlap(t, t->overlap_buf,
113           (float *) fifo_read_ptr(&t->input_fifo) + t->channels * offset,
114           fifo_write(&t->output_fifo, t->overlap, NULL));
115     }
116     /* Copy the middle bit to the output */
117     fifo_write(&t->output_fifo, t->segment - 2 * t->overlap,
118                (float *) fifo_read_ptr(&t->input_fifo) +
119                t->channels * (offset + t->overlap));
120 
121     /* Copy the end bit to overlap_buf ready to be mixed with
122      * the beginning of the next segment. */
123     memcpy(t->overlap_buf,
124            (float *) fifo_read_ptr(&t->input_fifo) +
125            t->channels * (offset + t->segment - t->overlap),
126            t->channels * t->overlap * sizeof(*(t->overlap_buf)));
127 
128     /* Advance through the input stream */
129     skip = t->factor * (++t->segments_total * (t->segment - t->overlap)) + 0.5;
130     t->skip_total += skip -= t->skip_total;
131     fifo_read(&t->input_fifo, skip, NULL);
132   }
133 }
134 
tempo_input(tempo_t * t,float const * samples,size_t n)135 static float * tempo_input(tempo_t * t, float const * samples, size_t n)
136 {
137   t->samples_in += n;
138   return fifo_write(&t->input_fifo, n, samples);
139 }
140 
tempo_output(tempo_t * t,float * samples,size_t * n)141 static float const * tempo_output(tempo_t * t, float * samples, size_t * n)
142 {
143   t->samples_out += *n = min(*n, fifo_occupancy(&t->output_fifo));
144   return fifo_read(&t->output_fifo, *n, samples);
145 }
146 
147 /* Flush samples remaining in overlap_buf & input_fifo to the output. */
tempo_flush(tempo_t * t)148 static void tempo_flush(tempo_t * t)
149 {
150   uint64_t samples_out = t->samples_in / t->factor + .5;
151   size_t remaining = samples_out > t->samples_out ?
152       (size_t)(samples_out - t->samples_out) : 0;
153   float * buff = lsx_calloc(128 * t->channels, sizeof(*buff));
154 
155   if (remaining > 0) {
156     while (fifo_occupancy(&t->output_fifo) < remaining) {
157       tempo_input(t, buff, (size_t) 128);
158       tempo_process(t);
159     }
160     fifo_trim_to(&t->output_fifo, remaining);
161     t->samples_in = 0;
162   }
163   free(buff);
164 }
165 
tempo_setup(tempo_t * t,double sample_rate,sox_bool quick_search,double factor,double segment_ms,double search_ms,double overlap_ms)166 static void tempo_setup(tempo_t * t,
167   double sample_rate, sox_bool quick_search, double factor,
168   double segment_ms, double search_ms, double overlap_ms)
169 {
170   size_t max_skip;
171   t->quick_search = quick_search;
172   t->factor = factor;
173   t->segment = sample_rate * segment_ms / 1000 + .5;
174   t->search  = sample_rate * search_ms / 1000 + .5;
175   t->overlap = max(sample_rate * overlap_ms / 1000 + 4.5, 16);
176   t->overlap &= ~7; /* Make divisible by 8 for loop optimisation */
177   if (t->overlap * 2 > t->segment)
178     t->overlap -= 8;
179   t->overlap_buf = lsx_malloc(t->overlap * t->channels * sizeof(*t->overlap_buf));
180   max_skip = ceil(factor * (t->segment - t->overlap));
181   t->process_size = max(max_skip + t->overlap, t->segment) + t->search;
182   memset(fifo_reserve(&t->input_fifo, t->search / 2), 0, (t->search / 2) * t->channels * sizeof(float));
183 }
184 
tempo_delete(tempo_t * t)185 static void tempo_delete(tempo_t * t)
186 {
187   free(t->overlap_buf);
188   fifo_delete(&t->output_fifo);
189   fifo_delete(&t->input_fifo);
190   free(t);
191 }
192 
tempo_create(size_t channels)193 static tempo_t * tempo_create(size_t channels)
194 {
195   tempo_t * t = lsx_calloc(1, sizeof(*t));
196   t->channels = channels;
197   fifo_create(&t->input_fifo, t->channels * sizeof(float));
198   fifo_create(&t->output_fifo, t->channels * sizeof(float));
199   return t;
200 }
201 
202 /*------------------------------- SoX Wrapper --------------------------------*/
203 
204 typedef struct {
205   tempo_t     * tempo;
206   sox_bool    quick_search;
207   double      factor, segment_ms, search_ms, overlap_ms;
208 } priv_t;
209 
getopts(sox_effect_t * effp,int argc,char ** argv)210 static int getopts(sox_effect_t * effp, int argc, char **argv)
211 {
212   priv_t * p = (priv_t *)effp->priv;
213   enum {Default, Music, Speech, Linear} profile = Default;
214   static const double segments_ms [] = {   82,82,  35  , 20};
215   static const double segments_pow[] = {    0, 1, .33  , 1};
216   static const double overlaps_div[] = {6.833, 7,  2.5 , 2};
217   static const double searches_div[] = {5.587, 6,  2.14, 2};
218   int c;
219   lsx_getopt_t optstate;
220   lsx_getopt_init(argc, argv, "+qmls", NULL, lsx_getopt_flag_none, 1, &optstate);
221 
222   p->segment_ms = p->search_ms = p->overlap_ms = HUGE_VAL;
223   while ((c = lsx_getopt(&optstate)) != -1) switch (c) {
224     case 'q': p->quick_search  = sox_true;   break;
225     case 'm': profile = Music; break;
226     case 's': profile = Speech; break;
227     case 'l': profile = Linear; p->search_ms = 0; break;
228     default: lsx_fail("unknown option `-%c'", optstate.opt); return lsx_usage(effp);
229   }
230   argc -= optstate.ind, argv += optstate.ind;
231   do {                    /* break-able block */
232     NUMERIC_PARAMETER(factor      ,0.1 , 100 )
233     NUMERIC_PARAMETER(segment_ms  , 10 , 120)
234     NUMERIC_PARAMETER(search_ms   , 0  , 30 )
235     NUMERIC_PARAMETER(overlap_ms  , 0  , 30 )
236   } while (0);
237 
238   if (p->segment_ms == HUGE_VAL)
239     p->segment_ms = max(10, segments_ms[profile] / max(pow(p->factor, segments_pow[profile]), 1));
240   if (p->overlap_ms == HUGE_VAL)
241     p->overlap_ms = p->segment_ms / overlaps_div[profile];
242   if (p->search_ms == HUGE_VAL)
243     p->search_ms = p->segment_ms / searches_div[profile];
244 
245   p->overlap_ms = min(p->overlap_ms, p->segment_ms / 2);
246   lsx_report("quick_search=%u factor=%g segment=%g search=%g overlap=%g",
247     p->quick_search, p->factor, p->segment_ms, p->search_ms, p->overlap_ms);
248   return argc? lsx_usage(effp) : SOX_SUCCESS;
249 }
250 
start(sox_effect_t * effp)251 static int start(sox_effect_t * effp)
252 {
253   priv_t * p = (priv_t *)effp->priv;
254 
255   if (p->factor == 1)
256     return SOX_EFF_NULL;
257 
258   p->tempo = tempo_create((size_t)effp->in_signal.channels);
259   tempo_setup(p->tempo, effp->in_signal.rate, p->quick_search, p->factor,
260       p->segment_ms, p->search_ms, p->overlap_ms);
261 
262   effp->out_signal.length = SOX_UNKNOWN_LEN;
263   if (effp->in_signal.length != SOX_UNKNOWN_LEN) {
264     uint64_t in_length = effp->in_signal.length / effp->in_signal.channels;
265     uint64_t out_length = in_length / p->factor + .5;
266     effp->out_signal.length = out_length * effp->in_signal.channels;
267   }
268 
269   return SOX_SUCCESS;
270 }
271 
flow(sox_effect_t * effp,const sox_sample_t * ibuf,sox_sample_t * obuf,size_t * isamp,size_t * osamp)272 static int flow(sox_effect_t * effp, const sox_sample_t * ibuf,
273                 sox_sample_t * obuf, size_t * isamp, size_t * osamp)
274 {
275   priv_t * p = (priv_t *)effp->priv;
276   size_t i, odone = *osamp /= effp->in_signal.channels;
277   float const * s = tempo_output(p->tempo, NULL, &odone);
278   SOX_SAMPLE_LOCALS;
279 
280   for (i = 0; i < odone * effp->in_signal.channels; ++i)
281     *obuf++ = SOX_FLOAT_32BIT_TO_SAMPLE(*s++, effp->clips);
282 
283   if (*isamp && odone < *osamp) {
284     float * t = tempo_input(p->tempo, NULL, *isamp / effp->in_signal.channels);
285     for (i = *isamp; i; --i)
286       *t++ = SOX_SAMPLE_TO_FLOAT_32BIT(*ibuf++, effp->clips);
287     tempo_process(p->tempo);
288   }
289   else *isamp = 0;
290 
291   *osamp = odone * effp->in_signal.channels;
292   return SOX_SUCCESS;
293 }
294 
drain(sox_effect_t * effp,sox_sample_t * obuf,size_t * osamp)295 static int drain(sox_effect_t * effp, sox_sample_t * obuf, size_t * osamp)
296 {
297   priv_t * p = (priv_t *)effp->priv;
298   static size_t isamp = 0;
299   tempo_flush(p->tempo);
300   return flow(effp, 0, obuf, &isamp, osamp);
301 }
302 
stop(sox_effect_t * effp)303 static int stop(sox_effect_t * effp)
304 {
305   priv_t * p = (priv_t *)effp->priv;
306   tempo_delete(p->tempo);
307   return SOX_SUCCESS;
308 }
309 
lsx_tempo_effect_fn(void)310 sox_effect_handler_t const * lsx_tempo_effect_fn(void)
311 {
312   static sox_effect_handler_t handler = {
313     "tempo", "[-q] [-m | -s | -l] factor [segment-ms [search-ms [overlap-ms]]]",
314     SOX_EFF_MCHAN | SOX_EFF_LENGTH,
315     getopts, start, flow, drain, stop, NULL, sizeof(priv_t)
316   };
317   return &handler;
318 }
319 
320 /*---------------------------------- pitch -----------------------------------*/
321 
pitch_getopts(sox_effect_t * effp,int argc,char ** argv)322 static int pitch_getopts(sox_effect_t * effp, int argc, char **argv)
323 {
324   double d;
325   char dummy, arg[100], **argv2 = lsx_malloc(argc * sizeof(*argv2));
326   int result, pos = (argc > 1 && !strcmp(argv[1], "-q"))? 2 : 1;
327 
328   if (argc <= pos || sscanf(argv[pos], "%lf %c", &d, &dummy) != 1)
329     return lsx_usage(effp);
330 
331   d = pow(2., d / 1200);  /* cents --> factor */
332   sprintf(arg, "%g", 1 / d);
333   memcpy(argv2, argv, argc * sizeof(*argv2));
334   argv2[pos] = arg;
335   result = getopts(effp, argc, argv2);
336   free(argv2);
337   return result;
338 }
339 
pitch_start(sox_effect_t * effp)340 static int pitch_start(sox_effect_t * effp)
341 {
342   priv_t * p = (priv_t *) effp->priv;
343   int result = start(effp);
344 
345   effp->out_signal.rate = effp->in_signal.rate / p->factor;
346   return result;
347 }
348 
lsx_pitch_effect_fn(void)349 sox_effect_handler_t const * lsx_pitch_effect_fn(void)
350 {
351   static sox_effect_handler_t handler;
352   handler = *lsx_tempo_effect_fn();
353   handler.name = "pitch";
354   handler.usage = "[-q] shift-in-cents [segment-ms [search-ms [overlap-ms]]]",
355   handler.getopts = pitch_getopts;
356   handler.start = pitch_start;
357   handler.flags &= ~SOX_EFF_LENGTH;
358   handler.flags |= SOX_EFF_RATE;
359   return &handler;
360 }
361