/* Silence effect for SoX
 * by Heikki Leinonen (heilei@iki.fi) 25.03.2001
 * Major Modifications by Chris Bagwell 06.08.2001
 * Minor addition by Donnie Smith 13.08.2003
 *
 * This effect can delete samples from the start of a sound file
 * until it sees a specified count of samples exceed a given threshold
 * (any of the channels).
 * This effect can also delete samples from the end of a sound file
 * when it sees a specified count of samples below a given threshold
 * (all channels).
 * It may also be used to delete samples anywhere in a sound file.
 * Thresholds can be given as either a percentage or in decibels.
 */
15
16 #include "sox_i.h"
17
18 #include <string.h>
19
20 /* Private data for silence effect. */
21
/* States of the effect's state machine; the current one is kept in
 * priv_t.mode and dispatched on by the flow and drain handlers.
 */
enum {
    SILENCE_TRIM       = 0, /* discarding input while looking for sound */
    SILENCE_TRIM_FLUSH = 1, /* writing out the start hold-off buffer */
    SILENCE_COPY       = 2, /* passing input through to the output */
    SILENCE_COPY_FLUSH = 3, /* writing out the stop hold-off buffer */
    SILENCE_STOP       = 4  /* finished; nothing more is produced */
};
27
28 typedef struct {
29 char start;
30 int start_periods;
31 char *start_duration_str;
32 size_t start_duration;
33 double start_threshold;
34 char start_unit; /* "d" for decibels or "%" for percent. */
35 int restart;
36
37 sox_sample_t *start_holdoff;
38 size_t start_holdoff_offset;
39 size_t start_holdoff_end;
40 int start_found_periods;
41
42 char stop;
43 int stop_periods;
44 char *stop_duration_str;
45 size_t stop_duration;
46 double stop_threshold;
47 char stop_unit;
48
49 sox_sample_t *stop_holdoff;
50 size_t stop_holdoff_offset;
51 size_t stop_holdoff_end;
52 int stop_found_periods;
53
54 double *window;
55 double *window_current;
56 double *window_end;
57 size_t window_size;
58 double rms_sum;
59
60 char leave_silence;
61
62 /* State Machine */
63 char mode;
64 } priv_t;
65
/* Reset the RMS measurement: rewind the window cursor to the first slot,
 * recompute the end marker, zero the running sum and every stored square.
 */
static void clear_rms(sox_effect_t * effp)
{
    priv_t * p = (priv_t *) effp->priv;
    size_t const window_bytes = p->window_size * sizeof(double);

    p->rms_sum = 0;
    p->window_current = p->window;
    p->window_end = p->window + p->window_size;

    memset(p->window, 0, window_bytes);
}
78
/* Parse the effect's arguments:
 *
 *   [ -l ] above_periods [ duration threshold[d|%] ]
 *          [ below_periods duration threshold[d|%] ]
 *
 * The duration strings are only syntax-checked here (parsed with a rate
 * of 0) and saved; they are parsed again once the sample rate is known.
 * A negative below_periods selects restart mode and is stored as its
 * absolute value. A threshold without a unit suffix is taken as percent.
 *
 * Returns SOX_SUCCESS, the result of lsx_usage() for malformed input, or
 * SOX_EOF when a value is out of range.
 */
static int sox_silence_getopts(sox_effect_t * effp, int argc, char **argv)
{
    priv_t * silence = (priv_t *) effp->priv;
    int fields;          /* number of items sscanf converted */
    uint64_t samples;    /* result of the syntax-check parse */
    const char *rest;    /* unparsed tail of a duration string */

    /* Step over the first argv entry. */
    argc--;
    argv++;

    /* Optional switch */
    silence->leave_silence = sox_false;
    if (argc > 0 && strcmp("-l", *argv) == 0)
    {
        silence->leave_silence = sox_true;
        argc--;
        argv++;
    }

    if (argc < 1)
        return lsx_usage(effp);

    /* ---- Front side: above_periods [ duration threshold ] ---- */
    silence->start = sox_false;
    if (sscanf(argv[0], "%d", &silence->start_periods) != 1)
        return lsx_usage(effp);
    if (silence->start_periods < 0)
    {
        lsx_fail("Periods must not be negative");
        return SOX_EOF;
    }
    argc--;
    argv++;

    if (silence->start_periods > 0)
    {
        silence->start = sox_true;
        if (argc < 2)
            return lsx_usage(effp);

        /* The sample rate is unknown at this point, so keep the text
         * for later and parse it now only to validate the syntax.
         */
        silence->start_duration_str = lsx_strdup(argv[0]);
        rest = lsx_parsesamples(0., silence->start_duration_str, &samples, 's');
        if (!rest || *rest)
            return lsx_usage(effp);
        silence->start_duration = samples;

        fields = sscanf(argv[1], "%lf%c", &silence->start_threshold,
                        &silence->start_unit);
        if (fields < 1)
            return lsx_usage(effp);
        if (fields < 2)
            silence->start_unit = '%';

        argc -= 2;
        argv += 2;
    }

    /* ---- Back side: below_periods duration threshold ---- */
    silence->stop = sox_false;
    if (argc > 0)
    {
        if (argc < 3)
            return lsx_usage(effp);
        if (sscanf(argv[0], "%d", &silence->stop_periods) != 1)
            return lsx_usage(effp);

        /* A negative count means "restart after each stop". */
        silence->restart = (silence->stop_periods < 0);
        if (silence->restart)
            silence->stop_periods = -silence->stop_periods;

        silence->stop = sox_true;
        argc--;
        argv++;

        /* Same deferred-parse scheme as for the front side. */
        silence->stop_duration_str = lsx_strdup(argv[0]);
        rest = lsx_parsesamples(0., silence->stop_duration_str, &samples, 's');
        if (!rest || *rest)
            return lsx_usage(effp);
        silence->stop_duration = samples;

        fields = sscanf(argv[1], "%lf%c", &silence->stop_threshold,
                        &silence->stop_unit);
        if (fields < 1)
            return lsx_usage(effp);
        if (fields < 2)
            silence->stop_unit = '%';

        argc -= 2;
        argv += 2;
    }

    /* ---- Range checks ---- */
    if (silence->start)
    {
        switch (silence->start_unit)
        {
        case '%':
            if (silence->start_threshold < 0.0 ||
                silence->start_threshold > 100.0)
            {
                lsx_fail("silence threshold should be between 0.0 and 100.0 %%");
                return SOX_EOF;
            }
            break;

        case 'd':
            if (silence->start_threshold >= 0.0)
            {
                lsx_fail("silence threshold should be less than 0.0 dB");
                return SOX_EOF;
            }
            break;

        default:
            lsx_fail("Invalid unit specified");
            return lsx_usage(effp);
        }
    }

    if (silence->stop)
    {
        switch (silence->stop_unit)
        {
        case '%':
            if (silence->stop_threshold < 0.0 ||
                silence->stop_threshold > 100.0)
            {
                lsx_fail("silence threshold should be between 0.0 and 100.0 %%");
                return SOX_EOF;
            }
            break;

        case 'd':
            if (silence->stop_threshold >= 0.0)
            {
                lsx_fail("silence threshold should be less than 0.0 dB");
                return SOX_EOF;
            }
            break;

        default:
            lsx_fail("Invalid unit specified");
            return SOX_EOF;
        }
    }

    return SOX_SUCCESS;
}
223
/* Prepare the effect for a run: allocate the RMS window and both
 * hold-off buffers, convert the saved duration strings to sample counts
 * now that the sample rate is known, and select the initial state
 * (SILENCE_TRIM when front trimming was requested, else SILENCE_COPY).
 *
 * Returns SOX_SUCCESS, or the result of lsx_usage() when a duration
 * string fails to parse.
 */
static int sox_silence_start(sox_effect_t * effp)
{
    priv_t *silence = (priv_t *)effp->priv;
    uint64_t temp;
    size_t holdoff_len;

    /* When you want to remove silence, small window sizes are
     * better or else RMS will look like non-silence at
     * abrupt changes from loud to silence.
     */
    silence->window_size = (effp->in_signal.rate / 50) *
                           effp->in_signal.channels;
    /* The window is written to and divided by unconditionally, so it
     * must hold at least one slot even for a degenerate sample rate.
     */
    if (silence->window_size == 0)
        silence->window_size = 1;
    silence->window = lsx_malloc(silence->window_size * sizeof(double));

    clear_rms(effp);

    /* Now that we know sample rate, reparse duration. */
    if (silence->start)
    {
        if (lsx_parsesamples(effp->in_signal.rate, silence->start_duration_str,
                             &temp, 's') == NULL)
            return lsx_usage(effp);
        silence->start_duration = temp * effp->in_signal.channels;
    }
    if (silence->stop)
    {
        if (lsx_parsesamples(effp->in_signal.rate, silence->stop_duration_str,
                             &temp, 's') == NULL)
            return lsx_usage(effp);
        silence->stop_duration = temp * effp->in_signal.channels;
    }

    if (silence->start)
        silence->mode = SILENCE_TRIM;
    else
        silence->mode = SILENCE_COPY;

    /* The flow handler stores one sample per channel in a hold-off
     * buffer BEFORE it compares the fill level with the duration. A
     * duration of zero therefore still needs room for one full frame;
     * sizing the buffer as exactly `duration` samples would make that
     * first store run past the end of the allocation.
     */
    holdoff_len = silence->start_duration;
    if (holdoff_len < effp->in_signal.channels)
        holdoff_len = effp->in_signal.channels;
    silence->start_holdoff = lsx_malloc(sizeof(sox_sample_t) * holdoff_len);
    silence->start_holdoff_offset = 0;
    silence->start_holdoff_end = 0;
    silence->start_found_periods = 0;

    holdoff_len = silence->stop_duration;
    if (holdoff_len < effp->in_signal.channels)
        holdoff_len = effp->in_signal.channels;
    silence->stop_holdoff = lsx_malloc(sizeof(sox_sample_t) * holdoff_len);
    silence->stop_holdoff_offset = 0;
    silence->stop_holdoff_end = 0;
    silence->stop_found_periods = 0;

    effp->out_signal.length = SOX_UNKNOWN_LEN; /* depends on input data */

    return SOX_SUCCESS;
}
274
aboveThreshold(sox_effect_t const * effp,sox_sample_t value,double threshold,int unit)275 static sox_bool aboveThreshold(sox_effect_t const * effp,
276 sox_sample_t value /* >= 0 */, double threshold, int unit)
277 {
278 /* When scaling low bit data, noise values got scaled way up */
279 /* Only consider the original bits when looking for silence */
280 sox_sample_t masked_value = value & (-1 << (32 - effp->in_signal.precision));
281
282 double scaled_value = (double)masked_value / SOX_SAMPLE_MAX;
283
284 if (unit == '%')
285 scaled_value *= 100;
286 else if (unit == 'd')
287 scaled_value = linear_to_dB(scaled_value);
288
289 return scaled_value > threshold;
290 }
291
/* Return the RMS level the window would have if `sample` replaced the
 * value in the current slot. The window itself is not modified; use
 * update_rms() to actually store the sample.
 */
static sox_sample_t compute_rms(sox_effect_t * effp, sox_sample_t sample)
{
    priv_t * silence = (priv_t *) effp->priv;
    double new_sum;
    double rms;

    new_sum = silence->rms_sum;
    new_sum -= *silence->window_current;
    new_sum += ((double)sample * (double)sample);

    /* Squares of 32-bit samples do not all fit a double exactly, so the
     * running sum can end up slightly below zero after many add/subtract
     * rounds. sqrt() of that would be NaN, and converting NaN to an
     * integer is undefined.
     */
    if (new_sum < 0)
        new_sum = 0;

    rms = sqrt(new_sum / silence->window_size);

    /* A window full of most-negative samples gives an RMS one above
     * SOX_SAMPLE_MAX; keep the conversion below within range.
     */
    if (rms > SOX_SAMPLE_MAX)
        rms = SOX_SAMPLE_MAX;

    return (sox_sample_t)rms;
}
306
/* Store `sample` in the RMS window: replace the square in the current
 * slot, keep the running sum in step, and advance the cursor, wrapping
 * to the first slot after the last one.
 */
static void update_rms(sox_effect_t * effp, sox_sample_t sample)
{
    priv_t * p = (priv_t *) effp->priv;
    double * const slot = p->window_current;

    p->rms_sum -= *slot;
    *slot = (double)sample * (double)sample;
    p->rms_sum += *slot;

    p->window_current = slot + 1;
    if (p->window_current >= p->window_end)
        p->window_current = p->window;
}
319
320 /* Process signed long samples from ibuf to obuf. */
321 /* Return number of samples processed in isamp and osamp. */
sox_silence_flow(sox_effect_t * effp,const sox_sample_t * ibuf,sox_sample_t * obuf,size_t * isamp,size_t * osamp)322 static int sox_silence_flow(sox_effect_t * effp, const sox_sample_t *ibuf, sox_sample_t *obuf,
323 size_t *isamp, size_t *osamp)
324 {
325 priv_t * silence = (priv_t *) effp->priv;
326 int threshold;
327 size_t i, j;
328 size_t nrOfTicks, /* sometimes wide, sometimes non-wide samples */
329 nrOfInSamplesRead, nrOfOutSamplesWritten; /* non-wide samples */
330
331 nrOfInSamplesRead = 0;
332 nrOfOutSamplesWritten = 0;
333
334 switch (silence->mode)
335 {
336 case SILENCE_TRIM:
337 /* Reads and discards all input data until it detects a
338 * sample that is above the specified threshold. Turns on
339 * copy mode when detected.
340 * Need to make sure and copy input in groups of "channels" to
341 * prevent getting buffers out of sync.
342 * nrOfTicks counts wide samples here.
343 */
344 silence_trim:
345 nrOfTicks = min((*isamp-nrOfInSamplesRead),
346 (*osamp-nrOfOutSamplesWritten)) /
347 effp->in_signal.channels;
348 for(i = 0; i < nrOfTicks; i++)
349 {
350 threshold = 0;
351 for (j = 0; j < effp->in_signal.channels; j++)
352 {
353 threshold |= aboveThreshold(effp,
354 compute_rms(effp, ibuf[j]),
355 silence->start_threshold,
356 silence->start_unit);
357 }
358
359 if (threshold)
360 {
361 /* Add to holdoff buffer */
362 for (j = 0; j < effp->in_signal.channels; j++)
363 {
364 update_rms(effp, *ibuf);
365 silence->start_holdoff[
366 silence->start_holdoff_end++] = *ibuf++;
367 nrOfInSamplesRead++;
368 }
369
370 if (silence->start_holdoff_end >=
371 silence->start_duration)
372 {
373 if (++silence->start_found_periods >=
374 silence->start_periods)
375 {
376 silence->mode = SILENCE_TRIM_FLUSH;
377 goto silence_trim_flush;
378 }
379 /* Trash holdoff buffer since its not
380 * needed. Start looking again.
381 */
382 silence->start_holdoff_offset = 0;
383 silence->start_holdoff_end = 0;
384 }
385 }
386 else /* !above Threshold */
387 {
388 silence->start_holdoff_end = 0;
389 for (j = 0; j < effp->in_signal.channels; j++)
390 {
391 update_rms(effp, ibuf[j]);
392 }
393 ibuf += effp->in_signal.channels;
394 nrOfInSamplesRead += effp->in_signal.channels;
395 }
396 } /* for nrOfTicks */
397 break;
398
399 case SILENCE_TRIM_FLUSH:
400 /* nrOfTicks counts non-wide samples here. */
401 silence_trim_flush:
402 nrOfTicks = min((silence->start_holdoff_end -
403 silence->start_holdoff_offset),
404 (*osamp-nrOfOutSamplesWritten));
405 nrOfTicks -= nrOfTicks % effp->in_signal.channels;
406 for(i = 0; i < nrOfTicks; i++)
407 {
408 *obuf++ = silence->start_holdoff[silence->start_holdoff_offset++];
409 nrOfOutSamplesWritten++;
410 }
411
412 /* If fully drained holdoff then switch to copy mode */
413 if (silence->start_holdoff_offset == silence->start_holdoff_end)
414 {
415 silence->start_holdoff_offset = 0;
416 silence->start_holdoff_end = 0;
417 silence->mode = SILENCE_COPY;
418 goto silence_copy;
419 }
420 break;
421
422 case SILENCE_COPY:
423 /* Attempts to copy samples into output buffer.
424 *
425 * Case B:
426 * If not looking for silence to terminate copy then
427 * blindly copy data into output buffer.
428 *
429 * Case A:
430 *
431 * Case 1a:
432 * If previous silence was detect then see if input sample is
433 * above threshold. If found then flush out hold off buffer
434 * and copy over to output buffer.
435 *
436 * Case 1b:
437 * If no previous silence detect then see if input sample
438 * is above threshold. If found then copy directly
439 * to output buffer.
440 *
441 * Case 2:
442 * If not above threshold then silence is detect so
443 * store in hold off buffer and do not write to output
444 * buffer. Even though it wasn't put in output
445 * buffer, inform user that input was consumed.
446 *
447 * If hold off buffer is full after this then stop
448 * copying data and discard data in hold off buffer.
449 *
450 * Special leave_silence logic:
451 *
452 * During this mode, go ahead and copy input
453 * samples to output buffer instead of holdoff buffer
454 * Then also short ciruit any flushes that would occur
455 * when non-silence is detect since samples were already
456 * copied. This has the effect of always leaving
457 * holdoff[] amount of silence but deleting any
458 * beyond that amount.
459 *
460 * nrOfTicks counts wide samples here.
461 */
462 silence_copy:
463 nrOfTicks = min((*isamp-nrOfInSamplesRead),
464 (*osamp-nrOfOutSamplesWritten)) /
465 effp->in_signal.channels;
466 if (silence->stop)
467 {
468 /* Case A */
469 for(i = 0; i < nrOfTicks; i++)
470 {
471 threshold = 1;
472 for (j = 0; j < effp->in_signal.channels; j++)
473 {
474 threshold &= aboveThreshold(effp,
475 compute_rms(effp, ibuf[j]),
476 silence->stop_threshold,
477 silence->stop_unit);
478 }
479
480 /* Case 1a
481 * If above threshold, check to see if we where holding
482 * off previously. If so then flush this buffer.
483 * We haven't incremented any pointers yet so nothing
484 * is lost.
485 *
486 * If user wants to leave_silence, then we
487 * were already copying the data and so no
488 * need to flush the old data. Just resume
489 * copying as if we were not holding off.
490 */
491 if (threshold && silence->stop_holdoff_end
492 && !silence->leave_silence)
493 {
494 silence->mode = SILENCE_COPY_FLUSH;
495 goto silence_copy_flush;
496 }
497 /* Case 1b */
498 else if (threshold)
499 {
500 /* Not holding off so copy into output buffer */
501 for (j = 0; j < effp->in_signal.channels; j++)
502 {
503 update_rms(effp, *ibuf);
504 *obuf++ = *ibuf++;
505 nrOfInSamplesRead++;
506 nrOfOutSamplesWritten++;
507 }
508 }
509 /* Case 2 */
510 else if (!threshold)
511 {
512 /* Add to holdoff buffer */
513 for (j = 0; j < effp->in_signal.channels; j++)
514 {
515 update_rms(effp, *ibuf);
516 if (silence->leave_silence) {
517 *obuf++ = *ibuf;
518 nrOfOutSamplesWritten++;
519 }
520 silence->stop_holdoff[
521 silence->stop_holdoff_end++] = *ibuf++;
522 nrOfInSamplesRead++;
523 }
524
525 /* Check if holdoff buffer is greater than duration
526 */
527 if (silence->stop_holdoff_end >=
528 silence->stop_duration)
529 {
530 /* Increment found counter and see if this
531 * is the last period. If so then exit.
532 */
533 if (++silence->stop_found_periods >=
534 silence->stop_periods)
535 {
536 silence->stop_holdoff_offset = 0;
537 silence->stop_holdoff_end = 0;
538 if (!silence->restart)
539 {
540 *isamp = nrOfInSamplesRead;
541 *osamp = nrOfOutSamplesWritten;
542 silence->mode = SILENCE_STOP;
543 /* Return SOX_EOF since no more processing */
544 return (SOX_EOF);
545 }
546 else
547 {
548 silence->stop_found_periods = 0;
549 silence->start_found_periods = 0;
550 silence->start_holdoff_offset = 0;
551 silence->start_holdoff_end = 0;
552 clear_rms(effp);
553 silence->mode = SILENCE_TRIM;
554
555 goto silence_trim;
556 }
557 }
558 else
559 {
560 /* Flush this buffer and start
561 * looking again.
562 */
563 silence->mode = SILENCE_COPY_FLUSH;
564 goto silence_copy_flush;
565 }
566 break;
567 } /* Filled holdoff buffer */
568 } /* Detected silence */
569 } /* For # of samples */
570 } /* Trimming off backend */
571 else /* !(silence->stop) */
572 {
573 /* Case B */
574 memcpy(obuf, ibuf, sizeof(sox_sample_t)*nrOfTicks*
575 effp->in_signal.channels);
576 nrOfInSamplesRead += (nrOfTicks*effp->in_signal.channels);
577 nrOfOutSamplesWritten += (nrOfTicks*effp->in_signal.channels);
578 }
579 break;
580
581 case SILENCE_COPY_FLUSH:
582 /* nrOfTicks counts non-wide samples here. */
583 silence_copy_flush:
584 nrOfTicks = min((silence->stop_holdoff_end -
585 silence->stop_holdoff_offset),
586 (*osamp-nrOfOutSamplesWritten));
587 nrOfTicks -= nrOfTicks % effp->in_signal.channels;
588
589 for(i = 0; i < nrOfTicks; i++)
590 {
591 *obuf++ = silence->stop_holdoff[silence->stop_holdoff_offset++];
592 nrOfOutSamplesWritten++;
593 }
594
595 /* If fully drained holdoff then return to copy mode */
596 if (silence->stop_holdoff_offset == silence->stop_holdoff_end)
597 {
598 silence->stop_holdoff_offset = 0;
599 silence->stop_holdoff_end = 0;
600 silence->mode = SILENCE_COPY;
601 goto silence_copy;
602 }
603 break;
604
605 case SILENCE_STOP:
606 /* This code can't be reached. */
607 nrOfInSamplesRead = *isamp;
608 break;
609 }
610
611 *isamp = nrOfInSamplesRead;
612 *osamp = nrOfOutSamplesWritten;
613
614 return (SOX_SUCCESS);
615 }
616
/* Called when the input has ended: write out whatever is still held in
 * the stop hold-off buffer, whole frames only, as far as it fits in
 * *osamp. On return *osamp holds the number of samples written.
 *
 * Returns SOX_EOF when the state is SILENCE_STOP on exit or when
 * nothing was written; SOX_SUCCESS when some samples were written but
 * held samples remain.
 */
static int sox_silence_drain(sox_effect_t * effp, sox_sample_t *obuf, size_t *osamp)
{
    priv_t * silence = (priv_t *) effp->priv;
    size_t written = 0;     /* samples put into obuf */

    /* Held samples can only be pending in the two copy states. */
    if (silence->mode == SILENCE_COPY_FLUSH ||
        silence->mode == SILENCE_COPY)
    {
        size_t pending = min((silence->stop_holdoff_end -
                              silence->stop_holdoff_offset), *osamp);

        pending -= pending % effp->in_signal.channels;
        while (written < pending)
        {
            obuf[written] = silence->stop_holdoff[silence->stop_holdoff_offset];
            silence->stop_holdoff_offset++;
            written++;
        }

        /* Buffer empty: the effect is finished. */
        if (silence->stop_holdoff_offset == silence->stop_holdoff_end)
        {
            silence->stop_holdoff_offset = 0;
            silence->stop_holdoff_end = 0;
            silence->mode = SILENCE_STOP;
        }
    }

    *osamp = written;

    if (silence->mode == SILENCE_STOP || written == 0)
        return SOX_EOF;
    return SOX_SUCCESS;
}
653
/* Release the buffers allocated for a run: both hold-off buffers and
 * the RMS window. Always returns SOX_SUCCESS.
 */
static int sox_silence_stop(sox_effect_t * effp)
{
    priv_t * p = (priv_t *) effp->priv;

    free(p->stop_holdoff);
    free(p->start_holdoff);
    free(p->window);

    return SOX_SUCCESS;
}
664
/* Release the duration strings saved by the option parser.
 * Always returns SOX_SUCCESS.
 */
static int lsx_kill(sox_effect_t * effp)
{
    priv_t * p = (priv_t *) effp->priv;

    free(p->stop_duration_str);
    free(p->start_duration_str);

    return SOX_SUCCESS;
}
674
675 static sox_effect_handler_t sox_silence_effect = {
676 "silence",
677 "[ -l ] above_periods [ duration threshold[d|%] ] [ below_periods duration threshold[d|%] ]",
678 SOX_EFF_MCHAN | SOX_EFF_MODIFY | SOX_EFF_LENGTH,
679 sox_silence_getopts,
680 sox_silence_start,
681 sox_silence_flow,
682 sox_silence_drain,
683 sox_silence_stop,
684 lsx_kill, sizeof(priv_t)
685 };
686
lsx_silence_effect_fn(void)687 const sox_effect_handler_t *lsx_silence_effect_fn(void)
688 {
689 return &sox_silence_effect;
690 }
691