1 /* libSoX microsoft's WAVE sound format handler
2  *
3  * Copyright 1998-2006 Chris Bagwell and SoX Contributors
4  * Copyright 1997 Graeme W. Gill, 93/5/17
5  * Copyright 1992 Rick Richardson
6  * Copyright 1991 Lance Norskog And Sundry Contributors
7  *
8  * Info for format tags can be found at:
9  *   http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
10  *
11  */
12 
13 #include "sox_i.h"
14 
15 #include <string.h>
16 #include <stdlib.h>
17 #include <stdio.h>
18 
19 #include "ima_rw.h"
20 #include "adpcm.h"
21 
22 #ifdef HAVE_LIBGSM
23 #ifdef HAVE_GSM_GSM_H
24 #include <gsm/gsm.h>
25 #else
26 #include <gsm.h>
27 #endif
28 #endif
29 
/* Magic length that is sometimes stored in a size field to indicate that
 * the real size is unknown or too large to represent.
 * When this value is detected on input, any length-based logic is disabled.
 */
#define MS_UNSPEC 0x7ffff000
34 
/* Format tag values as stored in the first field of the "fmt " chunk.
 * Each tag is paired with a printable name (and, for the formats this
 * handler can decode, an encoding and a header parser) in the
 * wave_formats[] table.
 */
#define WAVE_FORMAT_UNKNOWN             0x0000
#define WAVE_FORMAT_PCM                 0x0001
#define WAVE_FORMAT_ADPCM               0x0002
#define WAVE_FORMAT_IEEE_FLOAT          0x0003
#define WAVE_FORMAT_IBM_CVSD            0x0005
#define WAVE_FORMAT_ALAW                0x0006
#define WAVE_FORMAT_MULAW               0x0007
#define WAVE_FORMAT_OKI_ADPCM           0x0010
#define WAVE_FORMAT_IMA_ADPCM           0x0011
#define WAVE_FORMAT_MEDIASPACE_ADPCM    0x0012
#define WAVE_FORMAT_SIERRA_ADPCM        0x0013
#define WAVE_FORMAT_G723_ADPCM          0x0014
#define WAVE_FORMAT_DIGISTD             0x0015
#define WAVE_FORMAT_DIGIFIX             0x0016
#define WAVE_FORMAT_YAMAHA_ADPCM        0x0020
#define WAVE_FORMAT_SONARC              0x0021
#define WAVE_FORMAT_TRUESPEECH          0x0022
#define WAVE_FORMAT_ECHOSC1             0x0023
#define WAVE_FORMAT_AUDIOFILE_AF36      0x0024
#define WAVE_FORMAT_APTX                0x0025
#define WAVE_FORMAT_AUDIOFILE_AF10      0x0026
#define WAVE_FORMAT_DOLBY_AC2           0x0030
#define WAVE_FORMAT_GSM610              0x0031
#define WAVE_FORMAT_ADPCME              0x0033
#define WAVE_FORMAT_CONTROL_RES_VQLPC   0x0034
#define WAVE_FORMAT_DIGIREAL            0x0035
#define WAVE_FORMAT_DIGIADPCM           0x0036
#define WAVE_FORMAT_CONTROL_RES_CR10    0x0037
#define WAVE_FORMAT_ROCKWELL_ADPCM      0x003b
#define WAVE_FORMAT_ROCKWELL_DIGITALK   0x003c
#define WAVE_FORMAT_G721_ADPCM          0x0040
#define WAVE_FORMAT_G728_CELP           0x0041
#define WAVE_FORMAT_MPEG                0x0050
#define WAVE_FORMAT_MPEGLAYER3          0x0055
#define WAVE_FORMAT_G726_ADPCM          0x0064
#define WAVE_FORMAT_G722_ADPCM          0x0065
#define WAVE_FORMAT_CREATIVE_ADPCM      0x0200
#define WAVE_FORMAT_CREATIVE_FSP8       0x0202
#define WAVE_FORMAT_CREATIVE_FSP10      0x0203
#define WAVE_FORMAT_FM_TOWNS_SND        0x0300
#define WAVE_FORMAT_OLIGSM              0x1000
#define WAVE_FORMAT_OLIADPCM            0x1001
#define WAVE_FORMAT_OLISBC              0x1003
#define WAVE_FORMAT_OLIOPR              0x1004
/* Extensible header: the effective format tag is taken from the
 * sub-format field of the extended fmt chunk (see wav_read_fmt). */
#define WAVE_FORMAT_EXTENSIBLE          0xfffe
80 
/* To allow padding to samplesPerBlock. Works, but currently never true.
 * When true, the ADPCM block writer counts a full samplesPerBlock for every
 * block written; when false it counts only the samples actually buffered.
 * NOTE(review): used as a boolean although declared size_t. */
static const size_t pad_nsamps = sox_false;
83 
84 /* Private data for .wav file */
/* Private data for .wav file */
typedef struct {
    /* samples/channel reading: starts at total count and decremented  */
    /* writing: starts at 0 and counts samples written */
    uint64_t  numSamples;
    size_t    dataLength;     /* needed for ADPCM writing */
    unsigned short formatTag;       /* What type of encoding file is using */
    unsigned short samplesPerBlock; /* block size in samples, read from the fmt chunk extension */
    unsigned short blockAlign;      /* block size in bytes, read from the fmt chunk */
    uint16_t bitsPerSample;     /* bits per sample */
    size_t dataStart;           /* stream position of the data chunk payload; needed for seeking */
    int ignoreSize;                 /* ignoreSize allows us to process 32-bit WAV files that are
                                     * greater than 2 GB and can't be represented by the
                                     * 32-bit size field. */
  /* FIXME: Have some front-end code which sets this flag. */

    /* following used by *ADPCM wav files */
    unsigned short nCoefs;          /* ADPCM: number of coef sets */
    short         *lsx_ms_adpcm_i_coefs;          /* ADPCM: coef sets           */
    void          *ms_adpcm_data;   /* Private data of adpcm decoder */
    unsigned char *packet;          /* Temporary buffer for packets */
    short         *samples;         /* interleaved samples buffer */
    short         *samplePtr;       /* Pointer to current sample  */
    short         *sampleTop;       /* End of samples-buffer      */
    unsigned short blockSamplesRemaining;/* Samples remaining per channel */
    int            state[16];       /* step-size info for *ADPCM writes */

#ifdef HAVE_LIBGSM
    /* following used by GSM 6.10 wav */
    gsm            gsmhandle;       /* codec object created by gsm_create() */
    gsm_signal     *gsmsample;      /* buffer of 160*2 samples (one 65-byte WAV frame) */
    int            gsmindex;        /* next position to read/write in gsmsample */
    size_t      gsmbytecount;    /* counts bytes written to data block */
#endif
} priv_t;
119 
/* One entry of the table describing the WAVE format tags known to this
 * handler (see wave_formats[]). */
struct wave_format {
    uint16_t tag;               /* WAVE_FORMAT_* value from the fmt chunk */
    const char *name;           /* printable name; NULL terminates the table */
    sox_encoding_t encoding;    /* SoX encoding, left zero for formats that cannot be decoded */
    /* Optional parser for the format-specific remainder of the fmt chunk;
     * len is the number of unread bytes.  Returns non-zero on failure. */
    int (*read_fmt)(sox_format_t *ft, uint32_t len);
};
126 
/* Forward declarations for functions defined later in this file. */

/* Returns a printable name for a WAVE format tag (used in error messages). */
static const char *wav_format_str(unsigned tag);

static int wavwritehdr(sox_format_t *, int);
130 
131 /****************************************************************************/
132 /* IMA ADPCM Support Functions Section                                      */
133 /****************************************************************************/
134 
/*
 * Parse the IMA ADPCM specific part of the "fmt " chunk.
 *
 * Reads samplesPerBlock from the stream, verifies that it agrees with the
 * blockAlign value already stored in the private data, and allocates the
 * packet buffer and the decoded-sample buffer.
 *
 *   ft   format handle; ft->priv holds the wav private data
 *   len  number of bytes of the fmt chunk that are still unread
 *
 * Returns SOX_SUCCESS, or SOX_EOF on a read failure or unsupported header.
 */
static int wav_ima_adpcm_fmt(sox_format_t *ft, uint32_t len)
{
    priv_t *priv = ft->priv;
    size_t  expected_block_bytes;

    /* Only the 4-bit variant is supported. */
    if (priv->bitsPerSample != 4) {
        lsx_fail_errno(ft, SOX_EOF,
                       "Can only handle 4-bit IMA ADPCM in wav files");
        return SOX_EOF;
    }

    if (lsx_read_fields(ft, &len, "h", &priv->samplesPerBlock))
        return SOX_EOF;

    expected_block_bytes =
        lsx_ima_bytes_per_block(ft->signal.channels, priv->samplesPerBlock);

    /* The block size must match the header and the per-block sample count
     * must be of the form 8*k + 1. */
    if (priv->samplesPerBlock % 8 != 1 ||
        expected_block_bytes != priv->blockAlign) {
        lsx_fail_errno(ft, SOX_EOF,
                       "format[%s]: samplesPerBlock(%d) != blockAlign(%d)",
                       wav_format_str(priv->formatTag),
                       priv->samplesPerBlock, priv->blockAlign);
        return SOX_EOF;
    }

    /* One raw block, and one block's worth of interleaved decoded samples. */
    priv->packet = lsx_malloc(priv->blockAlign);
    priv->samples =
        lsx_malloc(ft->signal.channels * priv->samplesPerBlock * sizeof(short));

    return SOX_SUCCESS;
}
168 
169 /*
170  *
171  * ImaAdpcmReadBlock - Grab and decode complete block of samples
172  *
173  */
ImaAdpcmReadBlock(sox_format_t * ft)174 static unsigned short  ImaAdpcmReadBlock(sox_format_t * ft)
175 {
176     priv_t *       wav = (priv_t *) ft->priv;
177     size_t bytesRead;
178     int samplesThisBlock;
179 
180     /* Pull in the packet and check the header */
181     bytesRead = lsx_readbuf(ft, wav->packet, (size_t)wav->blockAlign);
182     samplesThisBlock = wav->samplesPerBlock;
183     if (bytesRead < wav->blockAlign)
184     {
185         /* If it looks like a valid header is around then try and */
186         /* work with partial blocks.  Specs say it should be null */
187         /* padded but I guess this is better than trailing quiet. */
188         samplesThisBlock = lsx_ima_samples_in((size_t)0, (size_t)ft->signal.channels, bytesRead, (size_t) 0);
189         if (samplesThisBlock == 0 || samplesThisBlock > wav->samplesPerBlock)
190         {
191             lsx_warn("Premature EOF on .wav input file");
192             return 0;
193         }
194     }
195 
196     wav->samplePtr = wav->samples;
197 
198     /* For a full block, the following should be true: */
199     /* wav->samplesPerBlock = blockAlign - 8byte header + 1 sample in header */
200     lsx_ima_block_expand_i(ft->signal.channels, wav->packet, wav->samples, samplesThisBlock);
201     return samplesThisBlock;
202 
203 }
204 
205 /****************************************************************************/
206 /* MS ADPCM Support Functions Section                                       */
207 /****************************************************************************/
208 
/*
 * Parse the MS ADPCM specific part of the "fmt " chunk.
 *
 * Reads samplesPerBlock and nCoefs, validates them against blockAlign and
 * the remaining chunk length, reads the coefficient table and allocates the
 * buffers used for decoding.
 *
 *   ft   format handle; ft->priv holds the wav private data
 *   len  number of bytes of the fmt chunk that are still unread
 *
 * Returns SOX_SUCCESS, or SOX_EOF on a read failure or an invalid header.
 * Nothing allocated here is left behind when SOX_EOF is returned.
 */
static int wav_ms_adpcm_fmt(sox_format_t *ft, uint32_t len)
{
    priv_t *wav = ft->priv;
    size_t  bytesPerBlock;
    short  *coefs;
    int i, errct = 0;
    int err;

    if (wav->bitsPerSample != 4) {
        lsx_fail_errno(ft, SOX_EOF,
                       "Can only handle 4-bit MS ADPCM in wav files");
        return SOX_EOF;
    }

    err = lsx_read_fields(ft, &len, "hh", &wav->samplesPerBlock, &wav->nCoefs);
    if (err)
        return SOX_EOF;

    bytesPerBlock = lsx_ms_adpcm_bytes_per_block(ft->signal.channels,
                                                 wav->samplesPerBlock);

    if (bytesPerBlock != wav->blockAlign) {
        lsx_fail_errno(ft, SOX_EOF,
                       "format[%s]: samplesPerBlock(%d) != blockAlign(%d)",
                       wav_format_str(wav->formatTag),
                       wav->samplesPerBlock, wav->blockAlign);
        return SOX_EOF;
    }

    /* At least the 7 standard coefficient pairs must be present; the
     * comparison loop below relies on that. */
    if (wav->nCoefs < 7 || wav->nCoefs > 0x100) {
        lsx_fail_errno(ft, SOX_EOF,
                       "ADPCM file nCoefs (%.4hx) makes no sense",
                       wav->nCoefs);
        return SOX_EOF;
    }

    /* Each coefficient set is two 16-bit values. */
    if (len < 4 * wav->nCoefs) {
        lsx_fail_errno(ft, SOX_EOF, "wave header error: cbSize too small");
        return SOX_EOF;
    }

    /* Read the coefficient table before allocating anything else so that a
     * read failure has only this one buffer to release. */
    coefs = lsx_malloc(wav->nCoefs * 2 * sizeof(short));

    err = lsx_read_fields(ft, &len, "*h", 2 * wav->nCoefs, coefs);
    if (err) {
        free(coefs);
        return SOX_EOF;
    }

    /* nCoefs, lsx_ms_adpcm_i_coefs used by adpcm.c */
    wav->lsx_ms_adpcm_i_coefs = coefs;

    wav->packet = lsx_malloc(wav->blockAlign);
    wav->samples =
        lsx_malloc(ft->signal.channels * wav->samplesPerBlock * sizeof(short));
    wav->ms_adpcm_data = lsx_ms_adpcm_alloc(ft->signal.channels);

    /* Warn when the first 7 coefficient pairs differ from the built-in set. */
    for (i = 0; i < 14; i++)
        errct += wav->lsx_ms_adpcm_i_coefs[i] != lsx_ms_adpcm_i_coef[i/2][i%2];

    if (errct)
        lsx_warn("base lsx_ms_adpcm_i_coefs differ in %d/14 positions", errct);

    return SOX_SUCCESS;
}
270 
271 /*
272  *
273  * AdpcmReadBlock - Grab and decode complete block of samples
274  *
275  */
AdpcmReadBlock(sox_format_t * ft)276 static unsigned short  AdpcmReadBlock(sox_format_t * ft)
277 {
278     priv_t *       wav = (priv_t *) ft->priv;
279     size_t bytesRead;
280     int samplesThisBlock;
281     const char *errmsg;
282 
283     /* Pull in the packet and check the header */
284     bytesRead = lsx_readbuf(ft, wav->packet, (size_t) wav->blockAlign);
285     samplesThisBlock = wav->samplesPerBlock;
286     if (bytesRead < wav->blockAlign)
287     {
288         /* If it looks like a valid header is around then try and */
289         /* work with partial blocks.  Specs say it should be null */
290         /* padded but I guess this is better than trailing quiet. */
291         samplesThisBlock = lsx_ms_adpcm_samples_in((size_t)0, (size_t)ft->signal.channels, bytesRead, (size_t)0);
292         if (samplesThisBlock == 0 || samplesThisBlock > wav->samplesPerBlock)
293         {
294             lsx_warn("Premature EOF on .wav input file");
295             return 0;
296         }
297     }
298 
299     errmsg = lsx_ms_adpcm_block_expand_i(wav->ms_adpcm_data, ft->signal.channels, wav->nCoefs, wav->lsx_ms_adpcm_i_coefs, wav->packet, wav->samples, samplesThisBlock);
300 
301     if (errmsg)
302         lsx_warn("%s", errmsg);
303 
304     return samplesThisBlock;
305 }
306 
307 /****************************************************************************/
308 /* Common ADPCM Write Function                                              */
309 /****************************************************************************/
310 
/*
 * Encode and write the samples buffered in wav->samples as one ADPCM block.
 *
 * Does nothing when fewer than one sample per channel is buffered.
 * Otherwise the rest of the buffer is zero-filled, the block is compressed
 * with the MS or IMA encoder (selected by wav->formatTag), written to the
 * output, and the length counters and wav->samplePtr are updated.
 *
 * Returns SOX_SUCCESS, or SOX_EOF when the block could not be written.
 */
static int xxxAdpcmWriteBlock(sox_format_t * ft)
{
    priv_t *priv = (priv_t *) ft->priv;
    size_t  nchan = ft->signal.channels;
    size_t  buffered = priv->samplePtr - priv->samples;
    short  *fill;

    if (buffered < nchan)
        return (SOX_SUCCESS);

    /* zero-fill samples if needed to complete block */
    for (fill = priv->samplePtr; fill < priv->sampleTop; fill++)
        *fill = 0;

    /* compress the samples to priv->packet */
    if (priv->formatTag != WAVE_FORMAT_ADPCM) {
        /* WAVE_FORMAT_IMA_ADPCM */
        lsx_ima_block_mash_i((unsigned) nchan, priv->samples, priv->samplesPerBlock, priv->state, priv->packet, 9);
    } else {
        lsx_ms_adpcm_block_mash_i((unsigned) nchan, priv->samples, priv->samplesPerBlock, priv->state, priv->packet, priv->blockAlign);
    }

    /* write the compressed packet */
    if (lsx_writebuf(ft, priv->packet, (size_t) priv->blockAlign) != priv->blockAlign) {
        lsx_fail_errno(ft,SOX_EOF,"write error");
        return (SOX_EOF);
    }

    /* update lengths and samplePtr */
    priv->dataLength += priv->blockAlign;
    if (!pad_nsamps)
        priv->numSamples += buffered / nchan;
    else
        priv->numSamples += priv->samplesPerBlock;
    priv->samplePtr = priv->samples;

    return (SOX_SUCCESS);
}
345 
346 #ifdef HAVE_LIBGSM
347 /****************************************************************************/
348 /* WAV GSM6.10 support functions                                            */
349 /****************************************************************************/
350 
/*
 * Parse the GSM 6.10 specific part of the "fmt " chunk.
 *
 * Reads samplesPerBlock and accepts only the one supported layout:
 * 65-byte blocks holding 320 samples each.
 *
 *   ft   format handle; ft->priv holds the wav private data
 *   len  number of bytes of the fmt chunk that are still unread
 *
 * Returns SOX_SUCCESS, or SOX_EOF on a read failure or unexpected values.
 */
static int wav_gsm_fmt(sox_format_t *ft, uint32_t len)
{
    enum { GSM_BLOCK_BYTES = 65, GSM_BLOCK_SAMPLES = 320 };
    priv_t *priv = ft->priv;

    if (lsx_read_fields(ft, &len, "h", &priv->samplesPerBlock))
        return SOX_EOF;

    if (priv->blockAlign != GSM_BLOCK_BYTES) {
        lsx_fail_errno(ft, SOX_EOF, "format[%s]: expects blockAlign(%d) = %d",
                       wav_format_str(priv->formatTag), priv->blockAlign,
                       GSM_BLOCK_BYTES);
        return SOX_EOF;
    }

    if (priv->samplesPerBlock != GSM_BLOCK_SAMPLES) {
        lsx_fail_errno(ft, SOX_EOF,
                       "format[%s]: expects samplesPerBlock(%d) = %d",
                       wav_format_str(priv->formatTag),
                       priv->samplesPerBlock, GSM_BLOCK_SAMPLES);
        return SOX_EOF;
    }

    return SOX_SUCCESS;
}
376 
377 /* create the gsm object, malloc buffer for 160*2 samples */
wavgsminit(sox_format_t * ft)378 static int wavgsminit(sox_format_t * ft)
379 {
380     int valueP=1;
381     priv_t *       wav = (priv_t *) ft->priv;
382     wav->gsmbytecount=0;
383     wav->gsmhandle=gsm_create();
384     if (!wav->gsmhandle)
385     {
386         lsx_fail_errno(ft,SOX_EOF,"cannot create GSM object");
387         return (SOX_EOF);
388     }
389 
390     if(gsm_option(wav->gsmhandle,GSM_OPT_WAV49,&valueP) == -1){
391         lsx_fail_errno(ft,SOX_EOF,"error setting gsm_option for WAV49 format. Recompile gsm library with -DWAV49 option and relink sox");
392         return (SOX_EOF);
393     }
394 
395     wav->gsmsample=lsx_malloc(sizeof(gsm_signal)*160*2);
396     wav->gsmindex=0;
397     return (SOX_SUCCESS);
398 }
399 
400 /*destroy the gsm object and free the buffer */
wavgsmdestroy(sox_format_t * ft)401 static void wavgsmdestroy(sox_format_t * ft)
402 {
403     priv_t *       wav = (priv_t *) ft->priv;
404     gsm_destroy(wav->gsmhandle);
405     free(wav->gsmsample);
406 }
407 
/*
 * Read and decode GSM 6.10 samples.
 *
 * Samples left over from a previous call are handed out first; after that
 * whole 65-byte frames are read and decoded into wav->gsmsample (320
 * samples per frame), possibly leaving some samples buffered for the next
 * call.
 *
 *   buf  destination for decoded samples
 *   len  maximum number of samples to store in buf
 *
 * Returns the number of samples stored.  Reading stops early at end of
 * input or on a truncated frame; a decoder error returns 0.
 */
static size_t wavgsmread(sox_format_t * ft, sox_sample_t *buf, size_t len)
{
    priv_t  *priv = (priv_t *) ft->priv;
    gsm_byte frame[65];
    size_t   nout = 0;

    ft->sox_errno = SOX_SUCCESS;

    /* copy out any samples left from the last call */
    while (priv->gsmindex && (priv->gsmindex < 160*2) && (nout < len)) {
        gsm_signal s = priv->gsmsample[priv->gsmindex++];
        buf[nout++] = SOX_SIGNED_16BIT_TO_SAMPLE(s,);
    }

    /* read and decode loop, possibly leaving some samples in gsmsample */
    while (nout < len) {
        int nread;

        priv->gsmindex = 0;
        nread = lsx_readbuf(ft, frame, (size_t)65);
        if (nread <= 0)
            return nout;
        if (nread < 65) {
            lsx_warn("invalid wav gsm frame size: %d bytes",nread);
            return nout;
        }

        /* decode the long 33 byte half, then the short 32 byte half */
        if (gsm_decode(priv->gsmhandle, frame, priv->gsmsample) < 0 ||
            gsm_decode(priv->gsmhandle, frame + 33, priv->gsmsample + 160) < 0) {
            lsx_fail_errno(ft,SOX_EOF,"error during gsm decode");
            return 0;
        }

        while ((priv->gsmindex < 160*2) && (nout < len)) {
            gsm_signal s = priv->gsmsample[priv->gsmindex++];
            buf[nout++] = SOX_SIGNED_16BIT_TO_SAMPLE(s,);
        }
    }

    return nout;
}
451 
/*
 * Encode the buffered samples as one 65-byte frame and write it.
 *
 * The sample buffer is zero-padded up to 160*2 samples, both halves are
 * encoded, and the frame is written.  On success the byte counter is
 * advanced by 65 and the buffer index is reset to 0.
 *
 * Returns SOX_SUCCESS, or SOX_EOF on a write error.
 */
static int wavgsmflush(sox_format_t * ft)
{
    priv_t  *priv = (priv_t *) ft->priv;
    gsm_byte frame[65];

    /* zero fill as needed */
    for (; priv->gsmindex < 160*2; priv->gsmindex++)
        priv->gsmsample[priv->gsmindex] = 0;

    /* encode the even half short (32 byte) frame */
    gsm_encode(priv->gsmhandle, priv->gsmsample, frame);
    /* encode the odd half long (33 byte) frame */
    gsm_encode(priv->gsmhandle, priv->gsmsample + 160, frame + 32);

    if (lsx_writebuf(ft, frame, (size_t) 65) != 65) {
        lsx_fail_errno(ft,SOX_EOF,"write error");
        return (SOX_EOF);
    }

    priv->gsmbytecount += 65;
    priv->gsmindex = 0;

    return (SOX_SUCCESS);
}
475 
/*
 * Buffer samples for GSM 6.10 encoding, flushing a frame each time the
 * buffer reaches 160*2 samples.
 *
 *   buf  samples to encode
 *   len  number of samples in buf
 *
 * Returns the number of samples consumed, or 0 when a flush fails.
 */
static size_t wavgsmwrite(sox_format_t * ft, const sox_sample_t *buf, size_t len)
{
    priv_t *priv = (priv_t *) ft->priv;
    size_t  nin = 0;

    ft->sox_errno = SOX_SUCCESS;

    while (nin < len) {
        SOX_SAMPLE_LOCALS;

        /* top up the frame buffer from the input */
        while ((priv->gsmindex < 160*2) && (nin < len)) {
            priv->gsmsample[priv->gsmindex] =
                SOX_SAMPLE_TO_SIGNED_16BIT(buf[nin++], ft->clips);
            priv->gsmindex++;
        }

        /* input exhausted before the frame filled up: keep it for later */
        if (priv->gsmindex < 160*2)
            break;

        if (wavgsmflush(ft))
            return 0;
    }

    return nin;
}
500 
/*
 * Finish GSM 6.10 output: flush a partially filled frame, pad the data to
 * an even number of bytes, and release the codec resources.
 */
static void wavgsmstopwrite(sox_format_t * ft)
{
    priv_t *priv = (priv_t *) ft->priv;

    ft->sox_errno = SOX_SUCCESS;

    if (priv->gsmindex != 0)
        wavgsmflush(ft);

    /* Add a pad byte if amount of written bytes is not even. */
    if (priv->gsmbytecount % 2 != 0) {
        if (!lsx_writeb(ft, 0))
            priv->gsmbytecount += 1;
        else
            lsx_fail_errno(ft,SOX_EOF,"write error");
    }

    wavgsmdestroy(ft);
}
520 
521 #endif  /* HAVE_LIBGSM */
522 
523 /****************************************************************************/
524 /* General Sox WAV file code                                                */
525 /****************************************************************************/
526 
/*
 * Select the SoX encoding for PCM data from the sample width.
 *
 * bitsPerSample is rounded up to whole bytes: one byte selects unsigned
 * samples, up to four bytes select signed samples, anything wider is
 * rejected.
 *
 *   ft   format handle; ft->priv holds the wav private data
 *   len  unread bytes of the fmt chunk (not used here)
 *
 * Returns SOX_SUCCESS, or SOX_EOF for an unsupported sample width.
 */
static int wav_pcm_fmt(sox_format_t *ft, uint32_t len)
{
    priv_t *priv = ft->priv;
    int bytes_per_sample = (priv->bitsPerSample + 7) / 8;

    if (bytes_per_sample > 4) {
        lsx_fail_errno(ft, SOX_EFMT, "%d bytes per sample not suppored",
                       bytes_per_sample);
        return SOX_EOF;
    }

    ft->encoding.encoding = (bytes_per_sample == 1) ? SOX_ENCODING_UNSIGNED
                                                    : SOX_ENCODING_SIGN2;

    return SOX_SUCCESS;
}
543 
544 static const struct wave_format wave_formats[] = {
545     { WAVE_FORMAT_UNKNOWN,              "Unknown Wave Type" },
546     { WAVE_FORMAT_PCM,                  "PCM",
547       SOX_ENCODING_UNKNOWN,
548       wav_pcm_fmt,
549     },
550     { WAVE_FORMAT_ADPCM,                "Microsoft ADPCM",
551       SOX_ENCODING_MS_ADPCM,
552       wav_ms_adpcm_fmt,
553     },
554     { WAVE_FORMAT_IEEE_FLOAT,           "IEEE Float",
555       SOX_ENCODING_FLOAT },
556     { WAVE_FORMAT_IBM_CVSD,             "Digispeech CVSD" },
557     { WAVE_FORMAT_ALAW,                 "CCITT A-law",
558       SOX_ENCODING_ALAW },
559     { WAVE_FORMAT_MULAW,                "CCITT u-law",
560       SOX_ENCODING_ULAW },
561     { WAVE_FORMAT_OKI_ADPCM,            "OKI ADPCM" },
562     { WAVE_FORMAT_IMA_ADPCM,            "IMA ADPCM",
563       SOX_ENCODING_IMA_ADPCM,
564       wav_ima_adpcm_fmt,
565     },
566     { WAVE_FORMAT_MEDIASPACE_ADPCM,     "MediaSpace ADPCM" },
567     { WAVE_FORMAT_SIERRA_ADPCM,         "Sierra ADPCM" },
568     { WAVE_FORMAT_G723_ADPCM,           "G.723 ADPCM" },
569     { WAVE_FORMAT_DIGISTD,              "DIGISTD" },
570     { WAVE_FORMAT_DIGIFIX,              "DigiFix" },
571     { WAVE_FORMAT_YAMAHA_ADPCM,         "Yamaha ADPCM" },
572     { WAVE_FORMAT_SONARC,               "Sonarc" },
573     { WAVE_FORMAT_TRUESPEECH,           "Truespeech" },
574     { WAVE_FORMAT_ECHOSC1,              "ECHO SC-1", },
575     { WAVE_FORMAT_AUDIOFILE_AF36,       "Audio File AF36" },
576     { WAVE_FORMAT_APTX,                 "aptX" },
577     { WAVE_FORMAT_AUDIOFILE_AF10,       "Audio File AF10" },
578     { WAVE_FORMAT_DOLBY_AC2,            "Dolby AC-2" },
579     { WAVE_FORMAT_GSM610,               "GSM 6.10",
580 #ifdef HAVE_LIBGSM
581       SOX_ENCODING_GSM,
582       wav_gsm_fmt,
583 #endif
584     },
585     { WAVE_FORMAT_ADPCME,               "Antex ADPCME" },
586     { WAVE_FORMAT_CONTROL_RES_VQLPC,    "Control Resources VQLPC" },
587     { WAVE_FORMAT_DIGIREAL,             "DSP Solutions REAL" },
588     { WAVE_FORMAT_DIGIADPCM,            "DSP Solutions ADPCM" },
589     { WAVE_FORMAT_CONTROL_RES_CR10,     "Control Resources CR10" },
590     { WAVE_FORMAT_ROCKWELL_ADPCM,       "Rockwell ADPCM" },
591     { WAVE_FORMAT_ROCKWELL_DIGITALK,    "Rockwell DIGITALK" },
592     { WAVE_FORMAT_G721_ADPCM,           "G.721 ADPCM" },
593     { WAVE_FORMAT_G728_CELP,            "G.728 CELP" },
594     { WAVE_FORMAT_MPEG,                 "MPEG-1 Audio" },
595     { WAVE_FORMAT_MPEGLAYER3,           "MPEG-1 Layer 3" },
596     { WAVE_FORMAT_G726_ADPCM,           "G.726 ADPCM" },
597     { WAVE_FORMAT_G722_ADPCM,           "G.722 ADPCM" },
598     { WAVE_FORMAT_CREATIVE_ADPCM,       "Creative Labs ADPCM" },
599     { WAVE_FORMAT_CREATIVE_FSP8,        "Creative Labs FastSpeech 8" },
600     { WAVE_FORMAT_CREATIVE_FSP10,       "Creative Labs FastSpeech 10" },
601     { WAVE_FORMAT_FM_TOWNS_SND,         "Fujitsu FM Towns SND" },
602     { WAVE_FORMAT_OLIGSM,               "Olivetti GSM" },
603     { WAVE_FORMAT_OLIADPCM,             "Olivetti ADPCM" },
604     { WAVE_FORMAT_OLISBC,               "Olivetti CELP" },
605     { WAVE_FORMAT_OLIOPR,               "Olivetti OPR" },
606     { }
607 };
608 
wav_find_format(unsigned tag)609 static const struct wave_format *wav_find_format(unsigned tag)
610 {
611     const struct wave_format *f;
612 
613     for (f = wave_formats; f->name; f++)
614         if (f->tag == tag)
615             return f;
616 
617     return NULL;
618 }
619 
/*
 * Report an unusable WAVE format as a header error.
 *
 *   tag   format tag value, printed in hexadecimal
 *   name  printable format name, or NULL when the tag is unknown
 *
 * Always returns SOX_EOF so callers can write "return wavfail(...)".
 */
static int wavfail(sox_format_t *ft, int tag, const char *name)
{
    if (name == NULL)
        lsx_fail_errno(ft, SOX_EHDR, "Unknown WAVE format %04x", tag);
    else
        lsx_fail_errno(ft, SOX_EHDR, "WAVE format '%s' (%04x) not supported",
                       name, tag);

    return SOX_EOF;
}
630 
/*
 * Parse a "fmt " chunk.
 *
 * Reads the common header fields, the optional extension size and, for
 * WAVE_FORMAT_EXTENSIBLE, the extensible fields (whose sub-format replaces
 * the format tag).  Channel count, rate, sample size and encoding are then
 * stored in ft unless the caller already set different values, in which
 * case the caller's values win.  Format-specific parsing is delegated to
 * the read_fmt handler of the matching wave_formats[] entry.
 *
 *   ft   format handle; ft->priv holds the wav private data
 *   len  size of the fmt chunk in bytes
 *
 * Returns 0 on success, SOX_EOF on a read failure or an invalid or
 * unsupported header.
 */
static int wav_read_fmt(sox_format_t *ft, uint32_t len)
{
    priv_t  *priv = ft->priv;
    const sox_encoding_t requested_enc = ft->encoding.encoding;
    const struct wave_format *entry;
    uint16_t channels;           /* number of channels */
    uint32_t sample_rate;        /* samples per second per channel */
    uint32_t avg_bytes_per_sec;  /* estimate of bytes per second needed */
    uint16_t ext_size = 0;       /* extended field for non-PCM */

    if (len < 16) {
        lsx_fail_errno(ft, SOX_EHDR, "WAVE file fmt chunk is too short");
        return SOX_EOF;
    }

    if (lsx_read_fields(ft, &len, "hhiihh",
                        &priv->formatTag,
                        &channels,
                        &sample_rate,
                        &avg_bytes_per_sec,
                        &priv->blockAlign,
                        &priv->bitsPerSample))
        return SOX_EOF;

    if (len < 2) {
        /* non-PCM formats except alaw and mulaw formats have extended fmt
         * chunk; warn when it is missing. */
        switch (priv->formatTag) {
        case WAVE_FORMAT_PCM:
        case WAVE_FORMAT_ALAW:
        case WAVE_FORMAT_MULAW:
            break;
        default:
            lsx_warn("WAVE file missing extended part of fmt chunk");
            break;
        }
    } else if (lsx_read_fields(ft, &len, "h", &ext_size)) {
        return SOX_EOF;
    }

    /* The extension size must account for everything left in the chunk. */
    if (ext_size != len) {
        lsx_fail_errno(ft, SOX_EOF,
                       "WAVE header error: cbSize inconsistent with fmt size");
        return SOX_EOF;
    }

    if (priv->formatTag == WAVE_FORMAT_EXTENSIBLE) {
        uint16_t valid_bits;
        uint32_t speaker_mask;
        uint16_t sub_format;

        if (len < 22) {
            lsx_fail_errno(ft, SOX_EHDR, "WAVE file fmt chunk is too short");
            return SOX_EOF;
        }

        if (lsx_read_fields(ft, &len, "hih14x",
                            &valid_bits,
                            &speaker_mask,
                            &sub_format))
            return SOX_EOF;

        if (valid_bits > priv->bitsPerSample) {
            lsx_fail_errno(ft, SOX_EHDR,
                           "wValidBitsPerSample > wBitsPerSample");
            return SOX_EOF;
        }

        /* From here on the sub-format is treated as the format tag. */
        priv->formatTag = sub_format;
        lsx_report("EXTENSIBLE");
    }

    /* User options take precedence */
    if (ft->signal.channels != 0 && ft->signal.channels != channels)
        lsx_report("User options overriding channels read in .wav header");
    else
        ft->signal.channels = channels;

    if (ft->signal.channels == 0) {
        lsx_fail_errno(ft, SOX_EHDR, "Channel count is zero");
        return SOX_EOF;
    }

    if (ft->signal.rate != 0 && ft->signal.rate != sample_rate)
        lsx_report("User options overriding rate read in .wav header");
    else
        ft->signal.rate = sample_rate;

    entry = wav_find_format(priv->formatTag);
    if (entry == NULL)
        return wavfail(ft, priv->formatTag, NULL);

    /* format handler might override */
    ft->encoding.encoding = entry->encoding;

    if (!entry->read_fmt) {
        /* Known tag, but no encoding and no parser: not supported. */
        if (!entry->encoding)
            return wavfail(ft, priv->formatTag, entry->name);
    } else if (entry->read_fmt(ft, len)) {
        return SOX_EOF;
    }

    /* User options take precedence */
    if (ft->encoding.bits_per_sample &&
        ft->encoding.bits_per_sample != priv->bitsPerSample)
        lsx_warn("User options overriding size read in .wav header");
    else
        ft->encoding.bits_per_sample = priv->bitsPerSample;

    if (requested_enc && requested_enc != ft->encoding.encoding) {
        lsx_report("User options overriding encoding read in .wav header");
        ft->encoding.encoding = requested_enc;
    }

    return 0;
}
749 
valid_chunk_id(const char p[4])750 static sox_bool valid_chunk_id(const char p[4])
751 {
752     int i;
753 
754     for (i = 0; i < 4; i++)
755         if (p[i] < 0x20 || p[i] > 0x7f)
756             return sox_false;
757 
758     return sox_true;
759 }
760 
/*
 * Read a chunk header: a four-byte tag followed by a 32-bit length.
 *
 *   tag  receives the four tag bytes (no terminator is added)
 *   len  receives the chunk length
 *
 * Returns SOX_EOF when fewer than four tag bytes could be read, otherwise
 * the result of reading the length word.
 */
static int read_chunk_header(sox_format_t *ft, char tag[4], uint32_t *len)
{
    int got = lsx_readbuf(ft, tag, 4);

    return (got < 4) ? SOX_EOF : lsx_readdw(ft, len);
}
771 
772 /*
773  * Do anything required before you start reading samples.
774  * Read file header.
775  *      Find out sampling rate,
776  *      size and encoding of samples,
777  *      mono/stereo/quad.
778  */
startread(sox_format_t * ft)779 static int startread(sox_format_t *ft)
780 {
781     priv_t  *wav = ft->priv;
782     char     magic[5] = { 0 };
783     uint32_t clen;
784     int      err;
785 
786     sox_bool isRF64 = sox_false;
787     uint64_t ds64_riff_size;
788     uint64_t ds64_data_size;
789     uint64_t ds64_sample_count;
790 
791     /* wave file characteristics */
792     uint64_t qwRiffLength;
793     uint64_t qwDataLength = 0;
794     sox_bool have_fmt = sox_false;
795 
796     ft->sox_errno = SOX_SUCCESS;
797     wav->ignoreSize = ft->signal.length == SOX_IGNORE_LENGTH;
798     ft->encoding.reverse_bytes = MACHINE_IS_BIGENDIAN;
799 
800     if (read_chunk_header(ft, magic, &clen))
801         return SOX_EOF;
802 
803     if (!memcmp(magic, "RIFX", 4)) {
804         lsx_debug("Found RIFX header");
805         ft->encoding.reverse_bytes = MACHINE_IS_LITTLEENDIAN;
806     } else if (!memcmp(magic, "RF64", 4)) {
807         lsx_debug("Found RF64 header");
808         isRF64 = sox_true;
809     } else if (memcmp(magic, "RIFF", 4)) {
810         lsx_fail_errno(ft, SOX_EHDR, "WAVE: RIFF header not found");
811         return SOX_EOF;
812     }
813 
814     qwRiffLength = clen;
815 
816     if (lsx_readbuf(ft, magic, 4) < 4 || memcmp(magic, "WAVE", 4)) {
817         lsx_fail_errno(ft, SOX_EHDR, "WAVE header not found");
818         return SOX_EOF;
819     }
820 
821     while (!read_chunk_header(ft, magic, &clen)) {
822         uint32_t len = clen;
823         off_t cstart = lsx_tell(ft);
824         off_t pos;
825 
826         if (!valid_chunk_id(magic)) {
827             lsx_fail_errno(ft, SOX_EHDR, "invalid chunk ID found");
828             return SOX_EOF;
829         }
830 
831         lsx_debug("Found chunk '%s', size %u", magic, clen);
832 
833         if (!memcmp(magic, "ds64", 4)) {
834             if (!isRF64)
835                 lsx_warn("ds64 chunk in non-RF64 file");
836 
837             if (clen < 28) {
838                 lsx_fail_errno(ft, SOX_EHDR, "ds64 chunk too small");
839                 return SOX_EOF;
840             }
841 
842             if (clen == 32) {
843                 lsx_warn("ds64 chunk size invalid, attempting workaround");
844                 clen = 28;
845             }
846 
847             err = lsx_read_fields(ft, &len, "qqq",
848                                   &ds64_riff_size,
849                                   &ds64_data_size,
850                                   &ds64_sample_count);
851             if (err)
852                 return SOX_EOF;
853 
854             goto next;
855         }
856 
857         if (!memcmp(magic, "fmt ", 4)) {
858             err = wav_read_fmt(ft, clen);
859             if (err)
860                 return err;
861 
862             have_fmt = sox_true;
863 
864             goto next;
865         }
866 
867         if (!memcmp(magic, "fact", 4)) {
868             uint32_t val;
869 
870             err = lsx_read_fields(ft, &len, "i", &val);
871             if (err)
872                 return SOX_EOF;
873 
874             wav->numSamples = val;
875 
876             goto next;
877         }
878 
879         if (!memcmp(magic, "data", 4)) {
880             if (isRF64 && clen == UINT32_MAX)
881                 clen = ds64_data_size;
882 
883             qwDataLength = clen;
884             wav->dataStart = lsx_tell(ft);
885 
886             if (qwDataLength == UINT32_MAX || qwDataLength == MS_UNSPEC)
887                 break;
888 
889             if (!ft->seekable)
890                 break;
891 
892             goto next;
893         }
894 
895     next:
896         pos = lsx_tell(ft);
897         clen += clen & 1;
898 
899         if (pos > cstart + clen) {
900             lsx_fail_errno(ft, SOX_EHDR, "malformed chunk %s", magic);
901             return SOX_EOF;
902         }
903 
904         err = lsx_seeki(ft, cstart + clen - pos, SEEK_CUR);
905         if (err)
906             return SOX_EOF;
907     }
908 
909     if (isRF64) {
910         if (wav->numSamples == UINT32_MAX)
911             wav->numSamples = ds64_sample_count;
912 
913         if (qwRiffLength == UINT32_MAX)
914             qwRiffLength = ds64_riff_size;
915     }
916 
917     if (!have_fmt) {
918         lsx_fail_errno(ft, SOX_EOF, "fmt chunk not found");
919         return SOX_EOF;
920     }
921 
922     if (!wav->dataStart) {
923         lsx_fail_errno(ft, SOX_EOF, "data chunk not found");
924         return SOX_EOF;
925     }
926 
927     if (ft->seekable)
928         lsx_seeki(ft, wav->dataStart, SEEK_SET);
929 
930     /* some files wrongly report total samples across all channels */
931     if (wav->numSamples * wav->blockAlign == qwDataLength * ft->signal.channels)
932         wav->numSamples /= ft->signal.channels;
933 
934     if ((qwDataLength == UINT32_MAX && !wav->numSamples) ||
935         qwDataLength == MS_UNSPEC) {
936         lsx_warn("WAV data length is magic value or UINT32_MAX, ignoring");
937         wav->ignoreSize = 1;
938     }
939 
940     switch (wav->formatTag) {
941     case WAVE_FORMAT_ADPCM:
942         wav->numSamples =
943             lsx_ms_adpcm_samples_in(qwDataLength, ft->signal.channels,
944                                     wav->blockAlign, wav->samplesPerBlock);
945         wav->blockSamplesRemaining = 0;        /* Samples left in buffer */
946         break;
947 
948     case WAVE_FORMAT_IMA_ADPCM:
949         /* Compute easiest part of number of samples.  For every block, there
950            are samplesPerBlock samples to read. */
951         wav->numSamples =
952             lsx_ima_samples_in(qwDataLength, ft->signal.channels,
953                                wav->blockAlign, wav->samplesPerBlock);
954         wav->blockSamplesRemaining = 0;        /* Samples left in buffer */
955         lsx_ima_init_table();
956         break;
957 
958 #ifdef HAVE_LIBGSM
959     case WAVE_FORMAT_GSM610:
960         wav->numSamples = qwDataLength / wav->blockAlign * wav->samplesPerBlock;
961         wavgsminit(ft);
962         break;
963 #endif
964     }
965 
966     if (!wav->numSamples)
967         wav->numSamples = div_bits(qwDataLength, ft->encoding.bits_per_sample)
968             / ft->signal.channels;
969 
970     if (wav->ignoreSize)
971         ft->signal.length = SOX_UNSPEC;
972     else
973         ft->signal.length = wav->numSamples * ft->signal.channels;
974 
975     return lsx_rawstartread(ft);
976 }
977 
978 
979 /*
980  * Read up to len samples from file.
981  * Convert to signed longs.
982  * Place in buf[].
983  * Return number of samples read.
984  */
985 
read_samples(sox_format_t * ft,sox_sample_t * buf,size_t len)986 static size_t read_samples(sox_format_t *ft, sox_sample_t *buf, size_t len)
987 {
988     priv_t *wav = ft->priv;
989     size_t done;
990 
991     ft->sox_errno = SOX_SUCCESS;
992 
993     if (!wav->ignoreSize)
994         len = min(len, wav->numSamples * ft->signal.channels);
995 
996     /* If file is in ADPCM encoding then read in multiple blocks else */
997     /* read as much as possible and return quickly. */
998     switch (ft->encoding.encoding) {
999     case SOX_ENCODING_IMA_ADPCM:
1000     case SOX_ENCODING_MS_ADPCM:
1001         done = 0;
1002         while (done < len) { /* Still want data? */
1003             short *p, *top;
1004             size_t ct;
1005 
1006             /* See if need to read more from disk */
1007             if (wav->blockSamplesRemaining == 0) {
1008                 if (wav->formatTag == WAVE_FORMAT_IMA_ADPCM)
1009                     wav->blockSamplesRemaining = ImaAdpcmReadBlock(ft);
1010                 else
1011                     wav->blockSamplesRemaining = AdpcmReadBlock(ft);
1012 
1013                 if (wav->blockSamplesRemaining == 0) {
1014                     /* Don't try to read any more samples */
1015                     wav->numSamples = 0;
1016                     return done;
1017                 }
1018                 wav->samplePtr = wav->samples;
1019             }
1020 
1021             /* Copy interleaved data into buf, converting to sox_sample_t */
1022             ct = len - done;
1023             if (ct > wav->blockSamplesRemaining * ft->signal.channels)
1024                 ct = wav->blockSamplesRemaining * ft->signal.channels;
1025 
1026             done += ct;
1027             wav->blockSamplesRemaining -= ct / ft->signal.channels;
1028             p = wav->samplePtr;
1029             top = p + ct;
1030 
1031             /* Output is already signed */
1032             while (p < top)
1033                 *buf++ = SOX_SIGNED_16BIT_TO_SAMPLE(*p++,);
1034 
1035             wav->samplePtr = p;
1036         }
1037 
1038         /* "done" for ADPCM equals total data processed and not
1039          * total samples procesed.  The only way to take care of that
1040          * is to return here and not fall thru.
1041          */
1042         wav->numSamples -= done / ft->signal.channels;
1043 
1044         return done;
1045 
1046 #ifdef HAVE_LIBGSM
1047     case SOX_ENCODING_GSM:
1048         done = wavgsmread(ft, buf, len);
1049         break;
1050 #endif
1051 
1052     default: /* assume PCM or float encoding */
1053         done = lsx_rawread(ft, buf, len);
1054         break;
1055     }
1056 
1057     if (done == 0 && wav->numSamples && !wav->ignoreSize)
1058         lsx_warn("Premature EOF on .wav input file");
1059 
1060     /* Only return buffers that contain a totally playable
1061      * amount of audio.
1062      */
1063     done -= done % ft->signal.channels;
1064 
1065     if (done / ft->signal.channels > wav->numSamples)
1066         wav->numSamples = 0;
1067     else
1068         wav->numSamples -= done / ft->signal.channels;
1069 
1070     return done;
1071 }
1072 
1073 /*
1074  * Do anything required when you stop reading samples.
1075  * Don't close input file!
1076  */
stopread(sox_format_t * ft)1077 static int stopread(sox_format_t * ft)
1078 {
1079     priv_t *       wav = (priv_t *) ft->priv;
1080 
1081     ft->sox_errno = SOX_SUCCESS;
1082 
1083     free(wav->packet);
1084     free(wav->samples);
1085     free(wav->lsx_ms_adpcm_i_coefs);
1086     free(wav->ms_adpcm_data);
1087 
1088     switch (ft->encoding.encoding)
1089     {
1090 #ifdef HAVE_LIBGSM
1091     case SOX_ENCODING_GSM:
1092         wavgsmdestroy(ft);
1093         break;
1094 #endif
1095     case SOX_ENCODING_IMA_ADPCM:
1096     case SOX_ENCODING_MS_ADPCM:
1097         break;
1098     default:
1099         break;
1100     }
1101     return SOX_SUCCESS;
1102 }
1103 
/*
 * Prepare the handler for writing: start the raw writer for encodings
 * that use it, emit the first header and allocate the block buffers
 * needed by the ADPCM encoders (or initialise GSM).
 *
 * Returns SOX_SUCCESS, or the error code of the step that failed.
 */
static int startwrite(sox_format_t * ft)
{
    priv_t *wav = (priv_t *) ft->priv;
    int uses_raw_writer;
    int status;

    ft->sox_errno = SOX_SUCCESS;

    /* The block-based encoders do their own output; everything else
     * goes through the raw writer. */
    uses_raw_writer =
        ft->encoding.encoding != SOX_ENCODING_MS_ADPCM &&
        ft->encoding.encoding != SOX_ENCODING_IMA_ADPCM &&
        ft->encoding.encoding != SOX_ENCODING_GSM;

    if (uses_raw_writer) {
        status = lsx_rawstartwrite(ft);
        if (status)
            return status;
    }

    wav->numSamples = 0;
    wav->dataLength = 0;
    if (!(ft->signal.length || ft->seekable))
        lsx_warn("Length in output .wav header will be wrong since can't seek to fix it");

    /* also calculates various wav->* info */
    status = wavwritehdr(ft, 0);
    if (status != 0)
        return status;

    wav->packet = NULL;
    wav->samples = NULL;
    wav->lsx_ms_adpcm_i_coefs = NULL;

    if (wav->formatTag == WAVE_FORMAT_IMA_ADPCM)
        lsx_ima_init_table();

    if (wav->formatTag == WAVE_FORMAT_IMA_ADPCM ||
        wav->formatTag == WAVE_FORMAT_ADPCM) {
        size_t ch = 0;
        size_t sbsize;

        /* #channels already range-checked for overflow in wavwritehdr() */
        while (ch < ft->signal.channels) {
            wav->state[ch] = 0;
            ch++;
        }

        /* one block of interleaved 16-bit samples plus one encoded block */
        sbsize = ft->signal.channels * wav->samplesPerBlock;
        wav->packet = lsx_malloc((size_t)wav->blockAlign);
        wav->samples = lsx_malloc(sbsize*sizeof(short));
        wav->sampleTop = wav->samples + sbsize;
        wav->samplePtr = wav->samples;
        return SOX_SUCCESS;
    }

#ifdef HAVE_LIBGSM
    if (wav->formatTag == WAVE_FORMAT_GSM610)
        return wavgsminit(ft);
#endif

    return SOX_SUCCESS;
}
1160 
1161 /* wavwritehdr:  write .wav headers as follows:
1162 
1163 bytes      variable      description
1164 0  - 3     'RIFF'/'RIFX' Little/Big-endian
1165 4  - 7     wRiffLength   length of file minus the 8 byte riff header
1166 8  - 11    'WAVE'
1167 12 - 15    'fmt '
1168 16 - 19    wFmtSize       length of format chunk minus 8 byte header
1169 20 - 21    wFormatTag     identifies PCM, ULAW etc
1170 22 - 23    wChannels
1171 24 - 27    dwSamplesPerSecond  samples per second per channel
1172 28 - 31    dwAvgBytesPerSec    non-trivial for compressed formats
1173 32 - 33    wBlockAlign         basic block size
1174 34 - 35    wBitsPerSample      non-trivial for compressed formats
1175 
1176 PCM formats then go straight to the data chunk:
1177 36 - 39    'data'
1178 40 - 43     dwDataLength   length of data chunk minus 8 byte header
1179 44 - (dwDataLength + 43)   the data
1180 (+ a padding byte if dwDataLength is odd)
1181 
1182 non-PCM formats must write an extended format chunk and a fact chunk:
1183 
1184 ULAW, ALAW formats:
1185 36 - 37    wExtSize = 0  the length of the format extension
1186 38 - 41    'fact'
1187 42 - 45    dwFactSize = 4  length of the fact chunk minus 8 byte header
1188 46 - 49    dwSamplesWritten   actual number of samples written out
1189 50 - 53    'data'
1190 54 - 57     dwDataLength  length of data chunk minus 8 byte header
1191 58 - (dwDataLength + 57)  the data
1192 (+ a padding byte if dwDataLength is odd)
1193 
1194 
1195 GSM6.10  format:
1196 36 - 37    wExtSize = 2 the length in bytes of the format-dependent extension
1197 38 - 39    320           number of samples per  block
1198 40 - 43    'fact'
1199 44 - 47    dwFactSize = 4  length of the fact chunk minus 8 byte header
1200 48 - 51    dwSamplesWritten   actual number of samples written out
1201 52 - 55    'data'
1202 56 - 59     dwDataLength  length of data chunk minus 8 byte header
1203 60 - (dwDataLength + 59)  the data (including a padding byte, if necessary,
1204                             so dwDataLength is always even)
1205 
1206 
1207 note that header contains (up to) 3 separate ways of describing the
1208 length of the file, all derived here from the number of (input)
1209 samples wav->numSamples in a way that is non-trivial for the blocked
1210 and padded compressed formats:
1211 
1212 wRiffLength -      (riff header) the length of the file, minus 8
1213 dwSamplesWritten - (fact header) the number of samples written (after padding
1214                    to a complete block eg for GSM)
1215 dwDataLength     - (data chunk header) the number of (valid) data bytes written
1216 
1217 */
1218 
/*
 * Write the RIFF/WAVE header for the current output settings.
 *
 * second_header is zero when the header is first emitted from
 * startwrite(), in which case the lengths are derived from the
 * ft->signal.length hint, and non-zero when stopwrite() rewrites the
 * header using the number of samples actually written (wav->numSamples).
 *
 * As a side effect this fills in wav->formatTag, wav->blockAlign and
 * wav->samplesPerBlock.  Length fields that do not fit in 32 bits are
 * written as UINT32_MAX.
 *
 * Returns SOX_SUCCESS, or SOX_EOF if the channel count or the block
 * alignment cannot be represented.
 */
static int wavwritehdr(sox_format_t * ft, int second_header)
{
    priv_t *       wav = (priv_t *) ft->priv;

    /* variables written to wav file header */
    /* RIFF header */
    uint64_t wRiffLength ;  /* length of file after 8 byte riff header */
    /* fmt chunk */
    uint16_t wFmtSize = 16;       /* size field of the fmt chunk */
    uint16_t wFormatTag = 0;      /* data format */
    uint16_t wChannels;           /* number of channels */
    uint32_t dwSamplesPerSecond;  /* samples per second per channel*/
    uint32_t dwAvgBytesPerSec=0;  /* estimate of bytes per second needed */
    uint32_t wBlockAlign=0;       /* byte alignment of a basic sample block */
    uint16_t wBitsPerSample=0;    /* bits per sample */
    /* fmt chunk extension (not PCM) */
    uint16_t wExtSize=0;          /* extra bytes in the format extension */
    uint16_t wSamplesPerBlock;    /* samples per channel per block */
    /* wSamplesPerBlock and other things may go into format extension */

    /* fact chunk (not PCM) */
    uint32_t dwFactSize=4;        /* length of the fact chunk */
    uint64_t dwSamplesWritten=0;  /* windows doesn't seem to use this */

    /* data chunk */
    uint64_t dwDataLength;        /* length of sound data in bytes */
    /* end of variables written to header */

    /* internal variables, intermediate values etc */
    int bytespersample; /* (uncompressed) bytes per sample (per channel) */
    uint64_t blocksWritten = 0;
    sox_bool isExtensible = sox_false;    /* WAVE_FORMAT_EXTENSIBLE? */
    int i;

    if (ft->signal.channels > UINT16_MAX) {
        lsx_fail_errno(ft, SOX_EOF, "Too many channels (%u)",
                       ft->signal.channels);
        return SOX_EOF;
    }

    dwSamplesPerSecond = ft->signal.rate;
    wChannels = ft->signal.channels;
    wBitsPerSample = ft->encoding.bits_per_sample;
    wSamplesPerBlock = 1;       /* common default for PCM data */

    switch (ft->encoding.encoding)
    {
        case SOX_ENCODING_UNSIGNED:
        case SOX_ENCODING_SIGN2:
            wFormatTag = WAVE_FORMAT_PCM;
            bytespersample = (wBitsPerSample + 7)/8;
            wBlockAlign = wChannels * bytespersample;
            break;
        case SOX_ENCODING_FLOAT:
            wFormatTag = WAVE_FORMAT_IEEE_FLOAT;
            bytespersample = (wBitsPerSample + 7)/8;
            wBlockAlign = wChannels * bytespersample;
            break;
        case SOX_ENCODING_ALAW:
            wFormatTag = WAVE_FORMAT_ALAW;
            wBlockAlign = wChannels;
            break;
        case SOX_ENCODING_ULAW:
            wFormatTag = WAVE_FORMAT_MULAW;
            wBlockAlign = wChannels;
            break;
        case SOX_ENCODING_IMA_ADPCM:
            if (wChannels>16)
            {
                lsx_fail_errno(ft,SOX_EOF,"Channels(%d) must be <= 16",wChannels);
                return SOX_EOF;
            }
            wFormatTag = WAVE_FORMAT_IMA_ADPCM;
            wBlockAlign = wChannels * 256; /* reasonable default */
            wBitsPerSample = 4;
            wExtSize = 2;
            wSamplesPerBlock = lsx_ima_samples_in((size_t) 0, (size_t) wChannels, (size_t) wBlockAlign, (size_t) 0);
            break;
        case SOX_ENCODING_MS_ADPCM:
            if (wChannels>16)
            {
                lsx_fail_errno(ft,SOX_EOF,"Channels(%d) must be <= 16",wChannels);
                return SOX_EOF;
            }
            wFormatTag = WAVE_FORMAT_ADPCM;
            wBlockAlign = ft->signal.rate / 11008;
            wBlockAlign = max(wBlockAlign, 1) * wChannels * 256;
            wBitsPerSample = 4;
            wExtSize = 4+4*7;      /* Ext fmt data length */
            wSamplesPerBlock = lsx_ms_adpcm_samples_in((size_t) 0, (size_t) wChannels, (size_t) wBlockAlign, (size_t) 0);
            break;
#ifdef HAVE_LIBGSM
        case SOX_ENCODING_GSM:
            if (wChannels!=1)
            {
                lsx_report("Overriding GSM audio from %d channel to 1",wChannels);
                /* the length hint only needs rescaling once */
                if (!second_header)
                  ft->signal.length /= max(1, ft->signal.channels);
                wChannels = ft->signal.channels = 1;
            }
            wFormatTag = WAVE_FORMAT_GSM610;
            /* dwAvgBytesPerSec = 1625*(dwSamplesPerSecond/8000.)+0.5; */
            wBlockAlign=65;
            wBitsPerSample=0;  /* not representable as int   */
            wExtSize=2;        /* length of format extension */
            wSamplesPerBlock = 320;
            break;
#endif
        default:
            break;
    }

    /* the fmt chunk stores the block alignment in a 16-bit field */
    if (wBlockAlign > UINT16_MAX) {
        lsx_fail_errno(ft, SOX_EOF, "Too many channels (%u)",
                       ft->signal.channels);
        return SOX_EOF;
    }

    wav->formatTag = wFormatTag;
    wav->blockAlign = wBlockAlign;
    wav->samplesPerBlock = wSamplesPerBlock;

    /* When creating header, use length hint given by input file.  If no
     * hint then write default value.  Also, use default value even
     * on header update if more than 32-bit length needs to be written.
     */

    dwSamplesWritten =
        second_header ? wav->numSamples : ft->signal.length / wChannels;
    blocksWritten =
        (dwSamplesWritten + wSamplesPerBlock - 1) / wSamplesPerBlock;
    dwDataLength = blocksWritten * wBlockAlign;

    /* Round up to even.  The mask must be 64 bits wide: ~1u would be
     * zero-extended and silently clear the upper half of the length,
     * hiding oversized files from the UINT32_MAX clamp below.
     */
    if (wFormatTag == WAVE_FORMAT_GSM610)
        dwDataLength = (dwDataLength+1) & ~(uint64_t)1;

    if (wFormatTag == WAVE_FORMAT_PCM && (wBitsPerSample > 16 || wChannels > 2)
        && strcmp(ft->filetype, "wavpcm")) {
      isExtensible = sox_true;
      wFmtSize += 2 + 22;
    }
    else if (wFormatTag != WAVE_FORMAT_PCM)
        wFmtSize += 2+wExtSize; /* plus ExtData */

    /* computed from the unclamped data length, then clamped itself below */
    wRiffLength = 4 + (8+wFmtSize) + (8+dwDataLength+dwDataLength%2);
    if (isExtensible || wFormatTag != WAVE_FORMAT_PCM) /* PCM omits the "fact" chunk */
        wRiffLength += (8+dwFactSize);

    if (dwSamplesWritten > UINT32_MAX)
        dwSamplesWritten = UINT32_MAX;

    if (dwDataLength > UINT32_MAX)
        dwDataLength = UINT32_MAX;

    /* no length hint on the first pass: write the "unknown" marker */
    if (!second_header && !ft->signal.length)
        dwDataLength = UINT32_MAX;

    if (wRiffLength > UINT32_MAX)
        wRiffLength = UINT32_MAX;

    /* dwAvgBytesPerSec <-- this is BEFORE compression, isn't it? guess not. */
    dwAvgBytesPerSec = (double)wBlockAlign*ft->signal.rate / (double)wSamplesPerBlock + 0.5;

    /* figured out header info, so write it */

    /* If user specified opposite swap than we think, assume they are
     * asking to write a RIFX file.
     */
    if (ft->encoding.reverse_bytes == MACHINE_IS_LITTLEENDIAN)
    {
        if (!second_header)
            lsx_report("Requested to swap bytes so writing RIFX header");
        lsx_writes(ft, "RIFX");
    }
    else
        lsx_writes(ft, "RIFF");
    lsx_writedw(ft, wRiffLength);
    lsx_writes(ft, "WAVE");
    lsx_writes(ft, "fmt ");
    lsx_writedw(ft, wFmtSize);
    lsx_writew(ft, isExtensible ? WAVE_FORMAT_EXTENSIBLE : wFormatTag);
    lsx_writew(ft, wChannels);
    lsx_writedw(ft, dwSamplesPerSecond);
    lsx_writedw(ft, dwAvgBytesPerSec);
    lsx_writew(ft, wBlockAlign);
    lsx_writew(ft, wBitsPerSample); /* end info common to all fmts */

    if (isExtensible) {
      uint32_t dwChannelMask=0;  /* unassigned speaker mapping by default */
      static unsigned char const guids[][14] = {
        "\x00\x00\x00\x00\x10\x00\x80\x00\x00\xAA\x00\x38\x9B\x71",  /* wav */
        "\x00\x00\x21\x07\xd3\x11\x86\x44\xc8\xc1\xca\x00\x00\x00"}; /* amb */

      /* if not amb, assume most likely channel masks from number of channels; not
       * ideal solution, but will make files playable in many/most situations
       */
      if (strcmp(ft->filetype, "amb")) {
        if      (wChannels == 1) dwChannelMask = 0x4;     /* 1 channel (mono) = FC */
        else if (wChannels == 2) dwChannelMask = 0x3;     /* 2 channels (stereo) = FL, FR */
        else if (wChannels == 4) dwChannelMask = 0x33;    /* 4 channels (quad) = FL, FR, BL, BR */
        else if (wChannels == 6) dwChannelMask = 0x3F;    /* 6 channels (5.1) = FL, FR, FC, LF, BL, BR */
        else if (wChannels == 8) dwChannelMask = 0x63F;   /* 8 channels (7.1) = FL, FR, FC, LF, BL, BR, SL, SR */
      }

      lsx_writew(ft, 22);
      lsx_writew(ft, wBitsPerSample); /* No padding in container */
      lsx_writedw(ft, dwChannelMask); /* Speaker mapping is something reasonable */
      lsx_writew(ft, wFormatTag);
      lsx_writebuf(ft, guids[!strcmp(ft->filetype, "amb")], (size_t)14);
    }
    else if (wFormatTag != WAVE_FORMAT_PCM) {
        /* if not PCM, we need to write out wExtSize even if wExtSize=0 */
        lsx_writew(ft,wExtSize);
    }

    /* format-specific extension data announced by wExtSize */
    switch (wFormatTag)
    {
        case WAVE_FORMAT_IMA_ADPCM:
            lsx_writew(ft, wSamplesPerBlock);
            break;
        case WAVE_FORMAT_ADPCM:
            lsx_writew(ft, wSamplesPerBlock);
            lsx_writew(ft, 7); /* nCoefs */
            for (i=0; i<7; i++) {
                lsx_writew(ft, (uint16_t)(lsx_ms_adpcm_i_coef[i][0]));
                lsx_writew(ft, (uint16_t)(lsx_ms_adpcm_i_coef[i][1]));
            }
            break;
        case WAVE_FORMAT_GSM610:
            lsx_writew(ft, wSamplesPerBlock);
            break;
        default:
            break;
    }

    /* if not PCM, write the 'fact' chunk */
    if (isExtensible || wFormatTag != WAVE_FORMAT_PCM){
        lsx_writes(ft, "fact");
        lsx_writedw(ft,dwFactSize);
        lsx_writedw(ft,dwSamplesWritten);
    }

    lsx_writes(ft, "data");
    lsx_writedw(ft, dwDataLength);               /* data chunk size */

    if (!second_header) {
        /* uint32_t values are printed with PRIu32 so the conversion
         * specifier matches the argument type */
        lsx_debug("Writing Wave file: %s format, %d channel%s, %"PRIu32" samp/sec",
                wav_format_str(wFormatTag), wChannels,
                wChannels == 1 ? "" : "s", dwSamplesPerSecond);
        lsx_debug("        %"PRIu32" byte/sec, %"PRIu32" block align, %d bits/samp",
                dwAvgBytesPerSec, wBlockAlign, wBitsPerSample);
    } else {
        if (wRiffLength == UINT32_MAX || dwDataLength == UINT32_MAX ||
            dwSamplesWritten == UINT32_MAX)
            lsx_warn("File too large, writing truncated values in header");

        lsx_debug("Finished writing Wave file, %"PRIu64" data bytes %"PRIu64" samples",
                  dwDataLength, wav->numSamples);
#ifdef HAVE_LIBGSM
        if (wFormatTag == WAVE_FORMAT_GSM610){
            lsx_debug("GSM6.10 format: %"PRIu64" blocks %"PRIu64" padded samples %"PRIu64" padded data bytes",
                    blocksWritten, dwSamplesWritten, dwDataLength);
            if (wav->gsmbytecount != dwDataLength)
                lsx_warn("help ! internal inconsistency - data_written %"PRIu64" gsmbytecount %zu",
                         dwDataLength, wav->gsmbytecount);

        }
#endif
    }
    return SOX_SUCCESS;
}
1490 
/*
 * Encode and write len samples from buf.
 *
 * For the ADPCM formats the samples are reduced to 16 bits and collected
 * in wav->samples; each time that buffer fills up, xxxAdpcmWriteBlock()
 * is called to emit it.  GSM goes through wavgsmwrite() and everything
 * else through the raw writer; in those two cases wav->numSamples is
 * advanced here by the number of samples per channel written.
 *
 * Returns the number of samples consumed.
 */
static size_t write_samples(sox_format_t * ft, const sox_sample_t *buf, size_t len)
{
    priv_t *wav = (priv_t *) ft->priv;
    size_t total_len = len;

    ft->sox_errno = SOX_SUCCESS;

    switch (wav->formatTag)
    {
    case WAVE_FORMAT_IMA_ADPCM:
    case WAVE_FORMAT_ADPCM:
        while (len > 0) {
            short *p = wav->samplePtr;
            /* Clamp using counts rather than computing p + len, which
             * could point far outside the sample buffer. */
            size_t room = (size_t)(wav->sampleTop - p);
            size_t n = len < room ? len : room;
            short *top = p + n;

            len -= n; /* update residual len */
            while (p < top)
                *p++ = (*buf++) >> 16;

            wav->samplePtr = p;
            if (p == wav->sampleTop)
                xxxAdpcmWriteBlock(ft);
        }
        return total_len - len;

#ifdef HAVE_LIBGSM
    case WAVE_FORMAT_GSM610:
        len = wavgsmwrite(ft, buf, len);
        wav->numSamples += (len/ft->signal.channels);
        return len;
#endif

    default:
        len = lsx_rawwrite(ft, buf, len);
        wav->numSamples += (len/ft->signal.channels);
        return len;
    }
}
1533 
/*
 * Finish writing: flush any partially filled block, pad the data chunk
 * to an even length, release the encoder buffers and, if the header
 * written by startwrite() does not match what was actually written,
 * seek back and rewrite it.
 *
 * Returns SOX_SUCCESS, SOX_EOF if the header needs fixing but the output
 * cannot be rewound, or the result of wavwritehdr().
 */
static int stopwrite(sox_format_t * ft)
{
    priv_t *wav = (priv_t *) ft->priv;

    ft->sox_errno = SOX_SUCCESS;

    /* Call this to flush out any remaining data. */
    switch (wav->formatTag)
    {
    case WAVE_FORMAT_IMA_ADPCM:
    case WAVE_FORMAT_ADPCM:
        xxxAdpcmWriteBlock(ft);
        break;
#ifdef HAVE_LIBGSM
    case WAVE_FORMAT_GSM610:
        wavgsmstopwrite(ft);
        break;
#endif
    default:
        break;
    }

    /* Add a pad byte if the number of data bytes is odd.
       See wavwritehdr() above for the calculation. */
    if (wav->formatTag != WAVE_FORMAT_GSM610)
        lsx_padbytes(ft, (size_t)((wav->numSamples + wav->samplesPerBlock - 1)/wav->samplesPerBlock*wav->blockAlign) % 2);

    free(wav->packet);
    free(wav->samples);
    free(wav->lsx_ms_adpcm_i_coefs);
    /* do not leave dangling pointers behind in the private data */
    wav->packet = NULL;
    wav->samples = NULL;
    wav->lsx_ms_adpcm_i_coefs = NULL;

    /* All samples are already written out. */
    /* If file header needs fixing up, for example it needs the */
    /* number of samples in a field, seek back and write them here. */
    /* wav->numSamples counts samples per channel while ft->signal.length
     * counts samples over all channels, so scale before comparing;
     * otherwise the initial header would never be recognised as correct
     * for multi-channel output. */
    if (ft->signal.length && wav->numSamples <= 0xffffffff &&
        wav->numSamples * ft->signal.channels == ft->signal.length)
        return SOX_SUCCESS;
    if (!ft->seekable)
        return SOX_EOF;

    if (lsx_seeki(ft, (off_t)0, SEEK_SET) != 0)
    {
        lsx_fail_errno(ft,SOX_EOF,"Can't rewind output file to rewrite .wav header.");
        return SOX_EOF;
    }

    return (wavwritehdr(ft, 1));
}
1581 
1582 /*
1583  * Return a string corresponding to the wave format type.
1584  */
wav_format_str(unsigned tag)1585 static const char *wav_format_str(unsigned tag)
1586 {
1587     const struct wave_format *f = wav_find_format(tag);
1588     return f ? f->name : "unknown";
1589 }
1590 
/*
 * Seek to an absolute position in the audio data.
 *
 * offset is given in samples counted over all channels, measured from
 * the start of the data chunk.  Encodings whose sample size is not a
 * whole number of bytes are rejected with SOX_ENOTSUP.  On success
 * wav->numSamples is updated to the number of samples per channel left
 * after the new position.
 *
 * Returns the resulting ft->sox_errno.
 */
static int seek(sox_format_t * ft, uint64_t offset)
{
  priv_t *   wav = (priv_t *) ft->priv;

  if (ft->encoding.bits_per_sample & 7)
    lsx_fail_errno(ft, SOX_ENOTSUP, "seeking not supported with this encoding");
  else if (wav->formatTag == WAVE_FORMAT_GSM610) {
    int alignment;
    size_t gsmoff;

    /* rounding bytes to blockAlign so that we
     * don't have to decode partial block. */
    gsmoff = offset * wav->blockAlign / wav->samplesPerBlock +
             wav->blockAlign * ft->signal.channels / 2;
    gsmoff -= gsmoff % (wav->blockAlign * ft->signal.channels);

    ft->sox_errno = lsx_seeki(ft, (off_t)(gsmoff + wav->dataStart), SEEK_SET);
    if (ft->sox_errno == SOX_SUCCESS) {
      /* offset is in samples */
      uint64_t new_offset = offset;
      alignment = offset % wav->samplesPerBlock;
      if (alignment != 0)
          new_offset += (wav->samplesPerBlock - alignment);
      wav->numSamples = ft->signal.length - (new_offset / ft->signal.channels);
    }
  } else {
    /* round down to a whole frame, then convert to a byte position;
     * the double round trip rejects positions off_t cannot hold */
    double wide_sample = offset - (offset % ft->signal.channels);
    double to_d = wide_sample * ft->encoding.bits_per_sample / 8;
    off_t to = to_d;
    ft->sox_errno = (to != to_d)? SOX_EOF : lsx_seeki(ft, (off_t)wav->dataStart + (off_t)to, SEEK_SET);
    if (ft->sox_errno == SOX_SUCCESS && !wav->ignoreSize) {
      /* The seek is absolute, so the remaining count has to be derived
       * from the total length rather than subtracted from whatever is
       * currently left; otherwise seeking after reading, or seeking
       * twice, leaves numSamples too small or wrapped around.
       * With ignoreSize set the count is not used to limit reads and
       * is left alone. */
      uint64_t total = ft->signal.length / ft->signal.channels;
      uint64_t target = (uint64_t)wide_sample / ft->signal.channels;
      wav->numSamples = target < total ? total - target : 0;
    }
  }

  return ft->sox_errno;
}
1627 
LSX_FORMAT_HANDLER(wav)1628 LSX_FORMAT_HANDLER(wav)
1629 {
1630   static char const * const names[] = {"wav", "wavpcm", "amb", NULL};
1631   static unsigned const write_encodings[] = {
1632     SOX_ENCODING_SIGN2, 16, 24, 32, 0,
1633     SOX_ENCODING_UNSIGNED, 8, 0,
1634     SOX_ENCODING_ULAW, 8, 0,
1635     SOX_ENCODING_ALAW, 8, 0,
1636 #ifdef HAVE_LIBGSM
1637     SOX_ENCODING_GSM, 0,
1638 #endif
1639     SOX_ENCODING_MS_ADPCM, 4, 0,
1640     SOX_ENCODING_IMA_ADPCM, 4, 0,
1641     SOX_ENCODING_FLOAT, 32, 64, 0,
1642     0};
1643   static sox_format_handler_t const handler = {SOX_LIB_VERSION_CODE,
1644     "Microsoft audio format", names, SOX_FILE_LIT_END,
1645     startread, read_samples, stopread,
1646     startwrite, write_samples, stopwrite,
1647     seek, write_encodings, NULL, sizeof(priv_t)
1648   };
1649   return &handler;
1650 }
1651