1 // Ogg Vorbis audio decoder - v1.10 - public domain
2 // http://nothings.org/stb_vorbis/
3 //
4 // Original version written by Sean Barrett in 2007.
5 //
6 // Originally sponsored by RAD Game Tools. Seeking sponsored
7 // by Phillip Bennefall, Marc Andersen, Aaron Baker, Elias Software,
8 // Aras Pranckevicius, and Sean Barrett.
9 //
10 // LICENSE
11 //
12 //   See end of file for license information.
13 //
14 // Limitations:
15 //
16 //   - floor 0 not supported (used in old ogg vorbis files pre-2004)
17 //   - lossless sample-truncation at beginning ignored
18 //   - cannot concatenate multiple vorbis streams
19 //   - sample positions are 32-bit, limiting seekable 192Khz
20 //       files to around 6 hours (Ogg supports 64-bit)
21 //
22 // Feature contributors:
23 //    Dougall Johnson (sample-exact seeking)
24 //
25 // Bugfix/warning contributors:
26 //    Terje Mathisen     Niklas Frykholm     Andy Hill
27 //    Casey Muratori     John Bolton         Gargaj
28 //    Laurent Gomila     Marc LeBlanc        Ronny Chevalier
29 //    Bernhard Wodo      Evan Balster        alxprd@github
30 //    Tom Beaumont       Ingo Leitgeb        Nicolas Guillemot
31 //    Phillip Bennefall  Rohit               Thiago Goulart
32 //    manxorist@github   saga musix
33 //
34 // Partial history:
35 //    1.10    - 2017/03/03 - more robust seeking; fix negative ilog(); clear error in open_memory
36 //    1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
37 //    1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
38 //                           avoid discarding last frame of audio data
39 //    1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
40 //                           some more crash fixes when out of memory or with corrupt files
41 //    1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
42 //                           some crash fixes when out of memory or with corrupt files
43 //                           fix some inappropriately signed shifts
44 //    1.05    - 2015/04/19 - don't define __forceinline if it's redundant
45 //    1.04    - 2014/08/27 - fix missing const-correct case in API
46 //    1.03    - 2014/08/07 - warning fixes
47 //    1.02    - 2014/07/09 - declare qsort comparison as explicitly _cdecl in Windows
48 //    1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float (interleaved was correct)
49 //    1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in >2-channel;
50 //                           (API change) report sample rate for decode-full-file funcs
51 //    0.99996 -            - bracket #include <malloc.h> for macintosh compilation
52 //    0.99995 -            - avoid alias-optimization issue in float-to-int conversion
53 //
54 // See end of file for full version history.
55 // D translation by Ketmar // Invisible Vector
56 // stolen by adam and module renamed.
57 /++
58 	Port of stb_vorbis to D. Provides .ogg audio file reading capabilities. See [arsd.simpleaudio] for code that can use this to actually load and play the file.
59 +/
60 module arsd.vorbis;
61 
62 import core.stdc.stdio : FILE;
63 
// Fallback `lrintf` for Windows builds, exported with C linkage under the
// standard name.
// C's `lrintf` rounds to the nearest integer (ties to even under the default
// rounding mode); a plain `cast(int)` truncates toward zero instead, which
// gives different results than the real libc function on other platforms.
// This version rounds to nearest, ties to even, without needing any libc call.
// Behaviour for NaN or values outside the `int` range is whatever `cast(int)`
// does, exactly as before.
version(Windows)
	extern(C) int lrintf(float f) {
		int whole = cast(int) f;  // integer part, truncated toward zero
		float frac = f - whole;   // exact: removing the integer part never rounds
		if (frac > 0.5f || (frac == 0.5f && (whole & 1))) return whole + 1;
		if (frac < -0.5f || (frac == -0.5f && (whole & 1))) return whole - 1;
		return whole;
	}
66 
67 nothrow /*@trusted*/:
68 @nogc { // code block, as c macro helper is not @nogc; yet it's CTFE-only
// C `qsort` is declared by hand here instead of being imported from druntime:
// druntime's declaration carries no `@nogc` (for a reason), while this
// prototype sits inside the surrounding `nothrow`/`@nogc` scope and so can be
// called from the decoder code. The comparison callback receives pointers to
// the two elements being compared.
private extern(C) void qsort (void* base, size_t nmemb, size_t size, int function(in void*, in void*) compar);
71 
72 
73 //////////////////////////////////////////////////////////////////////////////
74 //
75 //  HEADER BEGINS HERE
76 //
77 
78 ///////////   THREAD SAFETY
79 
80 // Individual VorbisDecoder handles are not thread-safe; you cannot decode from
81 // them from multiple threads at the same time. However, you can have multiple
// VorbisDecoder handles and decode from them independently in multiple threads.
83 
84 
85 ///////////   MEMORY ALLOCATION
86 
87 // normally stb_vorbis uses malloc() to allocate memory at startup,
88 // and alloca() to allocate temporary memory during a frame on the
89 // stack. (Memory consumption will depend on the amount of setup
90 // data in the file and how you set the compile flags for speed
91 // vs. size. In my test files the maximal-size usage is ~150KB.)
92 //
93 // You can modify the wrapper functions in the source (setup_malloc,
94 // setup_temp_malloc, temp_malloc) to change this behavior, or you
95 // can use a simpler allocation model: you pass in a buffer from
96 // which stb_vorbis will allocate _all_ its memory (including the
97 // temp memory). "open" may fail with a VORBIS_outofmem if you
98 // do not pass in enough data; there is no way to determine how
99 // much you do need except to succeed (at which point you can
100 // query get_info to find the exact amount required. yes I know
101 // this is lame).
102 //
103 // If you pass in a non-null buffer of the type below, allocation
104 // will occur from it as described above. Otherwise just pass null
105 // to use malloc()/alloca()
106 
/// Optional caller-supplied memory arena. When a non-null buffer of this type
/// is passed to an "open" call, all decoder memory (including temporary
/// memory) is carved out of it instead of using malloc()/alloca().
public struct stb_vorbis_alloc {
  ubyte* alloc_buffer;              // start of the caller-owned buffer
  int alloc_buffer_length_in_bytes; // size of `alloc_buffer`, in bytes
}
111 
112 
113 ///////////   FUNCTIONS USEABLE WITH ALL INPUT MODES
114 
115 /*
116 public struct stb_vorbis_info {
117   uint sample_rate;
118   int channels;
119 
120   uint setup_memory_required;
121   uint setup_temp_memory_required;
122   uint temp_memory_required;
123 
124   int max_frame_size;
125 }
126 */
127 
128 
129 /* ************************************************************************** *
130 // get general information about the file
131 stb_vorbis_info stb_vorbis_get_info (VorbisDecoder f);
132 
133 // get the last error detected (clears it, too)
134 int stb_vorbis_get_error (VorbisDecoder f);
135 
136 // close an ogg vorbis file and free all memory in use
137 void stb_vorbis_close (VorbisDecoder f);
138 
139 // this function returns the offset (in samples) from the beginning of the
140 // file that will be returned by the next decode, if it is known, or -1
141 // otherwise. after a flush_pushdata() call, this may take a while before
142 // it becomes valid again.
143 // NOT WORKING YET after a seek with PULLDATA API
144 int stb_vorbis_get_sample_offset (VorbisDecoder f);
145 
146 // returns the current seek point within the file, or offset from the beginning
147 // of the memory buffer. In pushdata mode it returns 0.
148 uint stb_vorbis_get_file_offset (VorbisDecoder f);
149 
150 
151 ///////////   PUSHDATA API
152 
153 // this API allows you to get blocks of data from any source and hand
154 // them to stb_vorbis. you have to buffer them; stb_vorbis will tell
155 // you how much it used, and you have to give it the rest next time;
156 // and stb_vorbis may not have enough data to work with and you will
157 // need to give it the same data again PLUS more. Note that the Vorbis
158 // specification does not bound the size of an individual frame.
159 
160 // create a vorbis decoder by passing in the initial data block containing
//    the ogg&vorbis headers (you don't need to parse them, just provide
//    the first N bytes of the file--you're told if it's not enough, see below)
// on success, returns a VorbisDecoder, does not set error, returns the amount of
//    data parsed/consumed on this call in *datablock_memory_consumed_in_bytes;
// on failure, returns null and sets *error, does not change *datablock_memory_consumed
166 // if returns null and *error is VORBIS_need_more_data, then the input block was
167 //       incomplete and you need to pass in a larger block from the start of the file
168 VorbisDecoder stb_vorbis_open_pushdata (
169               ubyte* datablock, int datablock_length_in_bytes,
170               int* datablock_memory_consumed_in_bytes,
171               int* error,
172               stb_vorbis_alloc* alloc_buffer
173             );
174 
175 // decode a frame of audio sample data if possible from the passed-in data block
176 //
177 // return value: number of bytes we used from datablock
178 //
179 // possible cases:
180 //     0 bytes used, 0 samples output (need more data)
181 //     N bytes used, 0 samples output (resynching the stream, keep going)
182 //     N bytes used, M samples output (one frame of data)
183 // note that after opening a file, you will ALWAYS get one N-bytes, 0-sample
184 // frame, because Vorbis always "discards" the first frame.
185 //
186 // Note that on resynch, stb_vorbis will rarely consume all of the buffer,
187 // instead only datablock_length_in_bytes-3 or less. This is because it wants
188 // to avoid missing parts of a page header if they cross a datablock boundary,
189 // without writing state-machiney code to record a partial detection.
190 //
191 // The number of channels returned are stored in *channels (which can be
192 // null--it is always the same as the number of channels reported by
193 // get_info). *output will contain an array of float* buffers, one per
194 // channel. In other words, (*output)[0][0] contains the first sample from
195 // the first channel, and (*output)[1][0] contains the first sample from
196 // the second channel.
197 int stb_vorbis_decode_frame_pushdata (
198       VorbisDecoder f, ubyte* datablock, int datablock_length_in_bytes,
199       int* channels,   // place to write number of float * buffers
200       float*** output, // place to write float ** array of float * buffers
201       int* samples     // place to write number of output samples
202     );
203 
204 // inform stb_vorbis that your next datablock will not be contiguous with
205 // previous ones (e.g. you've seeked in the data); future attempts to decode
206 // frames will cause stb_vorbis to resynchronize (as noted above), and
207 // once it sees a valid Ogg page (typically 4-8KB, as large as 64KB), it
208 // will begin decoding the _next_ frame.
209 //
210 // if you want to seek using pushdata, you need to seek in your file, then
211 // call stb_vorbis_flush_pushdata(), then start calling decoding, then once
212 // decoding is returning you data, call stb_vorbis_get_sample_offset, and
213 // if you don't like the result, seek your file again and repeat.
214 void stb_vorbis_flush_pushdata (VorbisDecoder f);
215 
216 
217 //////////   PULLING INPUT API
218 
219 // This API assumes stb_vorbis is allowed to pull data from a source--
220 // either a block of memory containing the _entire_ vorbis stream, or a
221 // FILE* that you or it create, or possibly some other reading mechanism
222 // if you go modify the source to replace the FILE* case with some kind
223 // of callback to your code. (But if you don't support seeking, you may
224 // just want to go ahead and use pushdata.)
225 
226 // decode an entire file and output the data interleaved into a malloc()ed
227 // buffer stored in *output. The return value is the number of samples
228 // decoded, or -1 if the file could not be opened or was not an ogg vorbis file.
229 // When you're done with it, just free() the pointer returned in *output.
230 int stb_vorbis_decode_filename (const(char)* filename, int* channels, int* sample_rate, short** output);
231 int stb_vorbis_decode_memory (const(ubyte)* mem, int len, int* channels, int* sample_rate, short** output);
232 
233 // create an ogg vorbis decoder from an ogg vorbis stream in memory (note
234 // this must be the entire stream!). on failure, returns null and sets *error
235 VorbisDecoder stb_vorbis_open_memory (const(ubyte)* data, int len, int* error, stb_vorbis_alloc* alloc_buffer);
236 
237 // create an ogg vorbis decoder from a filename via fopen(). on failure,
238 // returns null and sets *error (possibly to VORBIS_file_open_failure).
239 VorbisDecoder stb_vorbis_open_filename (const(char)* filename, int* error, stb_vorbis_alloc* alloc_buffer);
240 
241 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
242 // the _current_ seek point (ftell). on failure, returns null and sets *error.
243 // note that stb_vorbis must "own" this stream; if you seek it in between
// calls to stb_vorbis, it will become confused. Moreover, if you attempt to
245 // perform stb_vorbis_seek_*() operations on this file, it will assume it
246 // owns the _entire_ rest of the file after the start point. Use the next
247 // function, stb_vorbis_open_file_section(), to limit it.
248 VorbisDecoder stb_vorbis_open_file (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer);
249 
250 // create an ogg vorbis decoder from an open FILE*, looking for a stream at
251 // the _current_ seek point (ftell); the stream will be of length 'len' bytes.
252 // on failure, returns null and sets *error. note that stb_vorbis must "own"
253 // this stream; if you seek it in between calls to stb_vorbis, it will become
254 // confused.
255 VorbisDecoder stb_vorbis_open_file_section (FILE* f, int close_handle_on_close, int* error, stb_vorbis_alloc* alloc_buffer, uint len);
256 
257 // these functions seek in the Vorbis file to (approximately) 'sample_number'.
258 // after calling seek_frame(), the next call to get_frame_*() will include
259 // the specified sample. after calling stb_vorbis_seek(), the next call to
260 // stb_vorbis_get_samples_* will start with the specified sample. If you
261 // do not need to seek to EXACTLY the target sample when using get_samples_*,
262 // you can also use seek_frame().
263 int stb_vorbis_seek_frame (VorbisDecoder f, uint sample_number);
264 int stb_vorbis_seek (VorbisDecoder f, uint sample_number);
265 
266 // this function is equivalent to stb_vorbis_seek(f, 0)
267 int stb_vorbis_seek_start (VorbisDecoder f);
268 
269 // these functions return the total length of the vorbis stream
270 uint stb_vorbis_stream_length_in_samples (VorbisDecoder f);
271 float stb_vorbis_stream_length_in_seconds (VorbisDecoder f);
272 
273 // decode the next frame and return the number of samples. the number of
274 // channels returned are stored in *channels (which can be null--it is always
275 // the same as the number of channels reported by get_info). *output will
276 // contain an array of float* buffers, one per channel. These outputs will
277 // be overwritten on the next call to stb_vorbis_get_frame_*.
278 //
279 // You generally should not intermix calls to stb_vorbis_get_frame_*()
280 // and stb_vorbis_get_samples_*(), since the latter calls the former.
281 int stb_vorbis_get_frame_float (VorbisDecoder f, int* channels, float*** output);
282 
283 // decode the next frame and return the number of *samples* per channel.
284 // Note that for interleaved data, you pass in the number of shorts (the
285 // size of your array), but the return value is the number of samples per
286 // channel, not the total number of samples.
287 //
288 // The data is coerced to the number of channels you request according to the
289 // channel coercion rules (see below). You must pass in the size of your
290 // buffer(s) so that stb_vorbis will not overwrite the end of the buffer.
291 // The maximum buffer size needed can be gotten from get_info(); however,
292 // the Vorbis I specification implies an absolute maximum of 4096 samples
293 // per channel.
294 int stb_vorbis_get_frame_short_interleaved (VorbisDecoder f, int num_c, short* buffer, int num_shorts);
295 int stb_vorbis_get_frame_short (VorbisDecoder f, int num_c, short** buffer, int num_samples);
296 
297 // Channel coercion rules:
298 //    Let M be the number of channels requested, and N the number of channels present,
299 //    and Cn be the nth channel; let stereo L be the sum of all L and center channels,
300 //    and stereo R be the sum of all R and center channels (channel assignment from the
301 //    vorbis spec).
302 //        M    N       output
303 //        1    k      sum(Ck) for all k
304 //        2    *      stereo L, stereo R
305 //        k    l      k > l, the first l channels, then 0s
306 //        k    l      k <= l, the first k channels
307 //    Note that this is not _good_ surround etc. mixing at all! It's just so
308 //    you get something useful.
309 
310 // gets num_samples samples, not necessarily on a frame boundary--this requires
311 // buffering so you have to supply the buffers. DOES NOT APPLY THE COERCION RULES.
312 // Returns the number of samples stored per channel; it may be less than requested
313 // at the end of the file. If there are no more samples in the file, returns 0.
314 int stb_vorbis_get_samples_float_interleaved (VorbisDecoder f, int channels, float* buffer, int num_floats);
315 int stb_vorbis_get_samples_float (VorbisDecoder f, int channels, float** buffer, int num_samples);
316 
317 // gets num_samples samples, not necessarily on a frame boundary--this requires
318 // buffering so you have to supply the buffers. Applies the coercion rules above
319 // to produce 'channels' channels. Returns the number of samples stored per channel;
320 // it may be less than requested at the end of the file. If there are no more
321 // samples in the file, returns 0.
322 int stb_vorbis_get_samples_short_interleaved (VorbisDecoder f, int channels, short* buffer, int num_shorts);
323 int stb_vorbis_get_samples_short (VorbisDecoder f, int channels, short** buffer, int num_samples);
324 */
325 
326 ////////   ERROR CODES
327 
/// Error codes reported by the decoder.
/// Members without an explicit value continue counting from the previous
/// member, so the numeric groups are: 0 (no error), 1..7 (API/usage),
/// 10..11 (EOF/seek), 20..21 (vorbis stream), 30..37 (ogg container).
public enum STBVorbisError {
  no_error,              // 0: nothing went wrong

  need_more_data = 1,    // not a real error

  invalid_api_mixing,    // can't mix API modes
  outofmem,              // not enough memory
  feature_not_supported, // uses floor 0
  too_many_channels,     // STB_VORBIS_MAX_CHANNELS is too small
  file_open_failure,     // fopen() failed
  seek_without_length,   // can't seek in unknown-length file

  unexpected_eof = 10,   // file is truncated?
  seek_invalid,          // seek past EOF

  // decoding errors (corrupt/invalid stream) -- you probably
  // don't care about the exact details of these

  // vorbis errors:
  invalid_setup = 20,
  invalid_stream,

  // ogg errors:
  missing_capture_pattern = 30,
  invalid_stream_structure_version,
  continued_packet_flag_invalid,
  incorrect_stream_serial_number,
  invalid_first_page,
  bad_packet_type,
  cant_find_last_page,
  seek_failed,
}
360 //
361 //  HEADER ENDS HERE
362 //
363 //////////////////////////////////////////////////////////////////////////////
364 
365 
366 // global configuration settings (e.g. set these in the project/makefile),
367 // or just set them in this file at the top (although ideally the first few
368 // should be visible when the header file is compiled too, although it's not
369 // crucial)
370 
371 // STB_VORBIS_NO_INTEGER_CONVERSION
372 //     does not compile the code for converting audio sample data from
373 //     float to integer (implied by STB_VORBIS_NO_PULLDATA_API)
374 //version = STB_VORBIS_NO_INTEGER_CONVERSION;
375 
376 // STB_VORBIS_NO_FAST_SCALED_FLOAT
377 //      does not use a fast float-to-int trick to accelerate float-to-int on
378 //      most platforms which requires endianness be defined correctly.
379 //version = STB_VORBIS_NO_FAST_SCALED_FLOAT;
380 
381 // STB_VORBIS_MAX_CHANNELS [number]
382 //     globally define this to the maximum number of channels you need.
383 //     The spec does not put a restriction on channels except that
384 //     the count is stored in a byte, so 255 is the hard limit.
385 //     Reducing this saves about 16 bytes per value, so using 16 saves
//     (255-16)*16 or around 4KB. Plus any other memory usage
387 //     I forgot to account for. Can probably go as low as 8 (7.1 audio),
388 //     6 (5.1 audio), or 2 (stereo only).
// Compile-time channel limit; its upper bound is checked by a static assert further down.
enum STB_VORBIS_MAX_CHANNELS = 16; // enough for anyone?
390 
391 // STB_VORBIS_PUSHDATA_CRC_COUNT [number]
392 //     after a flush_pushdata(), stb_vorbis begins scanning for the
393 //     next valid page, without backtracking. when it finds something
394 //     that looks like a page, it streams through it and verifies its
395 //     CRC32. Should that validation fail, it keeps scanning. But it's
396 //     possible that _while_ streaming through to check the CRC32 of
397 //     one candidate page, it sees another candidate page. This #define
398 //     determines how many "overlapping" candidate pages it can search
399 //     at once. Note that "real" pages are typically ~4KB to ~8KB, whereas
400 //     garbage pages could be as big as 64KB, but probably average ~16KB.
401 //     So don't hose ourselves by scanning an apparent 64KB page and
402 //     missing a ton of real ones in the interim; so minimum of 2
// Number of overlapping candidate pages that can be CRC-checked at once.
enum STB_VORBIS_PUSHDATA_CRC_COUNT = 4;
404 
405 // STB_VORBIS_FAST_HUFFMAN_LENGTH [number]
406 //     sets the log size of the huffman-acceleration table.  Maximum
407 //     supported value is 24. with larger numbers, more decodings are O(1),
408 //     but the table size is larger so worse cache missing, so you'll have
409 //     to probe (and try multiple ogg vorbis files) to find the sweet spot.
// log2 of the fast-huffman table size, i.e. the table has 1<<10 = 1024 entries.
enum STB_VORBIS_FAST_HUFFMAN_LENGTH = 10;
411 
412 // STB_VORBIS_FAST_BINARY_LENGTH [number]
413 //     sets the log size of the binary-search acceleration table. this
414 //     is used in similar fashion to the fast-huffman size to set initial
415 //     parameters for the binary search
416 
417 // STB_VORBIS_FAST_HUFFMAN_INT
418 //     The fast huffman tables are much more efficient if they can be
419 //     stored as 16-bit results instead of 32-bit results. This restricts
420 //     the codebooks to having only 65535 possible outcomes, though.
421 //     (At least, accelerated by the huffman table.)
422 //version = STB_VORBIS_FAST_HUFFMAN_INT;
// Default selection: unless STB_VORBIS_FAST_HUFFMAN_INT was set, define
// STB_VORBIS_FAST_HUFFMAN_SHORT (the 16-bit variant of the fast huffman table).
version(STB_VORBIS_FAST_HUFFMAN_INT) {} else version = STB_VORBIS_FAST_HUFFMAN_SHORT;
424 
425 // STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH
426 //     If the 'fast huffman' search doesn't succeed, then stb_vorbis falls
427 //     back on binary searching for the correct one. This requires storing
428 //     extra tables with the huffman codes in sorted order. Defining this
429 //     symbol trades off space for speed by forcing a linear search in the
430 //     non-fast case, except for "sparse" codebooks.
431 //version = STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH;
432 
433 // STB_VORBIS_DIVIDES_IN_RESIDUE
434 //     stb_vorbis precomputes the result of the scalar residue decoding
435 //     that would otherwise require a divide per chunk. you can trade off
436 //     space for time by defining this symbol.
437 //version = STB_VORBIS_DIVIDES_IN_RESIDUE;
438 
439 // STB_VORBIS_DIVIDES_IN_CODEBOOK
440 //     vorbis VQ codebooks can be encoded two ways: with every case explicitly
441 //     stored, or with all elements being chosen from a small range of values,
442 //     and all values possible in all elements. By default, stb_vorbis expands
443 //     this latter kind out to look like the former kind for ease of decoding,
444 //     because otherwise an integer divide-per-vector-element is required to
445 //     unpack the index. If you define STB_VORBIS_DIVIDES_IN_CODEBOOK, you can
446 //     trade off storage for speed.
447 //version = STB_VORBIS_DIVIDES_IN_CODEBOOK;
448 
// Removed option: fail the build loudly if somebody still sets it.
version(STB_VORBIS_CODEBOOK_SHORTS) static assert(0, "STB_VORBIS_CODEBOOK_SHORTS is no longer supported as it produced incorrect results for some input formats");
450 
451 // STB_VORBIS_DIVIDE_TABLE
452 //     this replaces small integer divides in the floor decode loop with
453 //     table lookups. made less than 1% difference, so disabled by default.
454 //version = STB_VORBIS_DIVIDE_TABLE;
455 
456 // STB_VORBIS_NO_DEFER_FLOOR
457 //     Normally we only decode the floor without synthesizing the actual
458 //     full curve. We can instead synthesize the curve immediately. This
459 //     requires more memory and is very likely slower, so I don't think
460 //     you'd ever want to do it except for debugging.
461 //version = STB_VORBIS_NO_DEFER_FLOOR;
462 //version(STB_VORBIS_CODEBOOK_FLOATS) static assert(0);
463 
464 
465 // ////////////////////////////////////////////////////////////////////////// //
// Everything from here on is module-private unless stated otherwise.
private:
// Compile-time sanity checks for the configuration constants above.
// NOTE(review): the STB_VORBIS_MAX_CHANNELS documentation names 255 as the
// hard limit, while this assert accepts 256 -- confirm which is intended.
static assert(STB_VORBIS_MAX_CHANNELS <= 256, "Value of STB_VORBIS_MAX_CHANNELS outside of allowed range");
static assert(STB_VORBIS_FAST_HUFFMAN_LENGTH <= 24, "Value of STB_VORBIS_FAST_HUFFMAN_LENGTH outside of allowed range");

enum MAX_BLOCKSIZE_LOG = 13; // from specification
enum MAX_BLOCKSIZE = (1 << MAX_BLOCKSIZE_LOG); // 8192
472 
473 
// Element type of `Codebook.multiplicands`.
alias codetype = float;
475 
476 // @NOTE
477 //
478 // Some arrays below are tagged "//varies", which means it's actually
479 // a variable-sized piece of data, but rather than malloc I assume it's
480 // small enough it's better to just allocate it all together with the
481 // main thing
482 //
483 // Most of the variables are specified with the smallest size I could pack
484 // them into. It might give better performance to make them all full-sized
485 // integers. It should be safe to freely rearrange the structures or change
486 // the sizes larger--nothing relies on silently truncating etc., nor the
487 // order of variables.
488 
// Entry count of the fast huffman table (a power of two) ...
enum FAST_HUFFMAN_TABLE_SIZE = (1<<STB_VORBIS_FAST_HUFFMAN_LENGTH);
// ... and the matching all-ones bit mask (SIZE-1).
enum FAST_HUFFMAN_TABLE_MASK = (FAST_HUFFMAN_TABLE_SIZE-1);
491 
// One codebook: huffman code tables plus optional value-lookup data.
// NOTE(review): per-field meanings below are inferred from names and from the
// configuration comments in this file; confirm against the setup code.
struct Codebook {
  int dimensions, entries;
  ubyte* codeword_lengths;   // presumably one length per entry (NO_CODE = unused) -- TODO confirm
  float minimum_value;
  float delta_value;
  ubyte value_bits;
  ubyte lookup_type;
  ubyte sequence_p;
  ubyte sparse;
  uint lookup_values;
  codetype* multiplicands;   // `codetype` is an alias for float
  uint *codewords;
  // Acceleration table with FAST_HUFFMAN_TABLE_SIZE entries; the element type
  // is 16-bit unless STB_VORBIS_FAST_HUFFMAN_INT was selected.
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    short[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  } else {
    int[FAST_HUFFMAN_TABLE_SIZE] fast_huffman;
  }
  // presumably the sorted tables used by the binary-search fallback -- TODO confirm
  uint* sorted_codewords;
  int* sorted_values;
  int sorted_entries;
}
513 
// Setup data for a "floor 0" curve. The file header lists floor 0 as an
// unsupported feature, so this mostly exists to complete the `Floor` union.
struct Floor0 {
  ubyte order;
  ushort rate;
  ushort bark_map_size;
  ubyte amplitude_bits;
  ubyte amplitude_offset;
  ubyte number_of_books;
  ubyte[16] book_list; // varies
}
523 
// Setup data for a "floor 1" curve. Arrays tagged "varies" are logically
// variable-sized but stored inline at a fixed maximum size (see the @NOTE
// above the struct definitions). 31*8+2 evaluates to 250.
struct Floor1 {
  ubyte partitions;
  ubyte[32] partition_class_list; // varies
  ubyte[16] class_dimensions; // varies
  ubyte[16] class_subclasses; // varies
  ubyte[16] class_masterbooks; // varies
  short[8][16] subclass_books; // varies (16 rows of 8 shorts)
  ushort[31*8+2] Xlist; // varies
  ubyte[31*8+2] sorted_order;
  ubyte[2][31*8+2] neighbors; // 250 pairs of bytes
  ubyte floor1_multiplier;
  ubyte rangebits;
  int values;
}
538 
// Storage for either floor kind; both members share the same memory, so the
// user must know from elsewhere which one is valid.
union Floor {
  Floor0 floor0;
  Floor1 floor1;
}
543 
// Setup data for one residue configuration.
struct Residue {
  uint begin, end;
  uint part_size;
  ubyte classifications;
  ubyte classbook;
  ubyte** classdata;
  //int16 (*residue_books)[8];
  short[8]* residue_books; // pointer to rows of 8 shorts (D spelling of the C declaration above)
}
553 
// Per-channel entry of a `Mapping` (pointed to by `Mapping.chan`).
struct MappingChannel {
  ubyte magnitude;
  ubyte angle;
  ubyte mux;
}
559 
// Setup data for one channel mapping.
struct Mapping {
  ushort coupling_steps;
  MappingChannel* chan;     // separately allocated; length not visible here
  ubyte submaps;
  ubyte[15] submap_floor; // varies
  ubyte[15] submap_residue; // varies
}
567 
// Setup data for one mode.
struct Mode {
  ubyte blockflag;
  ubyte mapping;        // presumably an index into the mapping table -- TODO confirm
  ushort windowtype;
  ushort transformtype;
}
574 
// State of one in-progress CRC verification of a candidate page (see the
// STB_VORBIS_PUSHDATA_CRC_COUNT description for how many may run at once).
struct CRCscan {
  uint goal_crc;   // expected crc if match
  int bytes_left;  // bytes left in packet
  uint crc_so_far; // running crc
  int bytes_done;  // bytes processed in _current_ chunk
  uint sample_loc; // granule pos encoded in page
}
582 
// Extent of a page and the last sample decoded from it.
// NOTE(review): presumably used by the seeking code and the offsets are
// presumably byte positions in the stream -- confirm.
struct ProbedPage {
  uint page_start, page_end;
  uint last_decoded_sample;
}
587 
// Records `e` as the decoder's current error and returns 0, so callers can
// write `return error(f, ...)` to fail in one statement.
private int error (VorbisDecoder f, STBVorbisError e) {
  f.error = e;
  // A "real" failure is anything but need_more_data while not at EOF.
  immutable bool realFailure = !f.eof && e != STBVorbisError.need_more_data;
  if (realFailure) {
    // deliberately redundant store: a convenient place for a debugger breakpoint
    f.error = e;
  }
  return 0;
}
595 
596 // these functions are used for allocating temporary memory
597 // while decoding. if you can afford the stack space, use
598 // alloca(); otherwise, provide a temp buffer and it will
599 // allocate out of those.
// Asks the decoder's allocator for its current temp-memory save point.
uint temp_alloc_save (VorbisDecoder f) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return f.alloc.tempSave(f);
}
// Hands a save point obtained from temp_alloc_save back to the allocator.
void temp_alloc_restore (VorbisDecoder f, uint p) nothrow @nogc {
  static if (__VERSION__ > 2067) pragma(inline, true);
  f.alloc.tempRestore(p, f);
}
// Intentionally a no-op: the body is empty, so nothing is released here.
void temp_free (VorbisDecoder f, void* p) nothrow @nogc {}
603 /*
604 T* temp_alloc(T) (VorbisDecoder f, uint count) nothrow @nogc {
605   auto res = f.alloc.alloc(count*T.sizeof, f);
606   return cast(T*)res;
607 }
608 */
609 
610 /+
611 enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
612 
613 // has to be a mixin, due to `alloca`
614 template temp_alloc(string size) {
615   enum temp_alloc = q{(f.alloc.alloc_buffer ? setup_temp_malloc(f, (${size})) : alloca(${size}))}.cmacroFixVars!("size")(size);
616 }
617 
618 // has to be a mixin, due to `alloca`
619 template temp_block_array(string count, string size) {
620   enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
621     .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
622 }
623 +/
// Compile-time string builder: yields the source text of an expression for the
// byte size of a block array, i.e. `count` pointers plus `count` chunks of
// `size` bytes. `cmacroFixVars` is defined elsewhere; presumably it substitutes
// the ${name} placeholders -- TODO confirm.
enum array_size_required(string count, string size) = q{((${count})*((void*).sizeof+(${size})))}.cmacroFixVars!("count", "size")(count, size);
625 
// Compile-time string builder: yields the source text `alloca(<size>)`.
// Unlike the commented-out variant above, this one never falls back to
// setup_temp_malloc; it always expands to alloca.
template temp_alloc(string size) {
  enum temp_alloc = q{alloca(${size})}.cmacroFixVars!("size")(size);
}
629 
// Compile-time string builder: yields the source text of a make_block_array
// call whose memory argument is the temp_alloc expression sized by
// array_size_required!(count, size).
template temp_block_array(string count, string size) {
  enum temp_block_array = q{(make_block_array(${tam}, (${count}), (${size})))}
    .cmacroFixVars!("count", "size", "tam")(count, size, temp_alloc!(array_size_required!(count, size)));
}
634 
635 /*
636 T** temp_block_array(T) (VorbisDecoder f, uint count, uint size) {
637   size *= T.sizeof;
638   auto mem = f.alloc.alloc(count*(void*).sizeof+size, f);
639   if (mem !is null) make_block_array(mem, count, size);
640   return cast(T**)mem;
641 }
642 */
643 
644 // given a sufficiently large block of memory, make an array of pointers to subblocks of it
// Lays out `mem` as a table of `count` pointers immediately followed by
// `count` chunks of `size` bytes each, points every table slot at its chunk
// and returns the table (which is `mem` itself).
// The caller must supply at least count*((void*).sizeof+size) bytes.
private void* make_block_array (void* mem, int count, int size) {
  auto table = cast(void**)mem;
  auto chunk = cast(char*)(table+count); // first byte after the pointer table
  for (int slot = 0; slot < count; ++slot, chunk += size) {
    table[slot] = chunk;
  }
  return table;
}
654 
// Allocates zero-filled setup memory for `sz` elements of type `T` through the
// decoder's allocator.
//
// Params:
//   f  = decoder whose allocator (`f.alloc`) is used
//   sz = number of `T` elements wanted
// Returns: pointer to the zeroed block, or null if the allocator returned null.
private T* setup_malloc(T) (VorbisDecoder f, uint sz) {
  import core.stdc.string : memset; // was `core.stdc..string`, which does not parse
  sz *= T.sizeof; // element count -> byte count
  /*
  f.setup_memory_required += sz;
  if (f.alloc.alloc_buffer) {
    void* p = cast(char*)f.alloc.alloc_buffer+f.setup_offset;
    if (f.setup_offset+sz > f.temp_offset) return null;
    f.setup_offset += sz;
    return cast(T*)p;
  }
  */
  auto res = f.alloc.alloc(sz+8, f); // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  if (res !is null) memset(res, 0, sz+8); // clear the padding bytes as well
  return cast(T*)res;
}
673 
// Returns setup memory to the decoder's allocator; a null pointer is ignored.
private void setup_free (VorbisDecoder f, void* p) {
  //if (f.alloc.alloc_buffer) return; // do nothing; setup mem is a stack
  if (p is null) return;
  f.alloc.free(p, f);
}
678 
// Allocates `sz` bytes of zero-filled temporary setup memory through the
// decoder's allocator.
//
// Params:
//   f  = decoder whose allocator (`f.alloc`) is used
//   sz = number of bytes wanted
// Returns: pointer to the zeroed block, or null if the allocator returned null.
private void* setup_temp_malloc (VorbisDecoder f, uint sz) {
  import core.stdc.string : memset; // was `core.stdc..string`, which does not parse
  auto res = f.alloc.allocTemp(sz+8, f); // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
  if (res !is null) memset(res, 0, sz+8); // clear the padding bytes as well
  return res;
}
687 
// Releases temporary setup memory of `sz` bytes; a null pointer is ignored.
// A size of 0 is reported to the allocator as 1 (before the padding is added).
private void setup_temp_free (VorbisDecoder f, void* p, uint sz) {
  if (p is null) return;
  immutable uint payload = (sz != 0 ? sz : 1);
  f.alloc.freeTemp(p, payload+8, f); // +8 to compensate dmd codegen bug: it can read dword(qword?) when told to read only byte
}
691 
// Lookup table for the MSB-first CRC-32 with polynomial 0x04c11db7: entry `i`
// is the register state after feeding the single byte `i` into a zero CRC.
// Filled once by the module constructor below.
immutable uint[256] crc_table;
shared static this () {
  enum uint CRC32_POLY = 0x04c11db7; // from spec
  enum uint TOP_BIT = 0x8000_0000;
  for (uint idx = 0; idx < 256; ++idx) {
    uint reg = idx<<24; // the byte enters at the top of the register
    for (int step = 0; step < 8; ++step) {
      immutable bool carry = (reg&TOP_BIT) != 0;
      reg <<= 1;
      if (carry) reg ^= CRC32_POLY;
    }
    crc_table[idx] = reg;
  }
}
702 
// Folds one byte into a running CRC: the table entry selected by the byte
// xor-ed with the CRC's top byte is combined with the CRC shifted left by 8.
uint crc32_update (uint crc, ubyte b) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return crc_table[(crc>>24)^b]^(crc<<8);
}
707 
// Reverses the order of all 32 bits of `n`.
// used in setup, and for huffman that doesn't go fast path
private uint bit_reverse (uint n) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  // swap neighbours at widths 1, 2, 4 and 8 bits, then swap the two halves
  n = ((n>>1)&0x55555555)|((n&0x55555555)<<1);
  n = ((n>>2)&0x33333333)|((n&0x33333333)<<2);
  n = ((n>>4)&0x0F0F0F0F)|((n&0x0F0F0F0F)<<4);
  n = ((n>>8)&0x00FF00FF)|((n&0x00FF00FF)<<8);
  return (n<<16)|(n>>16);
}
717 
// Returns `x` multiplied by itself.
private float square (float x) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return x*x;
}
722 
// this is a weird definition of log2() for which log2(1) = 1, log2(2) = 2, log2(4) = 3
// as required by the specification. fast(?) implementation from stb.h
// @OPTIMIZE: called multiple times per-packet with "constants"; move to setup
// Table lookup for values 0..15; larger values are shifted down into that range first.
immutable byte[16] log2_4 = [0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4];
private int ilog (int n) {
  //static if (__VERSION__ > 2067) pragma(inline, true);
  if (n < 0) return 0; // signed n returns 0
  // pick the 5-bit-wide band that n falls into; the band's shift is also the base result
  int shift;
  if (n < (1<<14)) {
    if (n < (1<<4)) shift = 0;
    else if (n < (1<<9)) shift = 5;
    else shift = 10;
  } else if (n < (1<<24)) {
    shift = (n < (1<<19) ? 15 : 20);
  } else {
    shift = (n < (1<<29) ? 25 : 30);
  }
  return shift+log2_4[n>>shift];
}
743 
744 
// sentinel code length: marks a value that has no huffman encoding
enum int NO_CODE = 255;
747 
748 /////////////////////// LEAF SETUP FUNCTIONS //////////////////////////
749 //
750 // these functions are only called at setup, and only a few times per file
// Unpacks the packed float format (from the specification):
// bits 0..20 are the mantissa, bits 21..30 the exponent (biased by 788),
// bit 31 the sign. The result is +/-mantissa * 2^(exponent-788).
private float float32_unpack (uint x) {
  import core.math : ldexp;
  //static if (__VERSION__ > 2067) pragma(inline, true);
  immutable uint mantissa = x&0x1fffff;
  immutable bool negative = (x&0x80000000) != 0;
  immutable int exponent = cast(int)((x>>21)&0x3ff)-788;
  double magnitude = cast(double)mantissa;
  if (negative) magnitude = -magnitude;
  return cast(float)ldexp(cast(float)magnitude, exponent);
}
761 
// zlib & jpeg huffman tables assume that the output symbols
// can either be arbitrarily arranged, or have monotonically
// increasing frequencies--they rely on the lengths being sorted;
// this makes for a very simple generation algorithm.
// vorbis allows a huffman table with non-sorted lengths. This
// requires a more sophisticated construction, since symbols in
// order do not map to huffman codes "in order".
//
// Records one codeword. Sparse codebooks store the code, its length and the
// symbol at slot `count`; non-sparse codebooks store only the code, indexed
// by `symbol`.
private void add_entry (Codebook* c, uint huff_code, int symbol, int count, ubyte len, uint* values) {
  if (c.sparse) {
    c.codewords[count] = huff_code;
    c.codeword_lengths[count] = len;
    values[count] = symbol;
    return;
  }
  c.codewords[symbol] = huff_code;
}
778 
// Assigns huffman codewords to the `n` entries whose code lengths are in `len`
// (entries with length NO_CODE are skipped) and records each one via add_entry.
// `values` is passed through to add_entry, which writes it for sparse codebooks.
// Returns true on success (including the case where no entry has a code), and
// false when the lengths cannot be satisfied: either no free leaf is left for
// an entry, or a length does not fit the 32-slot `available` table.
private int compute_codewords (Codebook* c, ubyte* len, int n, uint* values) {
  int i, k, m = 0;
  // available[L] holds the (left-aligned) next free code of length L, 0 = none.
  // A static array initializer is enough here; no memset needed.
  uint[32] available = 0;

  // find the first entry
  for (k = 0; k < n; ++k) if (len[k] < NO_CODE) break;
  if (k == n) { assert(c.sorted_entries == 0); return true; }
  // a length of 32 or more would index past the end of `available`
  if (len[k] >= available.length) return false;
  // add to the list
  add_entry(c, 0, k, m++, len[k], values);
  // add all available leaves
  for (i = 1; i <= len[k]; ++i) available[i] = 1U<<(32-i);
  // note that the above code treats the first case specially,
  // but it's really the same as the following code, so they
  // could probably be combined (except the initial code is 0,
  // and I use 0 in available[] to mean 'empty')
  for (i = k+1; i < n; ++i) {
    int z = len[i];
    if (z == NO_CODE) continue;
    // same bound as for the first entry: reject instead of reading out of range
    if (z >= available.length) return false;
    // find lowest available leaf (should always be earliest,
    // which is what the specification calls for)
    // note that this property, and the fact we can never have
    // more than one free leaf at a given level, isn't totally
    // trivial to prove, but it seems true and the assert never
    // fires, so!
    while (z > 0 && !available[z]) --z;
    if (z == 0) return false;
    assert(z >= 0 && z < 32);
    uint res = available[z];
    available[z] = 0;
    ubyte xxx = len[i];
    add_entry(c,
      bit_reverse(res),
      i,
      m++,
      xxx, // dmd bug: it reads 4 bytes without temp
      values);
    // propagate availability up the tree
    // (the loop body does not run when z == len[i])
    for (int y = len[i]; y > z; --y) {
      assert(available[y] == 0);
      available[y] = res+(1U<<(32-y));
    }
  }
  return true;
}
830 
// accelerated huffman table allows fast O(1) match of all symbols
// of length <= STB_VORBIS_FAST_HUFFMAN_LENGTH
//
// Resets every fast-table slot to -1, then, for each entry whose code is short
// enough, writes the entry index into every slot whose low bits equal the code.
private void compute_accelerated_huffman (Codebook* c) {
  c.fast_huffman.ptr[0..FAST_HUFFMAN_TABLE_SIZE] = -1;
  auto total = (c.sparse ? c.sorted_entries : c.entries);
  version(STB_VORBIS_FAST_HUFFMAN_SHORT) {
    if (total > 32767) total = 32767; // largest possible value we can encode!
  }
  foreach (uint entry; 0..total) {
    immutable codeLen = c.codeword_lengths[entry];
    if (codeLen > STB_VORBIS_FAST_HUFFMAN_LENGTH) continue;
    immutable int stride = 1<<codeLen;
    uint slot = (c.sparse ? bit_reverse(c.sorted_codewords[entry]) : c.codewords[entry]);
    // set table entries for all bit combinations in the higher bits
    for (; slot < FAST_HUFFMAN_TABLE_SIZE; slot += stride) {
      c.fast_huffman.ptr[slot] = cast(typeof(c.fast_huffman[0]))entry; //k8
    }
  }
}
851 
// qsort-style comparator for two `uint` values:
// returns -1, 0 or 1 for less-than, equal, greater-than.
extern(C) int uint32_compare (const void* p, const void* q) {
  immutable uint lhs = *cast(const(uint)*)p;
  immutable uint rhs = *cast(const(uint)*)q;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
857 
// Decides whether a code of length `len` belongs in the sorted (binary search)
// table. Sparse codebooks include everything; otherwise only real codes longer
// than STB_VORBIS_FAST_HUFFMAN_LENGTH are included.
private int include_in_sort (Codebook* c, uint len) {
  if (c.sparse) { assert(len != NO_CODE); return true; }
  return (len != NO_CODE && len > STB_VORBIS_FAST_HUFFMAN_LENGTH);
}
864 
// if the fast table above doesn't work, we want to binary
// search them... need to reverse the bits
//
// Fills c.sorted_codewords with the bit-reversed codes, sorts them, appends a
// 0xffffffff sentinel, and then records for each sorted slot which symbol
// (and, for sparse codebooks, which code length) it corresponds to.
private void compute_sorted_huffman (Codebook* c, ubyte* lengths, uint* values) {
  // build a list of all the entries
  // OPTIMIZATION: don't include the short ones, since they'll be caught by FAST_HUFFMAN.
  // this is kind of a frivolous optimization--I don't see any performance improvement,
  // but it's like 4 extra lines of code, so.
  if (c.sparse) {
    foreach (uint idx; 0..c.sorted_entries) c.sorted_codewords[idx] = bit_reverse(c.codewords[idx]);
  } else {
    int filled = 0;
    foreach (uint idx; 0..c.entries) {
      if (!include_in_sort(c, lengths[idx])) continue;
      c.sorted_codewords[filled++] = bit_reverse(c.codewords[idx]);
    }
    assert(filled == c.sorted_entries);
  }

  qsort(c.sorted_codewords, c.sorted_entries, (c.sorted_codewords[0]).sizeof, &uint32_compare);
  c.sorted_codewords[c.sorted_entries] = 0xffffffff;

  auto total = (c.sparse ? c.sorted_entries : c.entries);
  // now we need to indicate how they correspond; we could either
  //   #1: sort a different data structure that says who they correspond to
  //   #2: for each sorted entry, search the original list to find who corresponds
  //   #3: for each original entry, find the sorted entry
  // #1 requires extra storage, #2 is slow, #3 can use binary search!
  foreach (uint idx; 0..total) {
    auto huff_len = (c.sparse ? lengths[values[idx]] : lengths[idx]);
    if (!include_in_sort(c, huff_len)) continue;
    immutable uint code = bit_reverse(c.codewords[idx]);
    int lo = 0, span = c.sorted_entries;
    while (span > 1) {
      // invariant: sc[lo] <= code < sc[lo+span]
      immutable int half = span>>1;
      immutable int mid = lo+half;
      if (c.sorted_codewords[mid] <= code) {
        lo = mid;
        span -= half;
      } else {
        span = half;
      }
    }
    assert(c.sorted_codewords[lo] == code);
    if (c.sparse) {
      c.sorted_values[lo] = values[idx];
      c.codeword_lengths[lo] = huff_len;
    } else {
      c.sorted_values[lo] = idx;
    }
  }
}
914 
// only run while parsing the header (3 times)
// Returns nonzero when the six bytes at `data` spell "vorbis".
private int vorbis_validate (const(void)* data) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  return ((cast(const(char)*)data)[0..6] == "vorbis");
}
921 
// called from setup only, once per code book
// (formula implied by specification)
// Computes an integer r with r^dim <= entries < (r+1)^dim, starting from a
// floating-point estimate of the dim-th root and bumping it by one if needed.
private int lookup1_values (int entries, int dim) {
  import core.stdc.math : lrintf;
  import std.math : floor, exp, pow, log;
  float lnEntries = cast(float)log(cast(float)entries);
  int r = cast(int)lrintf(floor(exp(lnEntries/dim)));
  // the estimate may land one too low; floor() to avoid _ftol() when non-CRT
  if (lrintf(floor(pow(cast(float)r+1, dim))) <= entries) r += 1;
  assert(pow(cast(float)r+1, dim) > entries);
  assert(lrintf(floor(pow(cast(float)r, dim))) <= entries); // floor() as above
  return r;
}
933 
// called twice per file
// Fills the three sine/cosine tables for block size `n`:
// A and B each receive n/4 (cos, sin) pairs, C receives n/8 pairs.
private void compute_twiddle_factors (int n, float* A, float* B, float* C) {
  import std.math : cos, sin, PI;
  immutable int quarter = n>>2;
  immutable int eighth = n>>3;
  foreach (int k; 0..quarter) {
    immutable int k2 = k*2;
    A[k2  ] = cast(float) cos(4*k*PI/n);
    A[k2+1] = cast(float)-sin(4*k*PI/n);
    B[k2  ] = cast(float) cos((k2+1)*PI/n/2)*0.5f;
    B[k2+1] = cast(float) sin((k2+1)*PI/n/2)*0.5f;
  }
  foreach (int k; 0..eighth) {
    immutable int k2 = k*2;
    C[k2  ] = cast(float) cos(2*(k2+1)*PI/n);
    C[k2+1] = cast(float)-sin(2*(k2+1)*PI/n);
  }
}
950 
// Writes the n/2 window coefficients for block size `n` into `window`:
// sin(pi/2 * sin^2((i+0.5)/(n/2) * pi/2)) for i in 0..n/2.
private void compute_window (int n, float* window) {
  import std.math : sin, PI;
  immutable int half = n>>1;
  for (int i = 0; i < half; ++i) {
    window[i] = cast(float)sin(0.5*PI*square(cast(float)sin((i-0+0.5)/half*0.5*PI)));
  }
}
956 
// Fills `rev` with n/8 entries: the bit-reversed index, shifted down by
// (32-ld+3) bits and then scaled by 4, where ld = ilog(n)-1.
private void compute_bitreverse (int n, ushort* rev) {
  immutable int ld = ilog(n)-1; // ilog is off-by-one from normal definitions
  immutable int shift = 32-ld+3;
  immutable int count = n>>3;
  for (int i = 0; i < count; ++i) rev[i] = cast(ushort)((bit_reverse(i)>>shift)<<2); //k8
}
962 
// Allocates and fills the per-blocksize tables in slot `b` for block size `n`:
// the A/B/C twiddle tables, the window, and the bit-reverse table.
// Returns true on success, or the result of error(f, outofmem) when an
// allocation fails.
private int init_blocksize (VorbisDecoder f, int b, int n) {
  immutable int half = n>>1;
  immutable int quarter = n>>2;
  immutable int eighth = n>>3;

  f.A[b] = setup_malloc!float(f, half);
  f.B[b] = setup_malloc!float(f, half);
  f.C[b] = setup_malloc!float(f, quarter);
  if (!f.A[b] || !f.B[b] || !f.C[b]) return error(f, STBVorbisError.outofmem);
  compute_twiddle_factors(n, f.A[b], f.B[b], f.C[b]);

  f.window[b] = setup_malloc!float(f, half);
  if (!f.window[b]) return error(f, STBVorbisError.outofmem);
  compute_window(n, f.window[b]);

  f.bit_reverse[b] = setup_malloc!ushort(f, eighth);
  if (!f.bit_reverse[b]) return error(f, STBVorbisError.outofmem);
  compute_bitreverse(n, f.bit_reverse[b]);

  return true;
}
978 
// Among x[0..n], finds the index of the largest value below x[n] (stored to
// *plow) and of the smallest value above x[n] (stored to *phigh).
// An output is left untouched when no such element exists.
private void neighbors (ushort* x, int n, ushort* plow, ushort* phigh) {
  assert(n >= 0 && n <= ushort.max);
  int bestLow = -1;     // below any ushort
  int bestHigh = 65536; // above any ushort
  immutable ushort limit = cast(ushort)n;
  for (ushort i = 0; i < limit; ++i) {
    if (x[i] > bestLow && x[i] < x[n]) {
      *plow = i;
      bestLow = x[i];
    }
    if (x[i] < bestHigh && x[i] > x[n]) {
      *phigh = i;
      bestHigh = x[i];
    }
  }
}
988 
// Sort record of two ushort fields; point_compare orders these by `x`.
// this has been repurposed so y is now the original index instead of y
struct Point {
  ushort x;
  ushort y;
}
993 
// qsort-style comparator for two Point values, ordering by the `x` field only:
// returns -1, 0 or 1.
extern(C) int point_compare (const void *p, const void *q) {
  immutable ushort lhs = (cast(const(Point)*)p).x;
  immutable ushort rhs = (cast(const(Point)*)q).x;
  if (lhs < rhs) return -1;
  return (lhs > rhs ? 1 : 0);
}
999 /////////////////////// END LEAF SETUP FUNCTIONS //////////////////////////
1000 
1001 // ///////////////////////////////////////////////////////////////////// //
// Reads one byte from the decoder's input via rawRead.
// Returns 0 when the decoder is already at EOF, or when the read does not
// deliver the byte (in which case f.eof is set).
// Fix: the result used to be declared `= void` and was returned uninitialized
// when f.eof was already set on entry.
private ubyte get8 (VorbisDecoder f) {
  if (f.eof) return 0;
  ubyte b = 0;
  if (f.rawRead((&b)[0..1]) != 1) { f.eof = true; b = 0; }
  return b;
}
1009 
// Reads a 32-bit little-endian value from the decoder's input.
// Returns 0 when the decoder is already at EOF. On little-endian targets the
// value is read in one rawRead call (a short read sets f.eof and yields 0);
// otherwise it is assembled from four get8 calls, low byte first.
private uint get32 (VorbisDecoder f) {
  if (f.eof) return 0;
  uint x = 0;
  version(LittleEndian) {
    if (f.rawRead((&x)[0..1]) != x.sizeof) { f.eof = true; x = 0; }
  } else {
    for (int shift = 0; shift < 32; shift += 8) x |= cast(uint)get8(f)<<shift;
  }
  return x;
}
1024 
// Reads exactly `n` bytes into `data`.
// Returns false when the decoder is at EOF, when `n` is negative, or when
// rawRead does not report `n` (f.eof is then set); a zero-length request
// succeeds without touching the input.
private bool getn (VorbisDecoder f, void* data, int n) {
  if (f.eof || n < 0) return false;
  if (n > 0 && f.rawRead(data[0..n]) != n) {
    f.eof = true;
    return false;
  }
  return true;
}
1031 
// Skips `n` input bytes via rawSkip; does nothing at EOF or for n <= 0.
private void skip (VorbisDecoder f, int n) {
  if (!f.eof && n > 0) f.rawSkip(n);
}
1036 
// Repositions the input at `loc` via rawSeek and clears the EOF flag.
// Offsets of 0x80000000 and above are not sought to; they set the EOF flag instead.
private void set_file_offset (VorbisDecoder f, uint loc) {
  /+if (f.push_mode) return;+/
  if (loc >= 0x80000000) {
    f.eof = true;
  } else {
    f.eof = false;
    f.rawSeek(loc);
  }
}
1043 
1044 
// the four bytes that open every Ogg page
immutable char[4] ogg_page_header = "OggS"; //[ 0x4f, 0x67, 0x67, 0x53 ];

// Reads four bytes and reports whether they are the "OggS" capture pattern.
// Returns false as well when the four bytes cannot be read.
private bool capture_pattern (VorbisDecoder f) {
  static if (__VERSION__ > 2067) pragma(inline, true);
  char[4] sign = void; // only inspected after getn has filled it
  return (getn(f, sign.ptr, 4) && sign == "OggS");
}
1053 
// bit masks tested against the page header flag byte (f.page_flag)
enum : int {
  PAGEFLAG_continued_packet = 1,
  PAGEFLAG_first_page = 2,
  PAGEFLAG_last_page = 4,
}
1057 
// Parses an Ogg page header whose capture pattern has already been consumed,
// loading the flag byte, page sequence number and segment table into `f`.
// Also works out which segment (if any) ends the last complete packet, so the
// page's granule position can be attached to it, and records the extent of the
// first page while f.first_decode is set.
// Returns true on success, or the result of error() for a nonzero stream
// structure version or a truncated segment table.
private int start_page_no_capturepattern (VorbisDecoder f) {
  // stream structure version
  if (get8(f) != 0) return error(f, STBVorbisError.invalid_stream_structure_version);
  // header flag
  f.page_flag = get8(f);
  // absolute granule position (low and high 32 bits)
  // @TODO: validate loc0, loc1 as valid positions?
  immutable uint loc0 = get32(f);
  immutable uint loc1 = get32(f);
  // stream serial number -- vorbis doesn't interleave, so discard
  get32(f);
  //if (f.serial != get32(f)) return error(f, STBVorbisError.incorrect_stream_serial_number);
  // page sequence number
  f.last_page = get32(f);
  // CRC32 (read and ignored here)
  get32(f);
  // page_segments
  f.segment_count = get8(f);
  if (!getn(f, f.segments.ptr, f.segment_count)) return error(f, STBVorbisError.unexpected_eof);
  // assume we _don't_ know the sample position of any segments
  f.end_seg_with_known_loc = -2;
  if (!(loc0 == ~0U && loc1 == ~0U)) {
    // determine which packet is the last one that will complete:
    // walk back over trailing full-size (255) segments
    int last = f.segment_count-1;
    while (last >= 0 && f.segments.ptr[last] >= 255) --last;
    // 'last' is now the index of the _last_ segment of a packet that ends
    if (last >= 0) {
      f.end_seg_with_known_loc = last;
      f.known_loc_for_packet = loc0;
    }
  }
  if (f.first_decode) {
    // page length = 27 header bytes + segment table + all segment payloads
    int len = 0;
    foreach (int i; 0..f.segment_count) len += f.segments.ptr[i];
    len += 27+f.segment_count;
    ProbedPage p;
    p.page_start = f.first_audio_page_offset;
    p.page_end = p.page_start+len;
    p.last_decoded_sample = loc0;
    f.p_first = p;
  }
  f.next_seg = 0;
  return true;
}
1105 
// Expects a page at the current input position: checks the capture pattern,
// then parses the rest of the page header.
private int start_page (VorbisDecoder f) {
  if (capture_pattern(f)) return start_page_no_capturepattern(f);
  return error(f, STBVorbisError.missing_capture_pattern);
}
1110 
// Prepares the decoder for reading a new packet. While no segment is pending
// (f.next_seg == -1) a new page is started; such a page must not carry the
// continued-packet flag. Then the per-packet bit/byte counters are reset.
private int start_packet (VorbisDecoder f) {
  for (;;) {
    if (f.next_seg != -1) break;
    if (!start_page(f)) return false;
    if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  // f.next_seg is now valid
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  return true;
}
1123 
// Like start_packet, but tolerates a clean end of input: when a new page is
// needed and the input ends right at the page boundary, returns false without
// raising an error.
private int maybe_start_packet (VorbisDecoder f) {
  if (f.next_seg == -1) {
    auto first = get8(f);
    if (f.eof) return false; // EOF at page boundary is not an error!
    // capture pattern "OggS", checked byte by byte, stopping at the first mismatch
    if (first != 0x4f) return error(f, STBVorbisError.missing_capture_pattern);
    static immutable ubyte[3] rest = [0x67, 0x67, 0x53];
    foreach (expected; rest) {
      if (get8(f) != expected) return error(f, STBVorbisError.missing_capture_pattern);
    }
    if (!start_page_no_capturepattern(f)) return false;
    if (f.page_flag&PAGEFLAG_continued_packet) {
      // set up enough state that we can read this packet if we want,
      // e.g. during recovery
      f.last_seg = false;
      f.bytes_in_seg = 0;
      return error(f, STBVorbisError.continued_packet_flag_invalid);
    }
  }
  return start_packet(f);
}
1143 
// Advances to the next segment of the current packet and returns its length.
// Returns 0 when the packet's last segment was already consumed, or when a
// required new page cannot be started. A page entered in the middle of a
// packet must carry the continued-packet flag.
private int next_segment (VorbisDecoder f) {
  if (f.last_seg) return 0;
  if (f.next_seg == -1) {
    f.last_seg_which = f.segment_count-1; // in case start_page fails
    if (!start_page(f)) {
      f.last_seg = true;
      return 0;
    }
    if ((f.page_flag&PAGEFLAG_continued_packet) == 0) return error(f, STBVorbisError.continued_packet_flag_invalid);
  }
  auto len = f.segments.ptr[f.next_seg++];
  // a segment shorter than 255 bytes terminates the packet
  if (len < 255) {
    f.last_seg = true;
    f.last_seg_which = f.next_seg-1;
  }
  // ran off the segment table: the next call has to start a new page
  if (f.next_seg >= f.segment_count) f.next_seg = -1;
  debug(stb_vorbis) assert(f.bytes_in_seg == 0);
  f.bytes_in_seg = len;
  return len;
}
1161 
enum int EOP = -1;          // returned by the packet byte readers at end of packet
enum int INVALID_BITS = -1; // stored in valid_bits once a bit read ran past the packet end
1164 
// Returns the next byte of the current packet, moving on to the next segment
// when the current one is used up. Returns EOP once the packet has no more
// bytes. Does not touch the bit accumulator state.
private int get8_packet_raw (VorbisDecoder f) {
  if (f.bytes_in_seg == 0) {  // CLANG!
    if (f.last_seg || !next_segment(f)) return EOP;
  }
  debug(stb_vorbis) assert(f.bytes_in_seg > 0);
  --f.bytes_in_seg;
  ++f.packet_bytes;
  return get8(f);
}
1175 
// Byte-oriented packet read: fetches the next packet byte (or EOP) and then
// resets f.valid_bits to 0, dropping any buffered bits.
private int get8_packet (VorbisDecoder f) {
  immutable int value = get8_packet_raw(f);
  f.valid_bits = 0;
  return value;
}
1181 
// Reads four packet bytes and combines them low byte first.
// Returns EOP (converted to uint) as soon as any of the four reads hits the
// end of the packet.
private uint get32_packet (VorbisDecoder f) {
  uint value = 0;
  for (int shift = 0; shift < 32; shift += 8) {
    immutable uint octet = get8_packet(f);
    if (octet == EOP) return EOP;
    value += octet<<shift;
  }
  return value;
}
1193 
// Consumes and discards the remaining bytes of the current packet.
private void flush_packet (VorbisDecoder f) {
  for (;;) {
    if (get8_packet_raw(f) == EOP) break;
  }
}
1197 
// @OPTIMIZE: this is the secondary bit decoder, so it's probably not as important
// as the huffman decoder?
//
// Extracts the next `n` bits of the current packet, least significant first,
// refilling the accumulator (f.acc / f.valid_bits) one byte at a time.
// Returns 0 once the packet is exhausted; f.valid_bits is then left at
// INVALID_BITS so later calls also return 0.
private uint get_bits_main (VorbisDecoder f, int n) {
  if (f.valid_bits < 0) return 0;
  if (f.valid_bits < n) {
    if (n > 24) {
      // the accumulator technique below would not work correctly in this case
      immutable uint low = get_bits_main(f, 24);
      immutable uint high = get_bits_main(f, n-24);
      return low+(high<<24);
    }
    if (f.valid_bits == 0) f.acc = 0;
    // at least one byte is missing here, so the body always runs at least once
    do {
      immutable uint octet = get8_packet_raw(f);
      if (octet == EOP) {
        f.valid_bits = INVALID_BITS;
        return 0;
      }
      f.acc += octet<<f.valid_bits;
      f.valid_bits += 8;
    } while (f.valid_bits < n);
  }
  if (f.valid_bits < 0) return 0;
  immutable uint result = f.acc&((1<<n)-1);
  f.acc >>= n;
  f.valid_bits -= n;
  return result;
}
1227 
// Compile-time-width bit read: forwards to get_bits_main and casts the result
// to the smallest unsigned integer type that holds `n` bits.
private auto get_bits(ubyte n) (VorbisDecoder f) if (n >= 1 && n <= 64) {
  static if (n <= 8) alias Result = ubyte;
  else static if (n <= 16) alias Result = ushort;
  else static if (n <= 32) alias Result = uint;
  else static if (n <= 64) alias Result = ulong;
  else static assert(0, "wtf?!");
  return cast(Result)get_bits_main(f, n);
}
1236 
// Like get_bits, but adds `add` to the value before it is cast to the
// smallest unsigned type that holds `n` bits; assume no overflow.
private auto get_bits_add_no(ubyte n) (VorbisDecoder f, ubyte add) if (n >= 1 && n <= 64) {
  static if (n <= 8) alias Result = ubyte;
  else static if (n <= 16) alias Result = ushort;
  else static if (n <= 32) alias Result = uint;
  else static if (n <= 64) alias Result = ulong;
  else static assert(0, "wtf?!");
  return cast(Result)(get_bits_main(f, n)+add);
}
1245 
// @OPTIMIZE: primary accumulator for huffman
// expand the buffer to as many bits as possible without reading off end of packet
// it might be nice to allow f.valid_bits and f.acc to be stored in registers,
// e.g. cache them locally and decode locally
//
// Statement mixin; expects a decoder named `f` in scope. While 24 or fewer bits
// are buffered it appends whole packet bytes to f.acc, stopping at the end of
// the packet.
//private /*__forceinline*/ void prep_huffman (VorbisDecoder f)
enum PrepHuffmanMixin = q{
  if (f.valid_bits <= 24) {
    if (f.valid_bits == 0) f.acc = 0;
    int phmz = void;
    while (!(f.last_seg && !f.bytes_in_seg)) {
      phmz = get8_packet_raw(f);
      if (phmz == EOP) break;
      f.acc += cast(uint)phmz<<f.valid_bits;
      f.valid_bits += 8;
      if (f.valid_bits > 24) break;
    }
  }
};
1264 
// named constants for the three header packet kinds
enum VorbisPacket : int {
  id      = 1,
  comment = 3,
  setup   = 5,
}
1270 
// Slow-path huffman decode of one codeword from the bit accumulator.
// Uses a binary search over the sorted codewords when they are available (and
// the codebook has more than 8 entries, or has no plain codeword list),
// otherwise a linear scan over the plain codewords.
// Returns the decoded index (sorted index for sparse codebooks, symbol
// otherwise), or -1 when too few bits are buffered or no codeword matches.
private int codebook_decode_scalar_raw (VorbisDecoder f, Codebook *c) {
  mixin(PrepHuffmanMixin);

  if (c.codewords is null && c.sorted_codewords is null) return -1;
  // cases to use binary search: sorted_codewords && !c.codewords
  //                             sorted_codewords && c.entries > 8
  bool useBinarySearch;
  if (c.entries > 8) useBinarySearch = (c.sorted_codewords !is null);
  else useBinarySearch = (c.codewords is null);

  if (useBinarySearch) {
    immutable uint code = bit_reverse(f.acc);
    int lo = 0, span = c.sorted_entries;
    while (span > 1) {
      // invariant: sc[lo] <= code < sc[lo+span]
      immutable int half = span>>1;
      immutable int mid = lo+half;
      if (c.sorted_codewords[mid] <= code) {
        lo = mid;
        span -= half;
      } else {
        span = half;
      }
    }
    // lo is now the sorted index
    if (!c.sparse) lo = c.sorted_values[lo];
    // lo is now sorted index if sparse, or symbol otherwise
    immutable int len = c.codeword_lengths[lo];
    if (f.valid_bits < len) {
      f.valid_bits = 0;
      return -1;
    }
    f.acc >>= len;
    f.valid_bits -= len;
    return lo;
  }

  // if small, linear search
  debug(stb_vorbis) assert(!c.sparse);
  foreach (uint i; 0..c.entries) {
    immutable codeLen = c.codeword_lengths[i];
    if (codeLen == NO_CODE) continue;
    if (c.codewords[i] != (f.acc&((1<<codeLen)-1))) continue;
    // first matching codeword decides the outcome
    if (f.valid_bits < codeLen) {
      f.valid_bits = 0;
      return -1;
    }
    f.acc >>= codeLen;
    f.valid_bits -= codeLen;
    return i;
  }

  error(f, STBVorbisError.invalid_stream);
  f.valid_bits = 0;
  return -1;
}
1322 
1323 
// Code-generating template: yields the statements that decode one codeword of
// codebook expression `c` into the variable named `var`. The fast table is
// tried first; a negative table entry falls back to codebook_decode_scalar_raw.
// `var` ends up as -1 when not enough bits were buffered.
enum DECODE_RAW(string var, string c) = q{
  if (f.valid_bits < STB_VORBIS_FAST_HUFFMAN_LENGTH) { mixin(PrepHuffmanMixin); }
  // fast huffman table lookup
  ${i} = f.acc&FAST_HUFFMAN_TABLE_MASK;
  ${i} = ${c}.fast_huffman.ptr[${i}];
  if (${i} < 0) {
    ${i} = codebook_decode_scalar_raw(f, ${c});
  } else {
    auto ${__temp_prefix__}n = ${c}.codeword_lengths[${i}];
    f.acc >>= ${__temp_prefix__}n;
    f.valid_bits -= ${__temp_prefix__}n;
    if (f.valid_bits < 0) { f.valid_bits = 0; ${i} = -1; }
  }
}.cmacroFixVars!("i", "c")(var, c);
1340 
// Code-generating template: DECODE_RAW followed by the sparse-codebook
// translation of the decoded index through sorted_values.
template DECODE(string var, string c) {
  enum DECODE = q{
    ${DECODE_RAW}
    if (${c}.sparse) ${var} = ${c}.sorted_values[${var}];
  }.cmacroFixVars!("var", "c", "DECODE_RAW")(var, c, DECODE_RAW!(var, c));
}
1345 
1346 
// decoder used in the vector (VQ) paths: the translated index when divides are
// done in the codebook, the raw index otherwise
version(STB_VORBIS_DIVIDES_IN_CODEBOOK)
  alias DECODE_VQ = DECODE;
else
  alias DECODE_VQ = DECODE_RAW;
1352 
1353 
1354 
// Expression-string helpers for reading codebook multiplicands.
// CODEBOOK_ELEMENT_FAST is an optimization for the CODEBOOK_FLOATS case
// where we avoid one addition; here it expands to the same text as CODEBOOK_ELEMENT.
template CODEBOOK_ELEMENT(string c, string off) {
  enum CODEBOOK_ELEMENT = "("~c~".multiplicands["~off~"])";
}
enum CODEBOOK_ELEMENT_FAST(string c, string off) = CODEBOOK_ELEMENT!(c, off);
enum CODEBOOK_ELEMENT_BASE(string c) = "(0)";
1360 
1361 
// Decodes one codeword for vector use. Returns the decoded index, or a negative
// value on failure. An error is raised for lookup type 0 and for a failed
// decode, except when the failure coincides with the end of the packet.
private int codebook_decode_start (VorbisDecoder f, Codebook* c) {
  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) {
    error(f, STBVorbisError.invalid_stream);
    return -1;
  }
  int z = -1;
  mixin(DECODE_VQ!("z", "c"));
  debug(stb_vorbis) if (c.sparse) assert(z < c.sorted_entries);
  if (z < 0) {
    // running out of packet (EOP) is not a stream error
    immutable bool atPacketEnd = (!f.bytes_in_seg && f.last_seg);
    if (!atPacketEnd) error(f, STBVorbisError.invalid_stream);
  }
  return z;
}
1377 
// Decodes one codebook vector and adds its elements to output[0..len], with
// `len` capped at the codebook's dimension count.
// Returns false when no codeword could be decoded, true otherwise.
private int codebook_decode (VorbisDecoder f, Codebook* c, float* output, int len) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  if (c.dimensions < len) len = c.dimensions;

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      // lookup type 1: each element's table offset is derived from z by division
      float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
      int div = 1;
      for (int i = 0; i < len; ++i) {
        int off = (z/div)%c.lookup_values;
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[i] += val;
        if (c.sequence_p) last = val+c.minimum_value;
        div *= c.lookup_values;
      }
      return true;
    }
  }

  // the vector's elements are stored consecutively, starting at z*dimensions
  z *= c.dimensions;
  if (!c.sequence_p) {
    float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    for (int i = 0; i < len; ++i) output[i] += mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
  } else {
    // sequence mode: each element builds on the previous one
    float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    for (int i = 0; i < len; ++i) {
      float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
      output[i] += val;
      last = val+c.minimum_value;
    }
  }

  return true;
}
1413 
// Like codebook_decode, but element i is added to output[i*step], so the
// vector is spread over the output with a stride of `step`.
// Returns false when no codeword could be decoded, true otherwise.
private int codebook_decode_step (VorbisDecoder f, Codebook* c, float* output, int len, int step) {
  int z = codebook_decode_start(f, c);
  if (z < 0) return false;
  float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
  if (c.dimensions < len) len = c.dimensions;

  version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
    if (c.lookup_type == 1) {
      int div = 1;
      for (int i = 0; i < len; ++i) {
        int off = (z/div)%c.lookup_values;
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
        output[i*step] += val;
        if (c.sequence_p) last = val;
        div *= c.lookup_values;
      }
      return true;
    }
  }

  // the vector's elements are stored consecutively, starting at z*dimensions
  z *= c.dimensions;
  for (int i = 0; i < len; ++i) {
    float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
    output[i*step] += val;
    if (c.sequence_p) last = val;
  }

  return true;
}
1443 
// Repeatedly decodes codebook vectors and scatters their elements round-robin
// over `ch` channel buffers, until `total_decode` elements have been consumed.
//   outputs              per-channel destination buffers; null channels are skipped
//   c_inter_p, p_inter_p in/out cursor: current channel and current position
//                        within the channel; written back only on success
//   len                  length of each channel buffer
// Returns true on success, false when the packet ends mid-decode, and the
// result of error() for lookup type 0 or an undecodable codeword.
private int codebook_decode_deinterleave_repeat (VorbisDecoder f, Codebook* c, ref float*[STB_VORBIS_MAX_CHANNELS] outputs, int ch, int* c_inter_p, int* p_inter_p, int len, int total_decode) {
  int c_inter = *c_inter_p;
  int p_inter = *p_inter_p;
  int z, effective = c.dimensions;

  // type 0 is only legal in a scalar context
  if (c.lookup_type == 0) return error(f, STBVorbisError.invalid_stream);

  while (total_decode > 0) {
    float last = mixin(CODEBOOK_ELEMENT_BASE!"c");
    mixin(DECODE_VQ!("z", "c"));
    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
      debug(stb_vorbis) assert(!c.sparse || z < c.sorted_entries);
    }
    if (z < 0) {
      // running out of packet is not a stream error
      if (!f.bytes_in_seg && f.last_seg) return false;
      return error(f, STBVorbisError.invalid_stream);
    }

    // if this will take us off the end of the buffers, stop short!
    // we check by computing the length of the virtual interleaved
    // buffer (len*ch), our current offset within it (p_inter*ch)+(c_inter),
    // and the length we'll be using (effective)
    // (fixed: the clamp used to subtract (p_inter*ch-c_inter), which leaves
    // 2*c_inter more elements than actually remain in the buffer)
    if (c_inter+p_inter*ch+effective > len*ch) effective = len*ch-(p_inter*ch+c_inter);

    version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
      if (c.lookup_type == 1) {
        int div = 1;
        foreach (immutable i; 0..effective) {
          int off = (z/div)%c.lookup_values;
          float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "off"))+last;
          // fixed: outputs.ptr[c_inter] is already a float*, so it is indexed directly
          if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
          if (++c_inter == ch) { c_inter = 0; ++p_inter; }
          if (c.sequence_p) last = val;
          div *= c.lookup_values;
        }
        goto skipit;
      }
    }
    // the vector's elements are stored consecutively, starting at z*dimensions
    z *= c.dimensions;
    if (c.sequence_p) {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c", "z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
        last = val;
      }
    } else {
      foreach (immutable i; 0..effective) {
        float val = mixin(CODEBOOK_ELEMENT_FAST!("c","z+i"))+last;
        if (outputs.ptr[c_inter]) outputs.ptr[c_inter][p_inter] += val;
        if (++c_inter == ch) { c_inter = 0; ++p_inter; }
      }
    }
   skipit:
    total_decode -= effective;
  }
  *c_inter_p = c_inter;
  *p_inter_p = p_inter;
  return true;
}
1505 
// Code-generating template: yields a block statement that linearly interpolates
// the y value at `x` on the line from (x0, y0) to (x1, y1) using integer
// arithmetic, and assigns the result to `dest`.
//private int predict_point (int x, int x0, int x1, int y0, int y1)
enum predict_point(string dest, string x, string x0, string x1, string y0, string y1) = q{{
  int dy = ${y1}-${y0};
  int adx = ${x1}-${x0};
  // @OPTIMIZE: force int division to round in the right direction... is this necessary on x86?
  // |dy| scaled by the horizontal distance already covered, then divided by the run
  int off = ((dy < 0 ? -dy : dy)*(${x}-${x0}))/adx;
  if (dy < 0) ${dest} = ${y0}-off;
  else ${dest} = ${y0}+off;
}}.cmacroFixVars!("dest", "x", "x0", "x1", "y0", "y1")(dest, x, x0, x1, y0, y1);
1516 
// the following table is block-copied from the specification
// 256 entries, strictly increasing up to 1.0f at index 255. draw_line indexes
// it with the integer y value of the line being drawn, so the values must not
// be altered or reordered.
immutable float[256] inverse_db_table = [
  1.0649863e-07f, 1.1341951e-07f, 1.2079015e-07f, 1.2863978e-07f,
  1.3699951e-07f, 1.4590251e-07f, 1.5538408e-07f, 1.6548181e-07f,
  1.7623575e-07f, 1.8768855e-07f, 1.9988561e-07f, 2.1287530e-07f,
  2.2670913e-07f, 2.4144197e-07f, 2.5713223e-07f, 2.7384213e-07f,
  2.9163793e-07f, 3.1059021e-07f, 3.3077411e-07f, 3.5226968e-07f,
  3.7516214e-07f, 3.9954229e-07f, 4.2550680e-07f, 4.5315863e-07f,
  4.8260743e-07f, 5.1396998e-07f, 5.4737065e-07f, 5.8294187e-07f,
  6.2082472e-07f, 6.6116941e-07f, 7.0413592e-07f, 7.4989464e-07f,
  7.9862701e-07f, 8.5052630e-07f, 9.0579828e-07f, 9.6466216e-07f,
  1.0273513e-06f, 1.0941144e-06f, 1.1652161e-06f, 1.2409384e-06f,
  1.3215816e-06f, 1.4074654e-06f, 1.4989305e-06f, 1.5963394e-06f,
  1.7000785e-06f, 1.8105592e-06f, 1.9282195e-06f, 2.0535261e-06f,
  2.1869758e-06f, 2.3290978e-06f, 2.4804557e-06f, 2.6416497e-06f,
  2.8133190e-06f, 2.9961443e-06f, 3.1908506e-06f, 3.3982101e-06f,
  3.6190449e-06f, 3.8542308e-06f, 4.1047004e-06f, 4.3714470e-06f,
  4.6555282e-06f, 4.9580707e-06f, 5.2802740e-06f, 5.6234160e-06f,
  5.9888572e-06f, 6.3780469e-06f, 6.7925283e-06f, 7.2339451e-06f,
  7.7040476e-06f, 8.2047000e-06f, 8.7378876e-06f, 9.3057248e-06f,
  9.9104632e-06f, 1.0554501e-05f, 1.1240392e-05f, 1.1970856e-05f,
  1.2748789e-05f, 1.3577278e-05f, 1.4459606e-05f, 1.5399272e-05f,
  1.6400004e-05f, 1.7465768e-05f, 1.8600792e-05f, 1.9809576e-05f,
  2.1096914e-05f, 2.2467911e-05f, 2.3928002e-05f, 2.5482978e-05f,
  2.7139006e-05f, 2.8902651e-05f, 3.0780908e-05f, 3.2781225e-05f,
  3.4911534e-05f, 3.7180282e-05f, 3.9596466e-05f, 4.2169667e-05f,
  4.4910090e-05f, 4.7828601e-05f, 5.0936773e-05f, 5.4246931e-05f,
  5.7772202e-05f, 6.1526565e-05f, 6.5524908e-05f, 6.9783085e-05f,
  7.4317983e-05f, 7.9147585e-05f, 8.4291040e-05f, 8.9768747e-05f,
  9.5602426e-05f, 0.00010181521f, 0.00010843174f, 0.00011547824f,
  0.00012298267f, 0.00013097477f, 0.00013948625f, 0.00014855085f,
  0.00015820453f, 0.00016848555f, 0.00017943469f, 0.00019109536f,
  0.00020351382f, 0.00021673929f, 0.00023082423f, 0.00024582449f,
  0.00026179955f, 0.00027881276f, 0.00029693158f, 0.00031622787f,
  0.00033677814f, 0.00035866388f, 0.00038197188f, 0.00040679456f,
  0.00043323036f, 0.00046138411f, 0.00049136745f, 0.00052329927f,
  0.00055730621f, 0.00059352311f, 0.00063209358f, 0.00067317058f,
  0.00071691700f, 0.00076350630f, 0.00081312324f, 0.00086596457f,
  0.00092223983f, 0.00098217216f, 0.0010459992f,  0.0011139742f,
  0.0011863665f,  0.0012634633f,  0.0013455702f,  0.0014330129f,
  0.0015261382f,  0.0016253153f,  0.0017309374f,  0.0018434235f,
  0.0019632195f,  0.0020908006f,  0.0022266726f,  0.0023713743f,
  0.0025254795f,  0.0026895994f,  0.0028643847f,  0.0030505286f,
  0.0032487691f,  0.0034598925f,  0.0036847358f,  0.0039241906f,
  0.0041792066f,  0.0044507950f,  0.0047400328f,  0.0050480668f,
  0.0053761186f,  0.0057254891f,  0.0060975636f,  0.0064938176f,
  0.0069158225f,  0.0073652516f,  0.0078438871f,  0.0083536271f,
  0.0088964928f,  0.009474637f,   0.010090352f,   0.010746080f,
  0.011444421f,   0.012188144f,   0.012980198f,   0.013823725f,
  0.014722068f,   0.015678791f,   0.016697687f,   0.017782797f,
  0.018938423f,   0.020169149f,   0.021479854f,   0.022875735f,
  0.024362330f,   0.025945531f,   0.027631618f,   0.029427276f,
  0.031339626f,   0.033376252f,   0.035545228f,   0.037855157f,
  0.040315199f,   0.042935108f,   0.045725273f,   0.048696758f,
  0.051861348f,   0.055231591f,   0.058820850f,   0.062643361f,
  0.066714279f,   0.071049749f,   0.075666962f,   0.080584227f,
  0.085821044f,   0.091398179f,   0.097337747f,   0.10366330f,
  0.11039993f,    0.11757434f,    0.12521498f,    0.13335215f,
  0.14201813f,    0.15124727f,    0.16107617f,    0.17154380f,
  0.18269168f,    0.19456402f,    0.20720788f,    0.22067342f,
  0.23501402f,    0.25028656f,    0.26655159f,    0.28387361f,
  0.30232132f,    0.32196786f,    0.34289114f,    0.36517414f,
  0.38890521f,    0.41417847f,    0.44109412f,    0.46975890f,
  0.50028648f,    0.53279791f,    0.56742212f,    0.60429640f,
  0.64356699f,    0.68538959f,    0.72993007f,    0.77736504f,
  0.82788260f,    0.88168307f,    0.9389798f,     1.0f
];
1584 
1585 
1586 // @OPTIMIZE: if you want to replace this bresenham line-drawing routine,
1587 // note that you must produce bit-identical output to decode correctly;
1588 // this specific sequence of operations is specified in the spec (it's
1589 // drawing integer-quantized frequency-space lines that the encoder
1590 // expects to be exactly the same)
1591 //     ... also, isn't the whole point of Bresenham's algorithm to NOT
1592 // have to divide in the setup? sigh.
// Builds the statement text used to store one floor sample: `a = b;` when
// STB_VORBIS_NO_DEFER_FLOOR is set, `a *= b;` otherwise. Meant to be mixed in.
template LINE_OP(string a, string b) {
  version(STB_VORBIS_NO_DEFER_FLOOR) enum LINE_OP = a~" = "~b~";";
  else enum LINE_OP = a~" *= "~b~";";
}
1598 
// Optional lookup table that draw_line uses in place of a division when
// both operands are small. Indexed as integer_divide_table[ady][adx].
version(STB_VORBIS_DIVIDE_TABLE) {
  enum : int {
    DIVTAB_NUMER = 32, // first index bound (ady)
    DIVTAB_DENOM = 64, // second index bound (adx)
  }
  byte[DIVTAB_DENOM][DIVTAB_NUMER] integer_divide_table; // 2KB
}
1604 
// nobranch abs trick
// Expands to an expression, to be used via mixin(ABS!"expr"). The hard-coded
// shift by 31 assumes a 32-bit signed int operand: (v>>31) is 0 for
// non-negative v and -1 for negative v, so the add-then-xor negates only
// negative values. The argument text appears three times in the expansion,
// so it should be free of side effects.
enum ABS(string v) = q{(((${v})+((${v})>>31))^((${v})>>31))}.cmacroFixVars!"v"(v);
1607 
// this is forceinline, but dmd inliner sux
// but hey, i have my k00l macrosystem!
//void draw_line (float* ${output}, int ${x0}, int ${y0}, int ${x1}, int ${y1}, int ${n})
//
// String-mixin macro. Walks x from ${x0} up to (but not including)
// min(${x1}, ${n}) and applies LINE_OP to ${output}[x] with
// inverse_db_table[y], where y starts at ${y0} and is stepped by `base`
// or `sy` per column according to the running error term.
//
// Notes for callers:
//  - ${x1} is clamped in place (`if (x1 > n) x1 = n;`), so it must be an
//    assignable expression and the caller's value is modified.
//  - adx = ${x1}-${x0} is used as a divisor, so ${x1} must differ from ${x0}.
//  - Nothing is written when ${x0} is not below the clamped ${x1}.
//  - All temporaries carry the ${__temp_prefix__} marker
//    (NOTE(review): presumably expanded to a unique prefix by cmacroFixVars;
//    that helper is defined elsewhere).
//  - The block comment at the end is an older copy of the loop without the
//    `x < x1` guard; it is dead text kept for reference.
enum draw_line(string output, string x0, string y0, string x1, string y1, string n) = q{{
  int ${__temp_prefix__}dy = ${y1}-${y0};
  int ${__temp_prefix__}adx = ${x1}-${x0};
  int ${__temp_prefix__}ady = mixin(ABS!"${__temp_prefix__}dy");
  int ${__temp_prefix__}base;
  int ${__temp_prefix__}x = ${x0}, ${__temp_prefix__}y = ${y0};
  int ${__temp_prefix__}err = 0;
  int ${__temp_prefix__}sy;

  version(STB_VORBIS_DIVIDE_TABLE) {
    if (${__temp_prefix__}adx < DIVTAB_DENOM && ${__temp_prefix__}ady < DIVTAB_NUMER) {
      if (${__temp_prefix__}dy < 0) {
        ${__temp_prefix__}base = -integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base-1;
      } else {
        ${__temp_prefix__}base = integer_divide_table[${__temp_prefix__}ady].ptr[${__temp_prefix__}adx];
        ${__temp_prefix__}sy = ${__temp_prefix__}base+1;
      }
    } else {
      ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
      ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
    }
  } else {
    ${__temp_prefix__}base = ${__temp_prefix__}dy/${__temp_prefix__}adx;
    ${__temp_prefix__}sy = ${__temp_prefix__}base+(${__temp_prefix__}dy < 0 ? -1 : 1);
  }
  ${__temp_prefix__}ady -= mixin(ABS!"${__temp_prefix__}base")*${__temp_prefix__}adx;
  if (${x1} > ${n}) ${x1} = ${n};
  if (${__temp_prefix__}x < ${x1}) {
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
    for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
      ${__temp_prefix__}err += ${__temp_prefix__}ady;
      if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
        ${__temp_prefix__}err -= ${__temp_prefix__}adx;
        ${__temp_prefix__}y += ${__temp_prefix__}sy;
      } else {
        ${__temp_prefix__}y += ${__temp_prefix__}base;
      }
      mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
    }
  }
  /*
  mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
  for (++${__temp_prefix__}x; ${__temp_prefix__}x < ${x1}; ++${__temp_prefix__}x) {
    ${__temp_prefix__}err += ${__temp_prefix__}ady;
    if (${__temp_prefix__}err >= ${__temp_prefix__}adx) {
      ${__temp_prefix__}err -= ${__temp_prefix__}adx;
      ${__temp_prefix__}y += ${__temp_prefix__}sy;
    } else {
      ${__temp_prefix__}y += ${__temp_prefix__}base;
    }
    mixin(LINE_OP!("${output}[${__temp_prefix__}x]", "inverse_db_table[${__temp_prefix__}y]"));
  }
  */
}}.cmacroFixVars!("output", "x0", "y0", "x1", "y1", "n")(output, x0, y0, x1, y1, n);
1666 
// Decodes one partition of `n` values into `target`, starting at `offset`,
// using `book`. rtype 0 decodes `n/book.dimensions` strided columns through
// codebook_decode_step; any other rtype decodes consecutive vectors through
// codebook_decode. Returns false as soon as a codebook call fails, true otherwise.
private int residue_decode (VorbisDecoder f, Codebook* book, float* target, int offset, int n, int rtype) {
  if (rtype != 0) {
    // contiguous layout: advance the write position one vector at a time
    int consumed = 0;
    while (consumed < n) {
      if (!codebook_decode(f, book, target+offset, n-consumed)) return false;
      consumed += book.dimensions;
      offset += book.dimensions;
    }
    return true;
  }

  // strided layout: one call per column, each column `step` apart
  immutable int step = n/book.dimensions;
  for (int k = 0; k < step; ++k) {
    if (!codebook_decode_step(f, book, target+offset+k, n-offset-k, step)) return false;
  }
  return true;
}
1680 
// Decodes the residue vectors for `ch` channels using residue configuration `rn`.
//
// Params:
//   f               = decoder state (codebooks, residue configs, temp allocator)
//   residue_buffers = per-channel output buffers; the first `n` floats of every
//                     channel that is not flagged in `do_not_decode` are zeroed
//                     before decoding starts
//   ch              = number of channels handled by this call
//   n               = number of floats per channel buffer
//   rn              = index into f.residue_config / f.residue_types
//   do_not_decode   = per-channel flags; non-zero means "skip this channel"
//
// The work is done in up to 8 passes over r.begin..r.end, split into partitions
// of r.part_size values. Residue type 2 with more than one channel is decoded
// as one interleaved stream through codebook_decode_deinterleave_repeat; every
// other case goes channel by channel through residue_decode. Hitting EOP or a
// failed codebook decode stops decoding early; whatever was decoded so far is
// left in the buffers. The temporary classification storage is released on
// every exit path through the `done` label.
private void decode_residue (VorbisDecoder f, ref float*[STB_VORBIS_MAX_CHANNELS] residue_buffers, int ch, int n, int rn, ubyte* do_not_decode) {
  import core.stdc.stdlib : alloca;
  import core.stdc.string : memset;

  Residue* r = f.residue_config+rn;
  int rtype = f.residue_types.ptr[rn];
  int c = r.classbook;
  int classwords = f.codebooks[c].dimensions;
  int n_read = r.end-r.begin;
  int part_read = n_read/r.part_size;
  uint temp_alloc_point = temp_alloc_save(f);
  // per-channel classification storage, one slot per partition
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
    int** classifications = cast(int**)mixin(temp_block_array!("f.vrchannels", "part_read*int.sizeof"));
  } else {
    ubyte*** part_classdata = cast(ubyte***)mixin(temp_block_array!("f.vrchannels", "part_read*cast(int)(ubyte*).sizeof"));
  }

  //stb_prof(2);
  // clear the output of every channel that will be decoded
  foreach (immutable i; 0..ch) if (!do_not_decode[i]) memset(residue_buffers.ptr[i], 0, float.sizeof*n);

  if (rtype == 2 && ch != 1) {
    // interleaved residue: nothing to do if every channel is flagged as skipped
    int j = void;
    for (j = 0; j < ch; ++j) if (!do_not_decode[j]) break;
    if (j == ch) goto done;

    //stb_prof(3);
    foreach (immutable pass; 0..8) {
      int pcount = 0, class_set = 0;
      if (ch == 2) {
        // two channels: channel/position split done with mask and shift
        //stb_prof(13);
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = (z&1), p_inter = z>>1;
          if (pass == 0) {
            // classifications are only read from the stream on the first pass
            Codebook *cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0].ptr[i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          //stb_prof(5);
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0].ptr[pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(20); // accounts for X time
              version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {
                if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              } else {
                // saves 1%
                //if (!codebook_decode_deinterleave_repeat_2(f, book, residue_buffers, &c_inter, &p_inter, n, r.part_size)) goto done; // according to C source
                if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              }
              //stb_prof(7);
            } else {
              // no book for this class in this pass: skip over the partition
              zz += r.part_size;
              c_inter = zz&1;
              p_inter = zz>>1;
            }
          }
          //stb_prof(8);
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      } else if (ch == 1) {
        // NOTE: not reachable, the enclosing condition requires ch != 1
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = 0, p_inter = z;
          if (pass == 0) {
            Codebook* cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0].ptr[i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0].ptr[pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(22);
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(3);
            } else {
              zz += r.part_size;
              c_inter = 0;
              p_inter = zz;
            }
          }
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      } else {
        // general channel count: channel/position split needs real division
        while (pcount < part_read) {
          int z = r.begin+pcount*r.part_size;
          int c_inter = z%ch, p_inter = z/ch;
          if (pass == 0) {
            Codebook* cc = f.codebooks+r.classbook;
            int q;
            mixin(DECODE!("q", "cc"));
            if (q == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[0].ptr[i+pcount] = q%r.classifications;
                q /= r.classifications;
              }
            } else {
              part_classdata[0][class_set] = r.classdata[q];
            }
          }
          for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
            int zz = r.begin+pcount*r.part_size;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[0].ptr[pcount];
            } else {
              int cc = part_classdata[0][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              Codebook* book = f.codebooks+b;
              //stb_prof(22);
              if (!codebook_decode_deinterleave_repeat(f, book, residue_buffers, ch, &c_inter, &p_inter, n, r.part_size)) goto done;
              //stb_prof(3);
            } else {
              zz += r.part_size;
              c_inter = zz%ch;
              p_inter = zz/ch;
            }
          }
          version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
            ++class_set;
          }
        }
      }
    }
    goto done;
  }
  //stb_prof(9);

  // non-interleaved residue: each channel is decoded into its own buffer
  foreach (immutable pass; 0..8) {
    int pcount = 0, class_set=0;
    while (pcount < part_read) {
      if (pass == 0) {
        // read one classification word per active channel
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            Codebook* cc = f.codebooks+r.classbook;
            int temp;
            mixin(DECODE!("temp", "cc"));
            if (temp == EOP) goto done;
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              for (int i = classwords-1; i >= 0; --i) {
                classifications[j].ptr[i+pcount] = temp%r.classifications;
                temp /= r.classifications;
              }
            } else {
              part_classdata[j][class_set] = r.classdata[temp];
            }
          }
        }
      }
      for (int i = 0; i < classwords && pcount < part_read; ++i, ++pcount) {
        foreach (immutable j; 0..ch) {
          if (!do_not_decode[j]) {
            version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
              int cc = classifications[j].ptr[pcount];
            } else {
              int cc = part_classdata[j][class_set][i];
            }
            int b = r.residue_books[cc].ptr[pass];
            if (b >= 0) {
              float* target = residue_buffers.ptr[j];
              int offset = r.begin+pcount*r.part_size;
              int nn = r.part_size;
              Codebook* book = f.codebooks+b;
              if (!residue_decode(f, book, target, offset, nn, rtype)) goto done;
            }
          }
        }
      }
      version(STB_VORBIS_DIVIDES_IN_RESIDUE) {} else {
        ++class_set;
      }
    }
  }
 done:
  //stb_prof(0);
  // common exit: release the classification storage and unwind the temp allocator
  version(STB_VORBIS_DIVIDES_IN_RESIDUE) temp_free(f, classifications); else temp_free(f, part_classdata);
  temp_alloc_restore(f, temp_alloc_point);
}
1897 
1898 
1899 // the following were split out into separate functions while optimizing;
1900 // they could be pushed back up but eh. __forceinline showed no change;
1901 // they're probably already being inlined.
// Iteration 0 of IMDCT step 3. Processes n>>2 groups of eight floats, walking
// downwards from e[i_off]. For each pair, the element at the upper pointer
// receives the sum and the element k_off away receives the difference
// multiplied by the complex twiddle (A[0], A[1]); A advances by 8 floats per pair.
// n is expected to be a multiple of 4 (checked only in debug(stb_vorbis) builds).
private void imdct_step3_iter0_loop (int n, float* e, int i_off, int k_off, float* A) {
  debug(stb_vorbis) assert((n&3) == 0);
  float* sums = e+i_off;
  float* diffs = sums+k_off;
  for (int left = n>>2; left > 0; --left) {
    float dre, dim;

    dre = sums[ 0]-diffs[ 0];
    dim = sums[-1]-diffs[-1];
    sums[ 0] = sums[ 0]+diffs[ 0];
    sums[-1] = sums[-1]+diffs[-1];
    diffs[ 0] = dre*A[0]-dim*A[1];
    diffs[-1] = dim*A[0]+dre*A[1];
    A += 8;

    dre = sums[-2]-diffs[-2];
    dim = sums[-3]-diffs[-3];
    sums[-2] = sums[-2]+diffs[-2];
    sums[-3] = sums[-3]+diffs[-3];
    diffs[-2] = dre*A[0]-dim*A[1];
    diffs[-3] = dim*A[0]+dre*A[1];
    A += 8;

    dre = sums[-4]-diffs[-4];
    dim = sums[-5]-diffs[-5];
    sums[-4] = sums[-4]+diffs[-4];
    sums[-5] = sums[-5]+diffs[-5];
    diffs[-4] = dre*A[0]-dim*A[1];
    diffs[-5] = dim*A[0]+dre*A[1];
    A += 8;

    dre = sums[-6]-diffs[-6];
    dim = sums[-7]-diffs[-7];
    sums[-6] = sums[-6]+diffs[-6];
    sums[-7] = sums[-7]+diffs[-7];
    diffs[-6] = dre*A[0]-dim*A[1];
    diffs[-7] = dim*A[0]+dre*A[1];
    A += 8;

    sums -= 8;
    diffs -= 8;
  }
}
1943 
// Step 3 inner loop, "r" variant. Runs lim>>2 rounds; each round handles four
// float pairs walking downwards from e[d0], storing the sum at the upper
// pointer and the twiddled difference k_off away. The twiddle pointer A
// advances by k1 floats after every pair.
private void imdct_step3_inner_r_loop (int lim, float* e, int d0, int k_off, float* A, int k1) {
  float* sums = e+d0;
  float* diffs = sums+k_off;
  for (int left = lim>>2; left > 0; --left) {
    float dre, dim;

    dre = sums[-0]-diffs[-0];
    dim = sums[-1]-diffs[-1];
    sums[-0] = sums[-0]+diffs[-0];
    sums[-1] = sums[-1]+diffs[-1];
    diffs[-0] = (dre)*A[0]-(dim)*A[1];
    diffs[-1] = (dim)*A[0]+(dre)*A[1];
    A += k1;

    dre = sums[-2]-diffs[-2];
    dim = sums[-3]-diffs[-3];
    sums[-2] = sums[-2]+diffs[-2];
    sums[-3] = sums[-3]+diffs[-3];
    diffs[-2] = (dre)*A[0]-(dim)*A[1];
    diffs[-3] = (dim)*A[0]+(dre)*A[1];
    A += k1;

    dre = sums[-4]-diffs[-4];
    dim = sums[-5]-diffs[-5];
    sums[-4] = sums[-4]+diffs[-4];
    sums[-5] = sums[-5]+diffs[-5];
    diffs[-4] = (dre)*A[0]-(dim)*A[1];
    diffs[-5] = (dim)*A[0]+(dre)*A[1];
    A += k1;

    dre = sums[-6]-diffs[-6];
    dim = sums[-7]-diffs[-7];
    sums[-6] = sums[-6]+diffs[-6];
    sums[-7] = sums[-7]+diffs[-7];
    diffs[-6] = (dre)*A[0]-(dim)*A[1];
    diffs[-7] = (dim)*A[0]+(dre)*A[1];
    A += k1;

    sums -= 8;
    diffs -= 8;
  }
}
1989 
// Step 3 inner loop, "s" variant. The four twiddle pairs are read once from A
// (at offsets 0, a_off, 2*a_off and 3*a_off) and reused for all n rounds. Each
// round handles four float pairs below the current position and then moves
// both pointers down by k0 floats.
private void imdct_step3_inner_s_loop (int n, float* e, int i_off, int k_off, float* A, int a_off, int k0) {
  // loop-invariant twiddles
  immutable float re0 = A[0],       im0 = A[1];
  immutable float re1 = A[a_off],   im1 = A[a_off+1];
  immutable float re2 = A[a_off*2], im2 = A[a_off*2+1];
  immutable float re3 = A[a_off*3], im3 = A[a_off*3+1];

  float* sums = e+i_off;
  float* diffs = sums+k_off;
  for (int left = n; left > 0; --left) {
    float dre, dim;

    dre = sums[ 0]-diffs[ 0];
    dim = sums[-1]-diffs[-1];
    sums[ 0] += diffs[ 0];
    sums[-1] += diffs[-1];
    diffs[ 0] = (dre)*re0-(dim)*im0;
    diffs[-1] = (dim)*re0+(dre)*im0;

    dre = sums[-2]-diffs[-2];
    dim = sums[-3]-diffs[-3];
    sums[-2] += diffs[-2];
    sums[-3] += diffs[-3];
    diffs[-2] = (dre)*re1-(dim)*im1;
    diffs[-3] = (dim)*re1+(dre)*im1;

    dre = sums[-4]-diffs[-4];
    dim = sums[-5]-diffs[-5];
    sums[-4] += diffs[-4];
    sums[-5] += diffs[-5];
    diffs[-4] = (dre)*re2-(dim)*im2;
    diffs[-5] = (dim)*re2+(dre)*im2;

    dre = sums[-6]-diffs[-6];
    dim = sums[-7]-diffs[-7];
    sums[-6] += diffs[-6];
    sums[-7] += diffs[-7];
    diffs[-6] = (dre)*re3-(dim)*im3;
    diffs[-7] = (dim)*re3+(dre)*im3;

    sums -= k0;
    diffs -= k0;
  }
}
2035 
// this was forceinline
//void iter_54(float *z)
//
// String-mixin macro. Evaluates ${z} once into a temporary pointer and then
// rewrites, in place, the eight floats at indices 0 down to -7 relative to
// it, using additions and subtractions only (no multiplies). The trailing
// comments on each store give the combination of the original inputs that
// ends up in that slot. The statement order matters: each slot is read for
// the last time before it is overwritten.
enum iter_54(string z) = q{{
  auto ${__temp_prefix__}z = (${z});
  float ${__temp_prefix__}k00, ${__temp_prefix__}k11, ${__temp_prefix__}k22, ${__temp_prefix__}k33;
  float ${__temp_prefix__}y0, ${__temp_prefix__}y1, ${__temp_prefix__}y2, ${__temp_prefix__}y3;

  ${__temp_prefix__}k00 = ${__temp_prefix__}z[ 0]-${__temp_prefix__}z[-4];
  ${__temp_prefix__}y0  = ${__temp_prefix__}z[ 0]+${__temp_prefix__}z[-4];
  ${__temp_prefix__}y2  = ${__temp_prefix__}z[-2]+${__temp_prefix__}z[-6];
  ${__temp_prefix__}k22 = ${__temp_prefix__}z[-2]-${__temp_prefix__}z[-6];

  ${__temp_prefix__}z[-0] = ${__temp_prefix__}y0+${__temp_prefix__}y2;   // z0+z4+z2+z6
  ${__temp_prefix__}z[-2] = ${__temp_prefix__}y0-${__temp_prefix__}y2;   // z0+z4-z2-z6

  // done with ${__temp_prefix__}y0, ${__temp_prefix__}y2

  ${__temp_prefix__}k33 = ${__temp_prefix__}z[-3]-${__temp_prefix__}z[-7];

  ${__temp_prefix__}z[-4] = ${__temp_prefix__}k00+${__temp_prefix__}k33; // z0-z4+z3-z7
  ${__temp_prefix__}z[-6] = ${__temp_prefix__}k00-${__temp_prefix__}k33; // z0-z4-z3+z7

  // done with ${__temp_prefix__}k33

  ${__temp_prefix__}k11 = ${__temp_prefix__}z[-1]-${__temp_prefix__}z[-5];
  ${__temp_prefix__}y1  = ${__temp_prefix__}z[-1]+${__temp_prefix__}z[-5];
  ${__temp_prefix__}y3  = ${__temp_prefix__}z[-3]+${__temp_prefix__}z[-7];

  ${__temp_prefix__}z[-1] = ${__temp_prefix__}y1+${__temp_prefix__}y3;   // z1+z5+z3+z7
  ${__temp_prefix__}z[-3] = ${__temp_prefix__}y1-${__temp_prefix__}y3;   // z1+z5-z3-z7
  ${__temp_prefix__}z[-5] = ${__temp_prefix__}k11-${__temp_prefix__}k22; // z1-z5+z2-z6
  ${__temp_prefix__}z[-7] = ${__temp_prefix__}k11+${__temp_prefix__}k22; // z1-z5-z2+z6
}}.cmacroFixVars!"z"(z);
2069 
// Combined last three iterations of IMDCT step 3. Processes n blocks of 16
// floats, walking downwards from e[i_off]. Within a block, four pair-wise
// sum/difference steps are applied between the upper and lower eight floats
// (only the second and fourth need a multiply, by the single twiddle
// A[base_n>>3]), followed by iter_54 on each half of the block.
private void imdct_step3_inner_s_loop_ld654 (int n, float* e, int i_off, float* A, int base_n) {
  immutable float A2 = A[base_n>>3];
  float* z = e+i_off;
  float* stop = z-16*n;
  for (; z > stop; z -= 16) {
    float t0, t1;

    t0 = z[-0]-z[-8];
    t1 = z[-1]-z[-9];
    z[-0] += z[-8];
    z[-1] += z[-9];
    z[-8] = t0;
    z[-9] = t1;

    t0 = z[ -2]-z[-10];
    t1 = z[ -3]-z[-11];
    z[ -2] += z[-10];
    z[ -3] += z[-11];
    z[-10] = (t0+t1)*A2;
    z[-11] = (t1-t0)*A2;

    t0 = z[-12]-z[ -4];  // reverse to avoid a unary negation
    t1 = z[ -5]-z[-13];
    z[ -4] += z[-12];
    z[ -5] += z[-13];
    z[-12] = t1;
    z[-13] = t0;

    t0 = z[-14]-z[ -6];  // reverse to avoid a unary negation
    t1 = z[ -7]-z[-15];
    z[ -6] += z[-14];
    z[ -7] += z[-15];
    z[-14] = (t0+t1)*A2;
    z[-15] = (t0-t1)*A2;

    mixin(iter_54!"z");
    mixin(iter_54!"z-8");
  }
}
2110 
// In-place inverse MDCT of one block.
//
// Params:
//   buffer    = spectral input; the first n/2 floats are read, and on return
//               all n floats hold the output written by the final step
//   n         = block size; the loop bounds below shift it right by up to 6,
//               NOTE(review): presumably always a power of two, confirm at callers
//   f         = decoder state supplying the per-blocktype tables f.A, f.B, f.C
//               and f.bit_reverse, and the temp allocator
//   blocktype = index into those tables
//
// A scratch buffer of n/2 floats is taken from the temp allocator and released
// before returning. The steps below ping-pong between `buffer` and the scratch
// buffer; the statement order inside each step must be preserved.
private void inverse_mdct (float* buffer, int n, VorbisDecoder f, int blocktype) {
  import core.stdc.stdlib : alloca;

  int n2 = n>>1, n4 = n>>2, n8 = n>>3, l;
  int ld;
  // @OPTIMIZE: reduce register pressure by using fewer variables?
  int save_point = temp_alloc_save(f);
  float *buf2;
  buf2 = cast(float*)mixin(temp_alloc!("n2*float.sizeof"));
  float *u = null, v = null;
  // twiddle factors
  float *A = f.A.ptr[blocktype];

  // IMDCT algorithm from "The use of multirate filter banks for coding of high quality digital audio"
  // See notes about bugs in that paper in less-optimal implementation 'inverse_mdct_old' after this function.

  // kernel from paper


  // merged:
  //   copy and reflect spectral data
  //   step 0

  // note that it turns out that the items added together during
  // this step are, in fact, being added to themselves (as reflected
  // by step 0). inexplicable inefficiency! this became obvious
  // once I combined the passes.

  // so there's a missing 'times 2' here (for adding X to itself).
  // this propagates through linearly to the end, where the numbers
  // are 1/2 too small, and need to be compensated for.

  {
    float* d, e, AA, e_stop;
    d = &buf2[n2-2];
    AA = A;
    e = &buffer[0];
    e_stop = &buffer[n2];
    // first half: read buffer forwards, fill buf2 from the top down
    while (e != e_stop) {
      d[1] = (e[0]*AA[0]-e[2]*AA[1]);
      d[0] = (e[0]*AA[1]+e[2]*AA[0]);
      d -= 2;
      AA += 2;
      e += 4;
    }
    // second half: read buffer backwards with negated inputs
    e = &buffer[n2-3];
    while (d >= buf2) {
      d[1] = (-e[2]*AA[0]- -e[0]*AA[1]);
      d[0] = (-e[2]*AA[1]+ -e[0]*AA[0]);
      d -= 2;
      AA += 2;
      e -= 4;
    }
  }

  // now we use symbolic names for these, so that we can
  // possibly swap their meaning as we change which operations
  // are in place

  u = buffer;
  v = buf2;

  // step 2    (paper output is w, now u)
  // this could be in place, but the data ends up in the wrong
  // place... _somebody_'s got to swap it, so this is nominated
  {
    float* AA = &A[n2-8];
    float* d0, d1, e0, e1;
    e0 = &v[n4];
    e1 = &v[0];
    d0 = &u[n4];
    d1 = &u[0];
    // twiddles are consumed from the end of A towards its start
    while (AA >= A) {
      float v40_20, v41_21;

      v41_21 = e0[1]-e1[1];
      v40_20 = e0[0]-e1[0];
      d0[1]  = e0[1]+e1[1];
      d0[0]  = e0[0]+e1[0];
      d1[1]  = v41_21*AA[4]-v40_20*AA[5];
      d1[0]  = v40_20*AA[4]+v41_21*AA[5];

      v41_21 = e0[3]-e1[3];
      v40_20 = e0[2]-e1[2];
      d0[3]  = e0[3]+e1[3];
      d0[2]  = e0[2]+e1[2];
      d1[3]  = v41_21*AA[0]-v40_20*AA[1];
      d1[2]  = v40_20*AA[0]+v41_21*AA[1];

      AA -= 8;

      d0 += 4;
      d1 += 4;
      e0 += 4;
      e1 += 4;
    }
  }

  // step 3
  ld = ilog(n)-1; // ilog is off-by-one from normal definitions

  // optimized step 3:

  // the original step3 loop can be nested r inside s or s inside r;
  // it's written originally as s inside r, but this is dumb when r
  // iterates many times, and s few. So I have two copies of it and
  // switch between them halfway.

  // this is iteration 0 of step 3
  imdct_step3_iter0_loop(n>>4, u, n2-1-n4*0, -(n>>3), A);
  imdct_step3_iter0_loop(n>>4, u, n2-1-n4*1, -(n>>3), A);

  // this is iteration 1 of step 3
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*0, -(n>>4), A, 16);
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*1, -(n>>4), A, 16);
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*2, -(n>>4), A, 16);
  imdct_step3_inner_r_loop(n>>5, u, n2-1-n8*3, -(n>>4), A, 16);

  // remaining iterations: the "r" form first, then the "s" form; `l` carries
  // over from the first loop into the second
  l = 2;
  for (; l < (ld-3)>>1; ++l) {
    int k0 = n>>(l+2), k0_2 = k0>>1;
    int lim = 1<<(l+1);
    foreach (int i; 0..lim) imdct_step3_inner_r_loop(n>>(l+4), u, n2-1-k0*i, -k0_2, A, 1<<(l+3));
  }

  for (; l < ld-6; ++l) {
    int k0 = n>>(l+2), k1 = 1<<(l+3), k0_2 = k0>>1;
    int rlim = n>>(l+6);
    int lim = 1<<(l+1);
    int i_off;
    float *A0 = A;
    i_off = n2-1;
    foreach (immutable _; 0..rlim) {
      imdct_step3_inner_s_loop(lim, u, i_off, -k0_2, A0, k1, k0);
      A0 += k1*4;
      i_off -= 8;
    }
  }

  // iterations with count:
  //   ld-6,-5,-4 all interleaved together
  //       the big win comes from getting rid of needless flops
  //         due to the constants on pass 5 & 4 being all 1 and 0;
  //       combining them to be simultaneous to improve cache made little difference
  imdct_step3_inner_s_loop_ld654(n>>5, u, n2-1, A, n);

  // output is u

  // step 4, 5, and 6
  // cannot be in-place because of step 5
  {
    ushort *bitrev = f.bit_reverse.ptr[blocktype];
    // weirdly, I'd have thought reading sequentially and writing
    // erratically would have been better than vice-versa, but in
    // fact that's not what my testing showed. (That is, with
    // j = bitreverse(i), do you read i and write j, or read j and write i.)
    float *d0 = &v[n4-4];
    float *d1 = &v[n2-4];
    int k4;
    // each table entry selects four consecutive floats of u; two entries per round
    while (d0 >= v) {
      k4 = bitrev[0];
      d1[3] = u[k4+0];
      d1[2] = u[k4+1];
      d0[3] = u[k4+2];
      d0[2] = u[k4+3];

      k4 = bitrev[1];
      d1[1] = u[k4+0];
      d1[0] = u[k4+1];
      d0[1] = u[k4+2];
      d0[0] = u[k4+3];

      d0 -= 4;
      d1 -= 4;
      bitrev += 2;
    }
  }
  // (paper output is u, now v)


  // data must be in buf2
  debug(stb_vorbis) assert(v == buf2);

  // step 7   (paper output is v, now v)
  // this is now in place
  {
    float a02, a11, b0, b1, b2, b3;
    float* C = f.C.ptr[blocktype];
    float* d, e;
    d = v;
    e = v+n2-4;
    // d walks up from the start, e walks down from the end, until they meet
    while (d < e) {
      a02 = d[0]-e[2];
      a11 = d[1]+e[3];

      b0 = C[1]*a02+C[0]*a11;
      b1 = C[1]*a11-C[0]*a02;

      b2 = d[0]+e[ 2];
      b3 = d[1]-e[ 3];

      d[0] = b2+b0;
      d[1] = b3+b1;
      e[2] = b2-b0;
      e[3] = b1-b3;

      a02 = d[2]-e[0];
      a11 = d[3]+e[1];

      b0 = C[3]*a02+C[2]*a11;
      b1 = C[3]*a11-C[2]*a02;

      b2 = d[2]+e[ 0];
      b3 = d[3]-e[ 1];

      d[2] = b2+b0;
      d[3] = b3+b1;
      e[0] = b2-b0;
      e[1] = b1-b3;

      C += 4;
      d += 4;
      e -= 4;
    }
  }

  // data must be in buf2


  // step 8+decode   (paper output is X, now buffer)
  // this generates pairs of data a la 8 and pushes them directly through
  // the decode kernel (pushing rather than pulling) to avoid having
  // to make another pass later

  // this cannot POSSIBLY be in place, so we refer to the buffers directly
  {
    float p0, p1, p2, p3;
    float* d0, d1, d2, d3;
    float* B = f.B.ptr[blocktype]+n2-8;
    float* e = buf2+n2-8;
    // four output cursors: d0/d2 move upwards, d1/d3 move downwards
    d0 = &buffer[0];
    d1 = &buffer[n2-4];
    d2 = &buffer[n2];
    d3 = &buffer[n-4];
    while (e >= v) {
      p3 =  e[6]*B[7]-e[7]*B[6];
      p2 = -e[6]*B[6]-e[7]*B[7];

      d0[0] =   p3;
      d1[3] =  -p3;
      d2[0] =   p2;
      d3[3] =   p2;

      p1 =  e[4]*B[5]-e[5]*B[4];
      p0 = -e[4]*B[4]-e[5]*B[5];

      d0[1] =   p1;
      d1[2] = - p1;
      d2[1] =   p0;
      d3[2] =   p0;

      p3 =  e[2]*B[3]-e[3]*B[2];
      p2 = -e[2]*B[2]-e[3]*B[3];

      d0[2] =   p3;
      d1[1] = - p3;
      d2[2] =   p2;
      d3[1] =   p2;

      p1 =  e[0]*B[1]-e[1]*B[0];
      p0 = -e[0]*B[0]-e[1]*B[1];

      d0[3] =   p1;
      d1[0] = - p1;
      d2[3] =   p0;
      d3[0] =   p0;

      B -= 8;
      e -= 8;
      d0 += 4;
      d2 += 4;
      d1 -= 4;
      d3 -= 4;
    }
  }

  // release the scratch buffer and unwind the temp allocator
  temp_free(f, buf2);
  temp_alloc_restore(f, save_point);
}
2400 
// Look up the precomputed window table for a half-block of `len` samples.
// `len` is doubled and matched against the two configured block sizes
// (short first, then long); any other length trips `assert(0)`.
private float *get_window (VorbisDecoder f, int len) {
  immutable int fullsize = len<<1;
  if (fullsize != f.blocksize_0) {
    if (fullsize != f.blocksize_1) assert(0);
    return f.window.ptr[1];
  }
  return f.window.ptr[0];
}
2407 
// Element type of the `finalY` array handed to `do_floor`:
// `int` when STB_VORBIS_NO_DEFER_FLOOR is set, `short` otherwise.
version(STB_VORBIS_NO_DEFER_FLOOR)
  alias YTYPE = int;
else
  alias YTYPE = short;
2413 
// Apply the floor-1 curve of channel `i` to `target`.
//
// Walks the floor points in sorted X order, drawing a line segment between
// each pair of consecutive "used" points, then extends the last Y value
// flat up to n/2. A point counts as used when `finalY[j] >= 0` (default
// build) or when `step2_flag[j]` is set (STB_VORBIS_NO_DEFER_FLOOR build).
//
// Returns: true on success; the value of `error()` if the channel's floor
// has type 0.
private int do_floor (VorbisDecoder f, Mapping* map, int i, int n, float* target, YTYPE* finalY, ubyte* step2_flag) {
  int n2 = n>>1;
  int floor = map.submap_floor.ptr[map.chan[i].mux];
  if (f.floor_types.ptr[floor] == 0) return error(f, STBVorbisError.invalid_stream);

  Floor1* g = &f.floor_config[floor].floor1;
  // (lx, ly) is the end point of the last segment drawn so far
  int lx = 0;
  int ly = finalY[0]*g.floor1_multiplier;
  for (int q = 1; q < g.values; ++q) {
    int j = g.sorted_order.ptr[q];
    // skip points that are not part of the curve
    version(STB_VORBIS_NO_DEFER_FLOOR) {
      if (!step2_flag[j]) continue;
    } else {
      if (finalY[j] < 0) continue;
    }
    int hy = finalY[j]*g.floor1_multiplier;
    int hx = g.Xlist.ptr[j];
    if (lx != hx) { mixin(draw_line!("target", "lx", "ly", "hx", "hy", "n2")); }
    lx = hx;
    ly = hy;
  }

  // optimization of: draw_line(target, lx, ly, n, ly, n2);
  for (int j = lx; j < n2; ++j) { mixin(LINE_OP!("target[j]", "inverse_db_table[ly]")); }
  return true;
}
2444 
2445 // The meaning of "left" and "right"
2446 //
2447 // For a given frame:
2448 //     we compute samples from 0..n
2449 //     window_center is n/2
2450 //     we'll window and mix the samples from left_start to left_end with data from the previous frame
2451 //     all of the samples from left_end to right_start can be output without mixing; however,
2452 //        this interval is 0-length except when transitioning between short and long frames
2453 //     all of the samples from right_start to right_end need to be mixed with the next frame,
2454 //        which we don't have, so those get saved in a buffer
2455 //     frame N's right_end-right_start, the number of samples to mix with the next frame,
2456 //        has to be the same as frame N+1's left_end-left_start (which they are by
2457 //        construction)
2458 
// Read the header of the next audio packet and compute its window layout.
//
// Non-audio packets (first bit set) are skipped entirely. On success the
// mode index is stored in `*mode` and the four window boundaries are stored
// through the `p_*` pointers.
//
// Returns: true when an audio packet header was read; false on end of
// stream, when no packet could be started, or when the mode number is
// missing or out of range.
private int vorbis_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  f.channel_buffer_start = f.channel_buffer_end = 0;

  // find the next audio packet
  for (;;) {
    if (f.eof) return false;
    if (!maybe_start_packet(f)) return false;
    // check packet type: a zero bit marks an audio packet
    if (get_bits!1(f) == 0) break;
    /+if (f.push_mode) return error(f, STBVorbisError.bad_packet_type);+/
    // not audio: drain the packet and try the next one
    while (get8_packet(f) != EOP) {}
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  immutable int modeidx = get_bits_main(f, ilog(f.mode_count-1));
  if (modeidx == EOP) return false;
  if (modeidx >= f.mode_count) return false;
  *mode = modeidx;
  Mode* m = f.mode_config.ptr+modeidx;

  // only long blocks carry the previous/next window flags
  int blocklen, prevflag = 0, nextflag = 0;
  if (m.blockflag) {
    blocklen = f.blocksize_1;
    prevflag = get_bits!1(f);
    nextflag = get_bits!1(f);
  } else {
    blocklen = f.blocksize_0;
  }

  // WINDOWING
  immutable int center = blocklen>>1;

  // left edge: a long block following a short one uses a short slope
  if (!m.blockflag || prevflag) {
    *p_left_start = 0;
    *p_left_end   = center;
  } else {
    *p_left_start = (blocklen-f.blocksize_0)>>2;
    *p_left_end   = (blocklen+f.blocksize_0)>>2;
  }

  // right edge: a long block followed by a short one uses a short slope
  if (!m.blockflag || nextflag) {
    *p_right_start = center;
    *p_right_end   = blocklen;
  } else {
    *p_right_start = (blocklen*3-f.blocksize_0)>>2;
    *p_right_end   = (blocklen*3+f.blocksize_0)>>2;
  }
  return true;
}
2508 
// Decode the body of an audio packet after `vorbis_decode_initial` has read
// its header: per-channel floor data, residue, inverse channel coupling,
// floor application and the inverse MDCT, followed by the sample-position
// bookkeeping.
//
// Params:
//   f           = decoder state
//   len         = receives the buffer index one past the last usable sample
//   m           = mode of this packet (selects block size and mapping)
//   left_start  = start of the left window slope
//   left_end    = end of the left window slope (not used by this function)
//   right_start = start of the right window slope
//   right_end   = end of the right window slope
//   p_left      = receives the adjusted `left_start` when deferred discard
//                 samples are consumed
//
// Returns: true on success; the value of `error()` if a channel's floor
// has type 0.
private int vorbis_decode_packet_rest (VorbisDecoder f, int* len, Mode* m, int left_start, int left_end, int right_start, int right_end, int* p_left) {
  import core.stdc.string : memcpy, memset;

  Mapping* map;
  int n, n2;
  int[256] zero_channel;        // channels with no floor; may be re-enabled by coupling
  int[256] really_zero_channel; // snapshot taken before coupling re-enables channels

  // WINDOWING
  n = f.blocksize.ptr[m.blockflag];
  map = &f.mapping[m.mapping];

  // FLOORS
  n2 = n>>1;

  //stb_prof(1);
  foreach (immutable i; 0..f.vrchannels) {
    int s = map.chan[i].mux, floor;
    zero_channel[i] = false;
    floor = map.submap_floor.ptr[s];
    if (f.floor_types.ptr[floor] == 0) {
      return error(f, STBVorbisError.invalid_stream);
    } else {
      Floor1* g = &f.floor_config[floor].floor1;
      // one flag bit: does this channel carry floor data in this packet?
      if (get_bits!1(f)) {
        short* finalY;
        ubyte[256] step2_flag = void;
        immutable int[4] range_list = [ 256, 128, 86, 64 ];
        int range = range_list[g.floor1_multiplier-1];
        int offset = 2;
        finalY = f.finalY.ptr[i];
        // the first two Y values are stored as plain bit fields
        finalY[0] = cast(short)get_bits_main(f, ilog(range)-1); //k8
        finalY[1] = cast(short)get_bits_main(f, ilog(range)-1); //k8
        // the remaining Y values are codebook-coded, partition by partition
        foreach (immutable j; 0..g.partitions) {
          int pclass = g.partition_class_list.ptr[j];
          int cdim = g.class_dimensions.ptr[pclass];
          int cbits = g.class_subclasses.ptr[pclass];
          int csub = (1<<cbits)-1;
          int cval = 0;
          if (cbits) {
            Codebook *cc = f.codebooks+g.class_masterbooks.ptr[pclass];
            mixin(DECODE!("cval", "cc"));
          }
          foreach (immutable k; 0..cdim) {
            int book = g.subclass_books.ptr[pclass].ptr[cval&csub];
            cval = cval>>cbits;
            if (book >= 0) {
              int temp;
              Codebook *cc = f.codebooks+book;
              mixin(DECODE!("temp", "cc"));
              finalY[offset++] = cast(short)temp; //k8
            } else {
              finalY[offset++] = 0;
            }
          }
        }
        if (f.valid_bits == INVALID_BITS) goto error; // behavior according to spec
        // turn the decoded deltas into absolute Y values, predicting each
        // point from its two neighbors
        step2_flag[0] = step2_flag[1] = 1;
        foreach (immutable j; 2..g.values) {
          int low = g.neighbors.ptr[j].ptr[0];
          int high = g.neighbors.ptr[j].ptr[1];
          //neighbors(g.Xlist, j, &low, &high);
          int pred = void;
          mixin(predict_point!("pred", "g.Xlist.ptr[j]", "g.Xlist.ptr[low]", "g.Xlist.ptr[high]", "finalY[low]", "finalY[high]"));
          int val = finalY[j];
          int highroom = range-pred;
          int lowroom = pred;
          auto room = (highroom < lowroom ? highroom : lowroom)*2;
          if (val) {
            step2_flag[low] = step2_flag[high] = 1;
            step2_flag[j] = 1;
            if (val >= room) {
              finalY[j] = cast(short)(highroom > lowroom ? val-lowroom+pred : pred-val+highroom-1); //k8
            } else {
              finalY[j] = cast(short)(val&1 ? pred-((val+1)>>1) : pred+(val>>1)); //k8
            }
          } else {
            step2_flag[j] = 0;
            finalY[j] = cast(short)pred; //k8
          }
        }

        version(STB_VORBIS_NO_DEFER_FLOOR) {
          do_floor(f, map, i, n, f.floor_buffers.ptr[i], finalY, step2_flag);
        } else {
          // defer final floor computation until _after_ residue;
          // unused points are marked with -1 so do_floor can skip them
          foreach (immutable j; 0..g.values) if (!step2_flag[j]) finalY[j] = -1;
        }
      } else {
  error:
        zero_channel[i] = true;
      }
      // So we just defer everything else to later
      // at this point we've decoded the floor into buffer
    }
  }
  //stb_prof(0);
  // at this point we've decoded all floors

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  // re-enable coupled channels if necessary
  memcpy(really_zero_channel.ptr, zero_channel.ptr, (really_zero_channel[0]).sizeof*f.vrchannels);
  foreach (immutable i; 0..map.coupling_steps) {
    if (!zero_channel[map.chan[i].magnitude] || !zero_channel[map.chan[i].angle]) {
      zero_channel[map.chan[i].magnitude] = zero_channel[map.chan[i].angle] = false;
    }
  }

  // RESIDUE DECODE
  foreach (immutable i; 0..map.submaps) {
    float*[STB_VORBIS_MAX_CHANNELS] residue_buffers;
    ubyte[256] do_not_decode = void;
    int ch = 0;
    // gather the channels that belong to this submap
    foreach (immutable j; 0..f.vrchannels) {
      if (map.chan[j].mux == i) {
        if (zero_channel[j]) {
          do_not_decode[ch] = true;
          residue_buffers.ptr[ch] = null;
        } else {
          do_not_decode[ch] = false;
          residue_buffers.ptr[ch] = f.channel_buffers.ptr[j];
        }
        ++ch;
      }
    }
    int r = map.submap_residue.ptr[i];
    decode_residue(f, residue_buffers, ch, n2, r, do_not_decode.ptr);
  }

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

   // INVERSE COUPLING
  //stb_prof(14);
  // coupling steps are undone in reverse order
  foreach_reverse (immutable i; 0..map.coupling_steps) {
    int n2n = n>>1;
    float* mm = f.channel_buffers.ptr[map.chan[i].magnitude];
    float* a = f.channel_buffers.ptr[map.chan[i].angle];
    foreach (immutable j; 0..n2n) {
      float a2, m2;
      if (mm[j] > 0) {
        if (a[j] > 0) { m2 = mm[j]; a2 = mm[j]-a[j]; } else { a2 = mm[j]; m2 = mm[j]+a[j]; }
      } else {
        if (a[j] > 0) { m2 = mm[j]; a2 = mm[j]+a[j]; } else { a2 = mm[j]; m2 = mm[j]-a[j]; }
      }
      mm[j] = m2;
      a[j] = a2;
    }
  }

  // finish decoding the floors
  version(STB_VORBIS_NO_DEFER_FLOOR) {
    foreach (immutable i; 0..f.vrchannels) {
      if (really_zero_channel[i]) {
        memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
      } else {
        // the per-channel buffers are indexed directly (they are used as
        // float* elsewhere in this function, which has no .ptr property)
        foreach (immutable j; 0..n2) (f.channel_buffers.ptr[i])[j] *= (f.floor_buffers.ptr[i])[j];
      }
    }
  } else {
    //stb_prof(15);
    foreach (immutable i; 0..f.vrchannels) {
      if (really_zero_channel[i]) {
        memset(f.channel_buffers.ptr[i], 0, (*f.channel_buffers.ptr[i]).sizeof*n2);
      } else {
        do_floor(f, map, i, n, f.channel_buffers.ptr[i], f.finalY.ptr[i], null);
      }
    }
  }

  // INVERSE MDCT
  //stb_prof(16);
  foreach (immutable i; 0..f.vrchannels) inverse_mdct(f.channel_buffers.ptr[i], n, f, m.blockflag);
  //stb_prof(0);

  // this shouldn't be necessary, unless we exited on an error
  // and want to flush to get to the next packet
  flush_packet(f);

  if (f.first_decode) {
    // assume we start so first non-discarded sample is sample 0
    // this isn't to spec, but spec would require us to read ahead
    // and decode the size of all current frames--could be done,
    // but presumably it's not a commonly used feature
    f.current_loc = -n2; // start of first frame is positioned for discard
    // we might have to discard samples "from" the next frame too,
    // if we're lapping a large block then a small at the start?
    f.discard_samples_deferred = n-right_end;
    f.current_loc_valid = true;
    f.first_decode = false;
  } else if (f.discard_samples_deferred) {
    if (f.discard_samples_deferred >= right_start-left_start) {
      // the whole frame is discarded; carry the remainder over
      f.discard_samples_deferred -= (right_start-left_start);
      left_start = right_start;
      *p_left = left_start;
    } else {
      // only the first part of the frame is discarded
      left_start += f.discard_samples_deferred;
      *p_left = left_start;
      f.discard_samples_deferred = 0;
    }
  } else if (f.previous_length == 0 && f.current_loc_valid) {
    // we're recovering from a seek... that means we're going to discard
    // the samples from this packet even though we know our position from
    // the last page header, so we need to update the position based on
    // the discarded samples here
    // but wait, the code below is going to add this in itself even
    // on a discard, so we don't need to do it here...
  }

  // check if we have ogg information about the sample # for this packet
  if (f.last_seg_which == f.end_seg_with_known_loc) {
    // if we have a valid current loc, and this is final:
    if (f.current_loc_valid && (f.page_flag&PAGEFLAG_last_page)) {
      uint current_end = f.known_loc_for_packet-(n-right_end);
      // then let's infer the size of the (probably) short final frame
      if (current_end < f.current_loc+right_end) {
        // only a stream end that lies *before* the start of this frame is
        // "negative"; an end inside the frame is the normal truncated case
        // and must keep its samples
        if (current_end < f.current_loc) {
          // negative truncation, that's impossible!
          *len = 0;
        } else {
          *len = current_end-f.current_loc;
        }
        *len += left_start;
        if (*len > right_end) *len = right_end; // this should never happen
        f.current_loc += *len;
        return true;
      }
    }
    // otherwise, just set our sample loc
    // guess that the ogg granule pos refers to the _middle_ of the
    // last frame?
    // set f.current_loc to the position of left_start
    f.current_loc = f.known_loc_for_packet-(n2-left_start);
    f.current_loc_valid = true;
  }
  if (f.current_loc_valid) f.current_loc += (right_start-left_start);

  //debug(stb_vorbis) if (f.alloc.alloc_buffer) assert(f.alloc.alloc_buffer_length_in_bytes == f.temp_offset);

  *len = right_end;  // ignore samples after the window goes to 0
  return true;
}
2751 
// Decode one complete audio packet: read its header with
// `vorbis_decode_initial`, then decode the body with
// `vorbis_decode_packet_rest`.
//
// `*p_left` and `*p_right` receive the starts of the left and right window
// slopes; `*len` is set by `vorbis_decode_packet_rest`.
//
// Returns: 0 if the packet header could not be read, otherwise the result
// of `vorbis_decode_packet_rest`.
private int vorbis_decode_packet (VorbisDecoder f, int* len, int* p_left, int* p_right) {
  int modeidx, lend, rend;
  immutable headerOk = vorbis_decode_initial(f, p_left, &lend, p_right, &rend, &modeidx);
  if (!headerOk) return 0;
  Mode* packetMode = f.mode_config.ptr+modeidx;
  return vorbis_decode_packet_rest(f, len, packetMode, *p_left, lend, *p_right, rend, p_left);
}
2757 
// Overlap-add the freshly decoded frame with the saved tail of the previous
// frame, save this frame's tail for the next call, and report how many
// samples are ready for output.
//
// `left` is where this frame's rising window slope starts, i.e. where the
// previous frame's tail gets mixed in. `right` is where the falling slope
// starts, i.e. where the returned data ends and the saved data begins.
// `len` is the number of usable samples in the channel buffers.
//
// Returns: the number of finished samples (`right-left` after clamping
// `right` to `len`), or 0 when there was no previous frame to mix with.
private int vorbis_finish_frame (VorbisDecoder f, int len, int left, int right) {
  immutable int prevlen = f.previous_length;

  // mix in the tail of the previous window
  if (prevlen) {
    float* w = get_window(f, prevlen);
    foreach (immutable ch; 0..f.vrchannels) {
      auto cur = f.channel_buffers.ptr[ch];
      auto old = f.previous_window.ptr[ch];
      foreach (immutable j; 0..prevlen) {
        cur[left+j] = cur[left+j]*w[j]+old[j]*w[prevlen-1-j];
      }
    }
  }

  // last half of this data becomes previous window
  f.previous_length = len-right;

  // @OPTIMIZE: could avoid this copy by double-buffering the
  // output (flipping previous_window with channel_buffers), but
  // then previous_window would have to be 2x as large, and
  // channel_buffers couldn't be temp mem (although they're NOT
  // currently temp mem, they could be (unless we want to level
  // performance by spreading out the computation))
  foreach (immutable ch; 0..f.vrchannels) {
    auto src = f.channel_buffers.ptr[ch];
    auto dst = f.previous_window.ptr[ch];
    uint j = 0;
    while (right+j < len) {
      dst[j] = src[right+j];
      ++j;
    }
  }

  // there was no previous packet, so this data isn't valid...
  // this isn't entirely true, only the would-have-overlapped data
  // isn't valid, but this seems to be what the spec requires
  if (!prevlen) return 0;

  // truncate a short frame
  if (len < right) right = len;

  immutable int produced = right-left;
  f.samples_output += produced;
  return produced;
}
2809 
// Decode one packet and run it through `vorbis_finish_frame`, ignoring the
// sample count that call returns.
//
// Returns: true if a packet was decoded, false otherwise.
private bool vorbis_pump_first_frame (VorbisDecoder f) {
  int len, right, left;
  if (!vorbis_decode_packet(f, &len, &left, &right)) return false;
  vorbis_finish_frame(f, len, left, right);
  return true;
}
2818 
2819 /+ k8: i don't need that, so it's dead
2820 private int is_whole_packet_present (VorbisDecoder f, int end_page) {
2821   import core.stdc.string : memcmp;
2822 
2823   // make sure that we have the packet available before continuing...
2824   // this requires a full ogg parse, but we know we can fetch from f.stream
2825 
2826   // instead of coding this out explicitly, we could save the current read state,
2827   // read the next packet with get8() until end-of-packet, check f.eof, then
2828   // reset the state? but that would be slower, esp. since we'd have over 256 bytes
2829   // of state to restore (primarily the page segment table)
2830 
2831   int s = f.next_seg, first = true;
2832   ubyte *p = f.stream;
2833 
2834   if (s != -1) { // if we're not starting the packet with a 'continue on next page' flag
2835     for (; s < f.segment_count; ++s) {
2836       p += f.segments[s];
2837       if (f.segments[s] < 255) break; // stop at first short segment
2838     }
2839     // either this continues, or it ends it...
2840     if (end_page && s < f.segment_count-1) return error(f, STBVorbisError.invalid_stream);
2841     if (s == f.segment_count) s = -1; // set 'crosses page' flag
2842     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2843     first = false;
2844   }
2845   while (s == -1) {
2846     ubyte* q = void;
2847     int n = void;
2848     // check that we have the page header ready
2849     if (p+26 >= f.stream_end) return error(f, STBVorbisError.need_more_data);
2850     // validate the page
2851     if (memcmp(p, ogg_page_header.ptr, 4)) return error(f, STBVorbisError.invalid_stream);
2852     if (p[4] != 0) return error(f, STBVorbisError.invalid_stream);
2853     if (first) { // the first segment must NOT have 'continued_packet', later ones MUST
2854       if (f.previous_length && (p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2855       // if no previous length, we're resynching, so we can come in on a continued-packet,
2856       // which we'll just drop
2857     } else {
2858       if (!(p[5]&PAGEFLAG_continued_packet)) return error(f, STBVorbisError.invalid_stream);
2859     }
2860     n = p[26]; // segment counts
2861     q = p+27; // q points to segment table
2862     p = q+n; // advance past header
2863     // make sure we've read the segment table
2864     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2865     for (s = 0; s < n; ++s) {
2866       p += q[s];
2867       if (q[s] < 255) break;
2868     }
2869     if (end_page && s < n-1) return error(f, STBVorbisError.invalid_stream);
2870     if (s == n) s = -1; // set 'crosses page' flag
2871     if (p > f.stream_end) return error(f, STBVorbisError.need_more_data);
2872     first = false;
2873   }
2874   return true;
2875 }
2876 +/
2877 
2878 private int start_decoder (VorbisDecoder f) {
2879   import core.stdc..string : memcpy, memset;
2880 
2881   ubyte[6] header;
2882   ubyte x, y;
2883   int len, max_submaps = 0;
2884   int longest_floorlist = 0;
2885 
2886   // first page, first packet
2887 
2888   if (!start_page(f)) return false;
2889   // validate page flag
2890   if (!(f.page_flag&PAGEFLAG_first_page)) return error(f, STBVorbisError.invalid_first_page);
2891   if (f.page_flag&PAGEFLAG_last_page) return error(f, STBVorbisError.invalid_first_page);
2892   if (f.page_flag&PAGEFLAG_continued_packet) return error(f, STBVorbisError.invalid_first_page);
2893   // check for expected packet length
2894   if (f.segment_count != 1) return error(f, STBVorbisError.invalid_first_page);
2895   if (f.segments[0] != 30) return error(f, STBVorbisError.invalid_first_page);
2896   // read packet
2897   // check packet header
2898   if (get8(f) != VorbisPacket.id) return error(f, STBVorbisError.invalid_first_page);
2899   if (!getn(f, header.ptr, 6)) return error(f, STBVorbisError.unexpected_eof);
2900   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_first_page);
2901   // vorbis_version
2902   if (get32(f) != 0) return error(f, STBVorbisError.invalid_first_page);
2903   f.vrchannels = get8(f); if (!f.vrchannels) return error(f, STBVorbisError.invalid_first_page);
2904   if (f.vrchannels > STB_VORBIS_MAX_CHANNELS) return error(f, STBVorbisError.too_many_channels);
2905   f.sample_rate = get32(f); if (!f.sample_rate) return error(f, STBVorbisError.invalid_first_page);
2906   get32(f); // bitrate_maximum
2907   get32(f); // bitrate_nominal
2908   get32(f); // bitrate_minimum
2909   x = get8(f);
2910   {
2911     int log0 = x&15;
2912     int log1 = x>>4;
2913     f.blocksize_0 = 1<<log0;
2914     f.blocksize_1 = 1<<log1;
2915     if (log0 < 6 || log0 > 13) return error(f, STBVorbisError.invalid_setup);
2916     if (log1 < 6 || log1 > 13) return error(f, STBVorbisError.invalid_setup);
2917     if (log0 > log1) return error(f, STBVorbisError.invalid_setup);
2918   }
2919 
2920   // framing_flag
2921   x = get8(f);
2922   if (!(x&1)) return error(f, STBVorbisError.invalid_first_page);
2923 
2924   // second packet! (comments)
2925   if (!start_page(f)) return false;
2926 
2927   // read comments
2928   if (!start_packet(f)) return false;
2929 
2930   if (f.read_comments) {
2931     /+if (f.push_mode) {
2932       if (!is_whole_packet_present(f, true)) {
2933         // convert error in ogg header to write type
2934         if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2935         return false;
2936       }
2937     }+/
2938     if (get8_packet(f) != VorbisPacket.comment) return error(f, STBVorbisError.invalid_setup);
2939     foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
2940     if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
2941 
2942     // skip vendor id
2943     uint vidsize = get32_packet(f);
2944     //{ import core.stdc.stdio; printf("vendor size: %u\n", vidsize); }
2945     if (vidsize == EOP) return error(f, STBVorbisError.invalid_setup);
2946     while (vidsize--) get8_packet(f);
2947 
2948     // read comments section
2949     uint cmtcount = get32_packet(f);
2950     if (cmtcount == EOP) return error(f, STBVorbisError.invalid_setup);
2951     if (cmtcount > 0) {
2952       uint cmtsize = 32768; // this should be enough for everyone
2953       f.comment_data = setup_malloc!ubyte(f, cmtsize);
2954       if (f.comment_data is null) return error(f, STBVorbisError.outofmem);
2955       auto cmtpos = 0;
2956       auto d = f.comment_data;
2957       while (cmtcount--) {
2958         uint linelen = get32_packet(f);
2959         //{ import core.stdc.stdio; printf("linelen: %u; lines left: %u\n", linelen, cmtcount); }
2960         if (linelen == EOP || linelen > ushort.max-2) break;
2961         if (linelen == 0) { continue; }
2962         if (cmtpos+2+linelen > cmtsize) break;
2963         cmtpos += linelen+2;
2964         *d++ = (linelen+2)&0xff;
2965         *d++ = ((linelen+2)>>8)&0xff;
2966         while (linelen--) {
2967           auto b = get8_packet(f);
2968           if (b == EOP) return error(f, STBVorbisError.outofmem);
2969           *d++ = cast(ubyte)b;
2970         }
2971         //{ import core.stdc.stdio; printf("%u bytes of comments read\n", cmtpos); }
2972         f.comment_size = cmtpos;
2973       }
2974     }
2975     flush_packet(f);
2976     f.comment_rewind();
2977   } else {
2978     // skip comments
2979     do {
2980       len = next_segment(f);
2981       skip(f, len);
2982       f.bytes_in_seg = 0;
2983     } while (len);
2984   }
2985 
2986   // third packet!
2987   if (!start_packet(f)) return false;
2988 
2989   /+if (f.push_mode) {
2990     if (!is_whole_packet_present(f, true)) {
2991       // convert error in ogg header to write type
2992       if (f.error == STBVorbisError.invalid_stream) f.error = STBVorbisError.invalid_setup;
2993       return false;
2994     }
2995   }+/
2996 
2997   if (get8_packet(f) != VorbisPacket.setup) return error(f, STBVorbisError.invalid_setup);
2998   foreach (immutable i; 0..6) header[i] = cast(ubyte)get8_packet(f); //k8
2999   if (!vorbis_validate(header.ptr)) return error(f, STBVorbisError.invalid_setup);
3000 
3001   // codebooks
3002   f.codebook_count = get_bits!8(f)+1;
3003   f.codebooks = setup_malloc!Codebook(f, f.codebook_count);
3004   static assert((*f.codebooks).sizeof == Codebook.sizeof);
3005   if (f.codebooks is null) return error(f, STBVorbisError.outofmem);
3006   memset(f.codebooks, 0, (*f.codebooks).sizeof*f.codebook_count);
3007   foreach (immutable i; 0..f.codebook_count) {
3008     uint* values;
3009     int ordered, sorted_count;
3010     int total = 0;
3011     ubyte* lengths;
3012     Codebook* c = f.codebooks+i;
3013     x = get_bits!8(f); if (x != 0x42) return error(f, STBVorbisError.invalid_setup);
3014     x = get_bits!8(f); if (x != 0x43) return error(f, STBVorbisError.invalid_setup);
3015     x = get_bits!8(f); if (x != 0x56) return error(f, STBVorbisError.invalid_setup);
3016     x = get_bits!8(f);
3017     c.dimensions = (get_bits!8(f)<<8)+x;
3018     x = get_bits!8(f);
3019     y = get_bits!8(f);
3020     c.entries = (get_bits!8(f)<<16)+(y<<8)+x;
3021     ordered = get_bits!1(f);
3022     c.sparse = (ordered ? 0 : get_bits!1(f));
3023 
3024     if (c.dimensions == 0 && c.entries != 0) return error(f, STBVorbisError.invalid_setup);
3025 
3026     if (c.sparse) {
3027       lengths = cast(ubyte*)setup_temp_malloc(f, c.entries);
3028     } else {
3029       lengths = c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3030     }
3031 
3032     if (lengths is null) return error(f, STBVorbisError.outofmem);
3033 
3034     if (ordered) {
3035       int current_entry = 0;
3036       int current_length = get_bits_add_no!5(f, 1);
3037       while (current_entry < c.entries) {
3038         int limit = c.entries-current_entry;
3039         int n = get_bits_main(f, ilog(limit));
3040         if (current_entry+n > cast(int)c.entries) return error(f, STBVorbisError.invalid_setup);
3041         memset(lengths+current_entry, current_length, n);
3042         current_entry += n;
3043         ++current_length;
3044       }
3045     } else {
3046       foreach (immutable j; 0..c.entries) {
3047         int present = (c.sparse ? get_bits!1(f) : 1);
3048         if (present) {
3049           lengths[j] = get_bits_add_no!5(f, 1);
3050           ++total;
3051           if (lengths[j] == 32) return error(f, STBVorbisError.invalid_setup);
3052         } else {
3053           lengths[j] = NO_CODE;
3054         }
3055       }
3056     }
3057 
3058     if (c.sparse && total >= c.entries>>2) {
3059       // convert sparse items to non-sparse!
3060       if (c.entries > cast(int)f.setup_temp_memory_required) f.setup_temp_memory_required = c.entries;
3061       c.codeword_lengths = setup_malloc!ubyte(f, c.entries);
3062       if (c.codeword_lengths is null) return error(f, STBVorbisError.outofmem);
3063       memcpy(c.codeword_lengths, lengths, c.entries);
3064       setup_temp_free(f, lengths, c.entries); // note this is only safe if there have been no intervening temp mallocs!
3065       lengths = c.codeword_lengths;
3066       c.sparse = 0;
3067     }
3068 
3069     // compute the size of the sorted tables
3070     if (c.sparse) {
3071       sorted_count = total;
3072     } else {
3073       sorted_count = 0;
3074       version(STB_VORBIS_NO_HUFFMAN_BINARY_SEARCH) {} else {
3075         foreach (immutable j; 0..c.entries) if (lengths[j] > STB_VORBIS_FAST_HUFFMAN_LENGTH && lengths[j] != NO_CODE) ++sorted_count;
3076       }
3077     }
3078 
3079     c.sorted_entries = sorted_count;
3080     values = null;
3081 
3082     if (!c.sparse) {
3083       c.codewords = setup_malloc!uint(f, c.entries);
3084       if (!c.codewords) return error(f, STBVorbisError.outofmem);
3085     } else {
3086       if (c.sorted_entries) {
3087         c.codeword_lengths = setup_malloc!ubyte(f, c.sorted_entries);
3088         if (!c.codeword_lengths) return error(f, STBVorbisError.outofmem);
3089         c.codewords = cast(uint*)setup_temp_malloc(f, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3090         if (!c.codewords) return error(f, STBVorbisError.outofmem);
3091         values = cast(uint*)setup_temp_malloc(f, cast(int)(*values).sizeof*c.sorted_entries);
3092         if (!values) return error(f, STBVorbisError.outofmem);
3093       }
3094       uint size = c.entries+cast(int)((*c.codewords).sizeof+(*values).sizeof)*c.sorted_entries;
3095       if (size > f.setup_temp_memory_required) f.setup_temp_memory_required = size;
3096     }
3097 
3098     if (!compute_codewords(c, lengths, c.entries, values)) {
3099       if (c.sparse) setup_temp_free(f, values, 0);
3100       return error(f, STBVorbisError.invalid_setup);
3101     }
3102 
3103     if (c.sorted_entries) {
3104       // allocate an extra slot for sentinels
3105       c.sorted_codewords = setup_malloc!uint(f, c.sorted_entries+1);
3106       if (c.sorted_codewords is null) return error(f, STBVorbisError.outofmem);
3107       // allocate an extra slot at the front so that c.sorted_values[-1] is defined
3108       // so that we can catch that case without an extra if
3109       c.sorted_values = setup_malloc!int(f, c.sorted_entries+1);
3110       if (c.sorted_values is null) return error(f, STBVorbisError.outofmem);
3111       ++c.sorted_values;
3112       c.sorted_values[-1] = -1;
3113       compute_sorted_huffman(c, lengths, values);
3114     }
3115 
3116     if (c.sparse) {
3117       setup_temp_free(f, values, cast(int)(*values).sizeof*c.sorted_entries);
3118       setup_temp_free(f, c.codewords, cast(int)(*c.codewords).sizeof*c.sorted_entries);
3119       setup_temp_free(f, lengths, c.entries);
3120       c.codewords = null;
3121     }
3122 
3123     compute_accelerated_huffman(c);
3124 
3125     c.lookup_type = get_bits!4(f);
3126     if (c.lookup_type > 2) return error(f, STBVorbisError.invalid_setup);
3127     if (c.lookup_type > 0) {
3128       ushort* mults;
3129       c.minimum_value = float32_unpack(get_bits!32(f));
3130       c.delta_value = float32_unpack(get_bits!32(f));
3131       c.value_bits = get_bits_add_no!4(f, 1);
3132       c.sequence_p = get_bits!1(f);
3133       if (c.lookup_type == 1) {
3134         c.lookup_values = lookup1_values(c.entries, c.dimensions);
3135       } else {
3136         c.lookup_values = c.entries*c.dimensions;
3137       }
3138       if (c.lookup_values == 0) return error(f, STBVorbisError.invalid_setup);
3139       mults = cast(ushort*)setup_temp_malloc(f, cast(int)(mults[0]).sizeof*c.lookup_values);
3140       if (mults is null) return error(f, STBVorbisError.outofmem);
3141       foreach (immutable j; 0..cast(int)c.lookup_values) {
3142         int q = get_bits_main(f, c.value_bits);
3143         if (q == EOP) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.invalid_setup); }
3144         mults[j] = cast(ushort)q; //k8
3145       }
3146 
3147       version(STB_VORBIS_DIVIDES_IN_CODEBOOK) {} else {
3148         if (c.lookup_type == 1) {
3149           int sparse = c.sparse; //len
3150           float last = 0;
3151           // pre-expand the lookup1-style multiplicands, to avoid a divide in the inner loop
3152           if (sparse) {
3153             if (c.sorted_entries == 0) goto skip;
3154             c.multiplicands = setup_malloc!codetype(f, c.sorted_entries*c.dimensions);
3155           } else {
3156             c.multiplicands = setup_malloc!codetype(f, c.entries*c.dimensions);
3157           }
3158           if (c.multiplicands is null) { setup_temp_free(f, mults, cast(int)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3159           foreach (immutable j; 0..(sparse ? c.sorted_entries : c.entries)) {
3160             uint z = (sparse ? c.sorted_values[j] : j);
3161             uint div = 1;
3162             foreach (immutable k; 0..c.dimensions) {
3163               int off = (z/div)%c.lookup_values;
3164               float val = mults[off];
3165               val = val*c.delta_value+c.minimum_value+last;
3166               c.multiplicands[j*c.dimensions+k] = val;
3167               if (c.sequence_p) last = val;
3168               if (k+1 < c.dimensions) {
3169                  if (div > uint.max/cast(uint)c.lookup_values) {
3170                     setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3171                     return error(f, STBVorbisError.invalid_setup);
3172                  }
3173                  div *= c.lookup_values;
3174               }
3175             }
3176           }
3177           c.lookup_type = 2;
3178           goto skip;
3179         }
3180         //else
3181       }
3182       {
3183         float last = 0;
3184         c.multiplicands = setup_malloc!codetype(f, c.lookup_values);
3185         if (c.multiplicands is null) { setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values); return error(f, STBVorbisError.outofmem); }
3186         foreach (immutable j; 0..cast(int)c.lookup_values) {
3187           float val = mults[j]*c.delta_value+c.minimum_value+last;
3188           c.multiplicands[j] = val;
3189           if (c.sequence_p) last = val;
3190         }
3191       }
3192      //version(STB_VORBIS_DIVIDES_IN_CODEBOOK)
3193      skip: // this is versioned out in C
3194       setup_temp_free(f, mults, cast(uint)(mults[0]).sizeof*c.lookup_values);
3195     }
3196   }
3197 
3198   // time domain transfers (notused)
3199   x = get_bits_add_no!6(f, 1);
3200   foreach (immutable i; 0..x) {
3201     auto z = get_bits!16(f);
3202     if (z != 0) return error(f, STBVorbisError.invalid_setup);
3203   }
3204 
3205   // Floors
3206   f.floor_count = get_bits_add_no!6(f, 1);
3207   f.floor_config = setup_malloc!Floor(f, f.floor_count);
3208   if (f.floor_config is null) return error(f, STBVorbisError.outofmem);
3209   foreach (immutable i; 0..f.floor_count) {
3210     f.floor_types[i] = get_bits!16(f);
3211     if (f.floor_types[i] > 1) return error(f, STBVorbisError.invalid_setup);
3212     if (f.floor_types[i] == 0) {
3213       Floor0* g = &f.floor_config[i].floor0;
3214       g.order = get_bits!8(f);
3215       g.rate = get_bits!16(f);
3216       g.bark_map_size = get_bits!16(f);
3217       g.amplitude_bits = get_bits!6(f);
3218       g.amplitude_offset = get_bits!8(f);
3219       g.number_of_books = get_bits_add_no!4(f, 1);
3220       foreach (immutable j; 0..g.number_of_books) g.book_list[j] = get_bits!8(f);
3221       return error(f, STBVorbisError.feature_not_supported);
3222     } else {
3223       Point[31*8+2] p;
3224       Floor1 *g = &f.floor_config[i].floor1;
3225       int max_class = -1;
3226       g.partitions = get_bits!5(f);
3227       foreach (immutable j; 0..g.partitions) {
3228         g.partition_class_list[j] = get_bits!4(f);
3229         if (g.partition_class_list[j] > max_class) max_class = g.partition_class_list[j];
3230       }
3231       foreach (immutable j; 0..max_class+1) {
3232         g.class_dimensions[j] = get_bits_add_no!3(f, 1);
3233         g.class_subclasses[j] = get_bits!2(f);
3234         if (g.class_subclasses[j]) {
3235           g.class_masterbooks[j] = get_bits!8(f);
3236           if (g.class_masterbooks[j] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3237         }
3238         foreach (immutable k; 0..1<<g.class_subclasses[j]) {
3239           g.subclass_books[j].ptr[k] = get_bits!8(f)-1;
3240           if (g.subclass_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3241         }
3242       }
3243       g.floor1_multiplier = get_bits_add_no!2(f, 1);
3244       g.rangebits = get_bits!4(f);
3245       g.Xlist[0] = 0;
3246       g.Xlist[1] = cast(ushort)(1<<g.rangebits); //k8
3247       g.values = 2;
3248       foreach (immutable j; 0..g.partitions) {
3249         int c = g.partition_class_list[j];
3250         foreach (immutable k; 0..g.class_dimensions[c]) {
3251           g.Xlist[g.values] = cast(ushort)get_bits_main(f, g.rangebits); //k8
3252           ++g.values;
3253         }
3254       }
3255       assert(g.values <= ushort.max);
3256       // precompute the sorting
3257       foreach (ushort j; 0..cast(ushort)g.values) {
3258         p[j].x = g.Xlist[j];
3259         p[j].y = j;
3260       }
3261       qsort(p.ptr, g.values, (p[0]).sizeof, &point_compare);
3262       foreach (uint j; 0..g.values) g.sorted_order.ptr[j] = cast(ubyte)p.ptr[j].y;
3263       // precompute the neighbors
3264       foreach (uint j; 2..g.values) {
3265         ushort low = void, hi = void;
3266         neighbors(g.Xlist.ptr, j, &low, &hi);
3267         assert(low <= ubyte.max);
3268         assert(hi <= ubyte.max);
3269         g.neighbors[j].ptr[0] = cast(ubyte)low;
3270         g.neighbors[j].ptr[1] = cast(ubyte)hi;
3271       }
3272       if (g.values > longest_floorlist) longest_floorlist = g.values;
3273     }
3274   }
3275 
3276   // Residue
3277   f.residue_count = get_bits_add_no!6(f, 1);
3278   f.residue_config = setup_malloc!Residue(f, f.residue_count);
3279   if (f.residue_config is null) return error(f, STBVorbisError.outofmem);
3280   memset(f.residue_config, 0, f.residue_count*(f.residue_config[0]).sizeof);
3281   foreach (immutable i; 0..f.residue_count) {
3282     ubyte[64] residue_cascade;
3283     Residue* r = f.residue_config+i;
3284     f.residue_types[i] = get_bits!16(f);
3285     if (f.residue_types[i] > 2) return error(f, STBVorbisError.invalid_setup);
3286     r.begin = get_bits!24(f);
3287     r.end = get_bits!24(f);
3288     if (r.end < r.begin) return error(f, STBVorbisError.invalid_setup);
3289     r.part_size = get_bits_add_no!24(f, 1);
3290     r.classifications = get_bits_add_no!6(f, 1);
3291     r.classbook = get_bits!8(f);
3292     if (r.classbook >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3293     foreach (immutable j; 0..r.classifications) {
3294       ubyte high_bits = 0;
3295       ubyte low_bits = get_bits!3(f);
3296       if (get_bits!1(f)) high_bits = get_bits!5(f);
3297       assert(high_bits*8+low_bits <= ubyte.max);
3298       residue_cascade[j] = cast(ubyte)(high_bits*8+low_bits);
3299     }
3300     static assert(r.residue_books[0].sizeof == 16);
3301     r.residue_books = setup_malloc!(short[8])(f, r.classifications);
3302     if (r.residue_books is null) return error(f, STBVorbisError.outofmem);
3303     foreach (immutable j; 0..r.classifications) {
3304       foreach (immutable k; 0..8) {
3305         if (residue_cascade[j]&(1<<k)) {
3306           r.residue_books[j].ptr[k] = get_bits!8(f);
3307           if (r.residue_books[j].ptr[k] >= f.codebook_count) return error(f, STBVorbisError.invalid_setup);
3308         } else {
3309           r.residue_books[j].ptr[k] = -1;
3310         }
3311       }
3312     }
3313     // precompute the classifications[] array to avoid inner-loop mod/divide
3314     // call it 'classdata' since we already have r.classifications
3315     r.classdata = setup_malloc!(ubyte*)(f, f.codebooks[r.classbook].entries);
3316     if (!r.classdata) return error(f, STBVorbisError.outofmem);
3317     memset(r.classdata, 0, (*r.classdata).sizeof*f.codebooks[r.classbook].entries);
3318     foreach (immutable j; 0..f.codebooks[r.classbook].entries) {
3319       int classwords = f.codebooks[r.classbook].dimensions;
3320       int temp = j;
3321       r.classdata[j] = setup_malloc!ubyte(f, classwords);
3322       if (r.classdata[j] is null) return error(f, STBVorbisError.outofmem);
3323       foreach_reverse (immutable k; 0..classwords) {
3324         assert(temp%r.classifications >= 0 && temp%r.classifications <= ubyte.max);
3325         r.classdata[j][k] = cast(ubyte)(temp%r.classifications);
3326         temp /= r.classifications;
3327       }
3328     }
3329   }
3330 
3331   f.mapping_count = get_bits_add_no!6(f, 1);
3332   f.mapping = setup_malloc!Mapping(f, f.mapping_count);
3333   if (f.mapping is null) return error(f, STBVorbisError.outofmem);
3334   memset(f.mapping, 0, f.mapping_count*(*f.mapping).sizeof);
3335   foreach (immutable i; 0..f.mapping_count) {
3336     Mapping* m = f.mapping+i;
3337     int mapping_type = get_bits!16(f);
3338     if (mapping_type != 0) return error(f, STBVorbisError.invalid_setup);
3339     m.chan = setup_malloc!MappingChannel(f, f.vrchannels);
3340     if (m.chan is null) return error(f, STBVorbisError.outofmem);
3341     m.submaps = (get_bits!1(f) ? get_bits_add_no!4(f, 1) : 1);
3342     if (m.submaps > max_submaps) max_submaps = m.submaps;
3343     if (get_bits!1(f)) {
3344       m.coupling_steps = get_bits_add_no!8(f, 1);
3345       foreach (immutable k; 0..m.coupling_steps) {
3346         m.chan[k].magnitude = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3347         m.chan[k].angle = cast(ubyte)get_bits_main(f, ilog(f.vrchannels-1)); //k8
3348         if (m.chan[k].magnitude >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3349         if (m.chan[k].angle     >= f.vrchannels) return error(f, STBVorbisError.invalid_setup);
3350         if (m.chan[k].magnitude == m.chan[k].angle) return error(f, STBVorbisError.invalid_setup);
3351       }
3352     } else {
3353       m.coupling_steps = 0;
3354     }
3355 
3356     // reserved field
3357     if (get_bits!2(f)) return error(f, STBVorbisError.invalid_setup);
3358     if (m.submaps > 1) {
3359       foreach (immutable j; 0..f.vrchannels) {
3360         m.chan[j].mux = get_bits!4(f);
3361         if (m.chan[j].mux >= m.submaps) return error(f, STBVorbisError.invalid_setup);
3362       }
3363     } else {
3364       // @SPECIFICATION: this case is missing from the spec
3365       foreach (immutable j; 0..f.vrchannels) m.chan[j].mux = 0;
3366     }
3367     foreach (immutable j; 0..m.submaps) {
3368       get_bits!8(f); // discard
3369       m.submap_floor[j] = get_bits!8(f);
3370       m.submap_residue[j] = get_bits!8(f);
3371       if (m.submap_floor[j] >= f.floor_count) return error(f, STBVorbisError.invalid_setup);
3372       if (m.submap_residue[j] >= f.residue_count) return error(f, STBVorbisError.invalid_setup);
3373     }
3374   }
3375 
3376   // Modes
3377   f.mode_count = get_bits_add_no!6(f, 1);
3378   foreach (immutable i; 0..f.mode_count) {
3379     Mode* m = f.mode_config.ptr+i;
3380     m.blockflag = get_bits!1(f);
3381     m.windowtype = get_bits!16(f);
3382     m.transformtype = get_bits!16(f);
3383     m.mapping = get_bits!8(f);
3384     if (m.windowtype != 0) return error(f, STBVorbisError.invalid_setup);
3385     if (m.transformtype != 0) return error(f, STBVorbisError.invalid_setup);
3386     if (m.mapping >= f.mapping_count) return error(f, STBVorbisError.invalid_setup);
3387   }
3388 
3389   flush_packet(f);
3390 
3391   f.previous_length = 0;
3392 
3393   foreach (immutable i; 0..f.vrchannels) {
3394     f.channel_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1);
3395     f.previous_window.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3396     f.finalY.ptr[i]          = setup_malloc!short(f, longest_floorlist);
3397     if (f.channel_buffers.ptr[i] is null || f.previous_window.ptr[i] is null || f.finalY.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3398     version(STB_VORBIS_NO_DEFER_FLOOR) {
3399       f.floor_buffers.ptr[i] = setup_malloc!float(f, f.blocksize_1/2);
3400       if (f.floor_buffers.ptr[i] is null) return error(f, STBVorbisError.outofmem);
3401     }
3402   }
3403 
3404   if (!init_blocksize(f, 0, f.blocksize_0)) return false;
3405   if (!init_blocksize(f, 1, f.blocksize_1)) return false;
3406   f.blocksize.ptr[0] = f.blocksize_0;
3407   f.blocksize.ptr[1] = f.blocksize_1;
3408 
3409   version(STB_VORBIS_DIVIDE_TABLE) {
3410     if (integer_divide_table[1].ptr[1] == 0) {
3411       foreach (immutable i; 0..DIVTAB_NUMER) foreach (immutable j; 1..DIVTAB_DENOM) integer_divide_table[i].ptr[j] = i/j;
3412     }
3413   }
3414 
3415   // compute how much temporary memory is needed
3416 
3417   // 1.
3418   {
3419     uint imdct_mem = (f.blocksize_1*cast(uint)(float).sizeof>>1);
3420     uint classify_mem;
3421     int max_part_read = 0;
3422     foreach (immutable i; 0..f.residue_count) {
3423       Residue* r = f.residue_config+i;
3424       int n_read = r.end-r.begin;
3425       int part_read = n_read/r.part_size;
3426       if (part_read > max_part_read) max_part_read = part_read;
3427     }
3428     version(STB_VORBIS_DIVIDES_IN_RESIDUE) {
3429       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(int*).sizeof);
3430     } else {
3431       classify_mem = f.vrchannels*cast(uint)((void*).sizeof+max_part_read*(ubyte*).sizeof);
3432     }
3433     f.temp_memory_required = classify_mem;
3434     if (imdct_mem > f.temp_memory_required) f.temp_memory_required = imdct_mem;
3435   }
3436 
3437   f.first_decode = true;
3438 
3439   /+
3440   if (f.alloc.alloc_buffer) {
3441     debug(stb_vorbis) assert(f.temp_offset == f.alloc.alloc_buffer_length_in_bytes);
3442     // check if there's enough temp memory so we don't error later
3443     if (f.setup_offset+ /*(*f).sizeof+*/ f.temp_memory_required > cast(uint)f.temp_offset) return error(f, STBVorbisError.outofmem);
3444   }
3445   +/
3446 
3447   f.first_audio_page_offset = f.fileOffset();
3448 
3449   return true;
3450 }
3451 
3452 /+
3453 private int vorbis_search_for_page_pushdata (VorbisDecoder f, ubyte* data, int data_len) {
3454   import core.stdc.string : memcmp;
3455 
3456   foreach (immutable i; 0..f.page_crc_tests) f.scan.ptr[i].bytes_done = 0;
3457 
3458   // if we have room for more scans, search for them first, because
3459   // they may cause us to stop early if their header is incomplete
3460   if (f.page_crc_tests < STB_VORBIS_PUSHDATA_CRC_COUNT) {
3461     if (data_len < 4) return 0;
3462     data_len -= 3; // need to look for 4-byte sequence, so don't miss one that straddles a boundary
3463     foreach (immutable i; 0..data_len) {
3464       if (data[i] == 0x4f) {
3465         if (memcmp(data+i, ogg_page_header.ptr, 4) == 0) {
3466           // make sure we have the whole page header
3467           if (i+26 >= data_len || i+27+data[i+26] >= data_len) {
3468             // only read up to this page start, so hopefully we'll
3469             // have the whole page header start next time
3470             data_len = i;
3471             break;
3472           }
3473           // ok, we have it all; compute the length of the page
3474           auto len = 27+data[i+26];
3475           foreach (immutable j; 0..data[i+26]) len += data[i+27+j];
3476           // scan everything up to the embedded crc (which we must 0)
3477           uint crc = 0;
3478           foreach (immutable j; 0..22) crc = crc32_update(crc, data[i+j]);
3479           // now process 4 0-bytes
3480           foreach (immutable j; 22..26) crc = crc32_update(crc, 0);
3481           // len is the total number of bytes we need to scan
3482           auto n = f.page_crc_tests++;
3483           f.scan.ptr[n].bytes_left = len-/*j*/26;
3484           f.scan.ptr[n].crc_so_far = crc;
3485           f.scan.ptr[n].goal_crc = data[i+22]+(data[i+23]<<8)+(data[i+24]<<16)+(data[i+25]<<24);
3486           // if the last frame on a page is continued to the next, then
3487           // we can't recover the sample_loc immediately
3488           if (data[i+27+data[i+26]-1] == 255) {
3489             f.scan.ptr[n].sample_loc = ~0;
3490           } else {
3491             f.scan.ptr[n].sample_loc = data[i+6]+(data[i+7]<<8)+(data[i+8]<<16)+(data[i+9]<<24);
3492           }
3493           f.scan.ptr[n].bytes_done = i+26/*j*/;
3494           if (f.page_crc_tests == STB_VORBIS_PUSHDATA_CRC_COUNT) break;
3495           // keep going if we still have room for more
3496         }
3497       }
3498     }
3499   }
3500 
3501   for (uint i = 0; i < f.page_crc_tests; ) {
3502     int nn = f.scan.ptr[i].bytes_done;
3503     int m = f.scan.ptr[i].bytes_left;
3504     if (m > data_len-nn) m = data_len-nn;
3505     // m is the bytes to scan in the current chunk
3506     uint crc = f.scan.ptr[i].crc_so_far;
3507     foreach (immutable j; 0..m) crc = crc32_update(crc, data[nn+j]);
3508     f.scan.ptr[i].bytes_left -= m;
3509     f.scan.ptr[i].crc_so_far = crc;
3510     if (f.scan.ptr[i].bytes_left == 0) {
3511       // does it match?
3512       if (f.scan.ptr[i].crc_so_far == f.scan.ptr[i].goal_crc) {
3513         // Houston, we have page
3514         data_len = nn+m; // consumption amount is wherever that scan ended
3515         f.page_crc_tests = -1; // drop out of page scan mode
3516         f.previous_length = 0; // decode-but-don't-output one frame
3517         f.next_seg = -1;       // start a new page
3518         f.current_loc = f.scan.ptr[i].sample_loc; // set the current sample location to the amount we'd have decoded had we decoded this page
3519         f.current_loc_valid = f.current_loc != ~0U;
3520         return data_len;
3521       }
3522       // delete entry
3523       f.scan.ptr[i] = f.scan.ptr[--f.page_crc_tests];
3524     } else {
3525       ++i;
3526     }
3527   }
3528 
3529   return data_len;
3530 }
3531 +/
3532 
// Scan forward from the current read position for the next Ogg page whose
// stored checksum matches the checksum computed over its bytes.
//
// Returns 1 when such a page is found; the stream is then repositioned on the
// page's leading 'O' byte, `*end` (when non-null) receives the offset just
// past the page, and `*last` (when non-null) is 1 if bit 0x04 of header byte 5
// is set, otherwise 0. Returns 0 when the stream is exhausted first.
private uint vorbis_find_page (VorbisDecoder f, uint* end, uint* last) {
  for (;;) {
    if (f.eof) return 0;
    // anything that is not 'O' (0x4f) cannot start a page: keep scanning
    if (get8(f) != 0x4f) continue;

    // offset of the byte that follows the candidate 'O'; used for rewinding
    uint candidate = f.fileOffset;
    // check if we're off the end of a file_section stream
    if (candidate-25 > f.stream_len) return 0;

    // compare the remaining three bytes of the capture pattern
    bool captured = true;
    foreach (immutable idx; 1..4) {
      if (get8(f) != ogg_page_header[idx]) { captured = false; break; }
    }
    if (f.eof) return 0;

    if (captured) {
      // rebuild the fixed 27-byte page header in memory
      ubyte[27] hdr;
      hdr[0..4] = cast(immutable(ubyte)[])ogg_page_header[0..4];
      foreach (ref b; hdr[4..$]) b = get8(f);
      if (f.eof) return 0;

      // header byte 4 must be zero, otherwise the candidate is rejected
      if (hdr[4] == 0) {
        // the stored checksum is little-endian in bytes 22..25; those bytes
        // are treated as zero while the checksum is recomputed
        immutable uint wanted = hdr[22]+(hdr[23]<<8)+(hdr[24]<<16)+(hdr[25]<<24);
        hdr[22..26] = 0;

        uint sum = 0;
        foreach (immutable b; hdr) sum = crc32_update(sum, b);

        // segment table: hdr[26] entries, whose total is the payload size
        uint payload = 0;
        foreach (immutable _; 0..hdr[26]) {
          auto seg = get8(f);
          sum = crc32_update(sum, seg);
          payload += seg;
        }
        if (payload && f.eof) return 0;

        foreach (immutable _; 0..payload) sum = crc32_update(sum, get8(f));

        // finished parsing probable page
        if (sum == wanted) {
          // no further plausibility check (last-page flag set, or capture
          // pattern following) is done on purpose: files with garbage
          // between pages stay recoverable this way
          if (end) *end = f.fileOffset;
          if (last) *last = (hdr[5]&0x04 ? 1 : 0);
          set_file_offset(f, candidate-1);
          return 1;
        }
      }
    }

    // not a valid page, so rewind and look for next one
    set_file_offset(f, candidate);
  }
  assert(0);
}
3587 
// 32-bit all-ones marker. It has the same value as the `~0U` that the seek
// code compares `last_decoded_sample` against; presumably it stands for
// "sample position not known" -- NOTE(review): confirm against its users.
enum SAMPLE_unknown = 0xffffffff;
3589 
3590 // seeking is implemented with a binary search, which narrows down the range to
3591 // 64K, before using a linear search (because finding the synchronization
3592 // pattern can be expensive, and the chance we'd find the end page again is
3593 // relatively high for small ranges)
3594 //
3595 // two initial interpolation-style probes are used at the start of the search
3596 // to try to bound either side of the binary search sensibly, while still
3597 // working in O(log n) time if they fail.
// Read the page header found at the current stream position and describe the
// page in `*z` (start offset, end offset, last decoded sample).
//
// Returns 1 and puts the stream back at the page start on success. Returns 0
// when the first four bytes are not "OggS"; in that case the stream position
// is NOT restored and only `z.page_start` has been written.
private int get_seek_page_info (VorbisDecoder f, ProbedPage* z) {
  ubyte[27] fixedHeader;
  ubyte[255] segmentTable;

  // record where the page starts
  z.page_start = f.fileOffset;

  // fixed-size part of the header, then the capture pattern check
  getn(f, fixedHeader.ptr, 27);
  foreach (immutable idx, immutable ch; "OggS") {
    if (fixedHeader[idx] != ch) return 0;
  }

  // byte 26 holds the number of entries in the segment table that follows
  immutable int segmentCount = fixedHeader[26];
  getn(f, segmentTable.ptr, segmentCount);

  // the payload size is the sum of all segment table entries
  uint payload = 0;
  foreach (immutable seg; segmentTable[0..segmentCount]) payload += seg;

  // this implies where the page ends
  z.page_end = z.page_start+27+segmentCount+payload;

  // bytes 6..9 hold the last decoded sample, least significant byte first
  uint sample = 0;
  foreach_reverse (immutable b; fixedHeader[6..10]) sample = (sample<<8)|b;
  z.last_decoded_sample = sample;

  // restore file state to where we were
  set_file_offset(f, z.page_start);
  return 1;
}
3624 
// Rarely used helper: position the stream on the page that precedes
// `limit_offset`, which is needed while looking for the start of a packet.
//
// Scanning starts 64K before the limit, or at the first audio page when that
// would fall before it. Returns 1 with the stream on a page that starts
// before the limit and ends at or after it; returns 0 when no such page is
// found before the page search fails.
private int go_to_page_before (VorbisDecoder f, uint limit_offset) {
  enum uint BACKTRACK = 65536;

  // back up by 64K only when that does not underflow and stays in audio data
  immutable bool canBackUp = (limit_offset >= BACKTRACK && limit_offset-BACKTRACK >= f.first_audio_page_offset);
  uint scanFrom = (canBackUp ? limit_offset-BACKTRACK : f.first_audio_page_offset);

  set_file_offset(f, scanFrom);

  uint pageEnd;
  for (;;) {
    if (!vorbis_find_page(f, &pageEnd, null)) return 0;
    // after a hit the stream sits on the page start, so this tests that the
    // page straddles (or ends exactly at) the limit
    if (pageEnd >= limit_offset && f.fileOffset < limit_offset) return 1;
    // page lies entirely before the limit: continue right after it
    set_file_offset(f, pageEnd);
  }
}
3645 
// implements the search logic for finding a page and starting decoding. if
// the function succeeds, current_loc_valid will be true and current_loc will
// be less than or equal to the provided sample number (the closer the
// better).
//
// Params:
//   f             = decoder whose stream is repositioned
//   sample_number = target sample; it is reduced by a block-size dependent
//                   padding before the search starts
// Returns: 1 on success; otherwise the result of error() (seek_without_length,
//          seek_invalid or seek_failed), or 0 when pumping the first frame
//          fails. The `error:` exit additionally rewinds via f.seekStart.
private int seek_to_sample_coarse (VorbisDecoder f, uint sample_number) {
  // left/right bracket the target page, mid is the page probed in each round
  ProbedPage left, right, mid;
  int i, start_seg_with_known_loc, end_pos, page_start;
  uint delta, stream_length, padding;
  double offset, bytes_per_sample;
  // number of completed search rounds; rounds 0 and 1 use interpolation
  int probe = 0;

  // find the last page and validate the target sample
  stream_length = f.streamLengthInSamples;
  if (stream_length == 0) return error(f, STBVorbisError.seek_without_length);
  if (sample_number > stream_length) return error(f, STBVorbisError.seek_invalid);

  // this is the maximum difference between the window-center (which is the
  // actual granule position value), and the right-start (which the spec
  // indicates should be the granule position (give or take one)).
  padding = ((f.blocksize_1-f.blocksize_0)>>2);
  // clamp at zero instead of letting the unsigned subtraction wrap
  if (sample_number < padding) sample_number = 0; else sample_number -= padding;

  left = f.p_first;
  while (left.last_decoded_sample == ~0U) {
    // (untested) the first page does not have a 'last_decoded_sample'
    set_file_offset(f, left.page_end);
    if (!get_seek_page_info(f, &left)) goto error;
  }

  right = f.p_last;
  debug(stb_vorbis) assert(right.last_decoded_sample != ~0U);

  // starting from the start is handled differently
  if (sample_number <= left.last_decoded_sample) {
    f.seekStart;
    return 1;
  }

  // narrow [left, right] until the two pages are adjacent
  while (left.page_end != right.page_start) {
    debug(stb_vorbis) assert(left.page_end < right.page_start);
    // search range in bytes
    delta = right.page_start-left.page_end;
    if (delta <= 65536) {
      // there's only 64K left to search - handle it linearly
      set_file_offset(f, left.page_end);
    } else {
      if (probe < 2) {
        if (probe == 0) {
          // first probe (interpolate)
          double data_bytes = right.page_end-left.page_start;
          bytes_per_sample = data_bytes/right.last_decoded_sample;
          offset = left.page_start+bytes_per_sample*(sample_number-left.last_decoded_sample);
        } else {
          // second probe (try to bound the other side)
          // `error` here is a local byte-distance estimate; it shadows nothing
          // at run time but shares its name with the error() function and the
          // `error:` label below
          double error = (cast(double)sample_number-mid.last_decoded_sample)*bytes_per_sample;
          // move by at least 8000 bytes in the direction of the miss
          if (error >= 0 && error <  8000) error =  8000;
          if (error <  0 && error > -8000) error = -8000;
          offset += error*2;
        }

        // ensure the offset is valid
        if (offset < left.page_end) offset = left.page_end;
        // delta > 65536 on this path, so right.page_start-65536 cannot wrap
        if (offset > right.page_start-65536) offset = right.page_start-65536;

        set_file_offset(f, cast(uint)offset);
      } else {
        // binary search for large ranges (offset by 32K to ensure
        // we don't hit the right page)
        set_file_offset(f, left.page_end+(delta/2)-32768);
      }

      // the chosen offset is arbitrary, so resynchronise on a page boundary
      if (!vorbis_find_page(f, null, null)) goto error;
    }

    // read page info, skipping pages that carry no sample position
    for (;;) {
      if (!get_seek_page_info(f, &mid)) goto error;
      if (mid.last_decoded_sample != ~0U) break;
      // (untested) no frames end on this page
      set_file_offset(f, mid.page_end);
      debug(stb_vorbis) assert(mid.page_start < right.page_start);
    }

    // if we've just found the last page again then we're in a tricky file,
    // and we're close enough.
    if (mid.page_start == right.page_start) break;

    if (sample_number < mid.last_decoded_sample) right = mid; else left = mid;

    ++probe;
  }

  // seek back to start of the last packet
  page_start = left.page_start;
  set_file_offset(f, page_start);
  if (!start_page(f)) return error(f, STBVorbisError.seek_failed);
  end_pos = f.end_seg_with_known_loc;
  debug(stb_vorbis) assert(end_pos >= 0);

  for (;;) {
    // walk back over segments whose table entry is 255; stop right after the
    // nearest entry that is not 255 (or at segment 0)
    for (i = end_pos; i > 0; --i) if (f.segments.ptr[i-1] != 255) break;
    start_seg_with_known_loc = i;
    // done when the packet starts inside this page, or when the page does not
    // begin with a continued packet
    if (start_seg_with_known_loc > 0 || !(f.page_flag&PAGEFLAG_continued_packet)) break;
    // (untested) the final packet begins on an earlier page
    if (!go_to_page_before(f, page_start)) goto error;
    page_start = f.fileOffset;
    if (!start_page(f)) goto error;
    end_pos = f.segment_count-1;
  }

  // prepare to start decoding
  f.current_loc_valid = false;
  f.last_seg = false;
  f.valid_bits = 0;
  f.packet_bytes = 0;
  f.bytes_in_seg = 0;
  f.previous_length = 0;
  f.next_seg = start_seg_with_known_loc;

  // skip the payload of every segment that precedes the chosen packet
  for (i = 0; i < start_seg_with_known_loc; ++i) skip(f, f.segments.ptr[i]);

  // start decoding (optimizable - this frame is generally discarded)
  if (!vorbis_pump_first_frame(f)) return 0;
  // landing past the (padded) target means the coarse seek overshot
  if (f.current_loc > sample_number) return error(f, STBVorbisError.seek_failed);
  return 1;

error:
  // try to restore the file to a valid state
  f.seekStart;
  return error(f, STBVorbisError.seek_failed);
}
3776 
// Same as vorbis_decode_initial(), except that the bytes it consumed are
// given back afterwards, so the packet can be decoded again from its start.
// Returns 0 when vorbis_decode_initial() fails (nothing is rewound then),
// 1 otherwise.
private int peek_decode_initial (VorbisDecoder f, int* p_left_start, int* p_left_end, int* p_right_start, int* p_right_end, int* mode) {
  if (!vorbis_decode_initial(f, p_left_start, p_left_end, p_right_start, p_right_end, mode)) return 0;

  // either 1 or 2 bytes were read, figure out which so we can rewind:
  // one bit, plus the bits of the mode number, plus two more bits when the
  // selected mode has its blockflag set
  immutable int consumedBits = 1+ilog(f.mode_count-1)+(f.mode_config.ptr[*mode].blockflag ? 2 : 0);
  immutable int consumedBytes = (consumedBits+7)/8;

  // undo the byte accounting and move the stream back
  f.bytes_in_seg += consumedBytes;
  f.packet_bytes -= consumedBytes;
  skip(f, -consumedBytes);

  // step the segment cursor back by one; -1 is mapped to the last segment
  f.next_seg = (f.next_seg == -1 ? f.segment_count-1 : f.next_seg-1);

  // drop any bits still cached by the bit reader
  f.valid_bits = 0;

  return 1;
}
3794 
// ////////////////////////////////////////////////////////////////////////// //
// utility and supporting functions for getting s16 samples

// single-bit output flags; compute_samples() ANDs a table entry with a
// caller-supplied mask built from these to decide whether a source channel
// contributes to the output being mixed
enum PLAYBACK_MONO  = (1<<0);
enum PLAYBACK_LEFT  = (1<<1);
enum PLAYBACK_RIGHT = (1<<2);

// flag combinations used by the table below
// (presumably "left", "center" and "right" source positions -- the names
// suggest it, the code only shows which flags each one carries)
enum L = (PLAYBACK_LEFT |PLAYBACK_MONO);
enum C = (PLAYBACK_LEFT |PLAYBACK_RIGHT|PLAYBACK_MONO);
enum R = (PLAYBACK_RIGHT|PLAYBACK_MONO);

// channel_position[channel_count][channel_index] -> flag set of that channel.
// Rows exist for 0..6 channels; entries not listed in a row are zero, i.e.
// they match no mask.
immutable byte[6][7] channel_position = [
  [ 0 ],
  [ C ],
  [ L, R ],
  [ L, C, R ],
  [ L, R, L, R ],
  [ L, C, R, L, R ],
  [ L, C, R, L, R, C ],
];
3814 
3815 
// Float -> scaled integer conversion, provided as string mixins so that both
// build variants are used the same way:
//   mixin(declfcvar!"temp");                       // scratch state, if needed
//   mixin(FAST_SCALED_FLOAT_TO_INT!(expr, "15"));  // declares `int v`
// `v` is `expr` scaled by 2^15 and rounded; range clamping is left to the caller.
version(STB_VORBIS_NO_FAST_SCALED_FLOAT) {
  // portable variant: no scratch variable, the mixin is an empty block statement
  enum declfcvar(string name) = "{}";
  template FAST_SCALED_FLOAT_TO_INT(string x, string s) {
    // the 32768.0f factor below hard-codes a shift of 15
    static assert(s == "15");
    // lrintf() returns c_long, which is 64 bits wide on 64-bit Posix targets
    // and does not narrow to `int` implicitly; without the explicit cast this
    // variant fails to compile there
    enum FAST_SCALED_FLOAT_TO_INT = q{import core.stdc.math : lrintf; int v = cast(int)lrintf((${x})*32768.0f);}.cmacroFixVars!"x"(x);
  }
} else {
  //k8: actually, this is only marginally faster than using `lrintf()`, but anyway...
  // reinterprets the bits of a float as an int (both are 4 bytes, see below)
  align(1) union float_conv {
  align(1):
    float f;
    int i;
  }
  // declares an uninitialised float_conv with the requested name
  enum declfcvar(string name) = "float_conv "~name~" = void;";
  static assert(float_conv.i.sizeof == 4 && float_conv.f.sizeof == 4);
  // add (1<<23) to convert to int, then divide by 2^SHIFT, then add 0.5/2^SHIFT to round
  //#define check_endianness()
  enum MAGIC(string SHIFT) = q{(1.5f*(1<<(23-${SHIFT}))+0.5f/(1<<${SHIFT}))}.cmacroFixVars!("SHIFT")(SHIFT);
  enum ADDEND(string SHIFT) = q{(((150-${SHIFT})<<23)+(1<<22))}.cmacroFixVars!("SHIFT")(SHIFT);
  // NOTE: the generated code refers to the scratch variable by the fixed name
  // `temp`, so callers must use declfcvar!"temp"
  enum FAST_SCALED_FLOAT_TO_INT(string x, string s) = q{temp.f = (${x})+${MAGIC}; int v = temp.i-${ADDEND};}
    .cmacroFixVars!("x", "s", "MAGIC", "ADDEND")(x, s, MAGIC!(s), ADDEND!(s));
}
3838 
// Converts `len` float samples from `src` into clamped signed 16-bit samples at `dest`.
private void copy_samples (short* dest, float* src, int len) {
  mixin(declfcvar!"temp");
  for (int remaining = len; remaining > 0; --remaining) {
    // the mixin declares `int v` with the scaled sample value
    mixin(FAST_SCALED_FLOAT_TO_INT!("*src", "15"));
    // a single unsigned comparison catches both under- and overflow of the short range
    if (cast(uint)(v+32768) > 65535) {
      v = (v < 0 ? -32768 : 32767);
    }
    *dest = cast(short)v;
    ++dest;
    ++src;
  }
}
3849 
// Mixes every source channel whose channel_position entry matches `mask` into one
// output channel and stores the result as clamped signed 16-bit samples.
//   mask     -- PLAYBACK_* bits selecting the contributing source channels
//   output   -- receives `len` samples
//   num_c    -- number of source channels (indexes channel_position, so at most 6)
//   data     -- per-channel float sample arrays
//   d_offset -- first sample to read in each source channel
//   len      -- number of samples to produce
private void compute_samples (int mask, short* output, int num_c, float** data, int d_offset, int len) {
  enum BUFFER_SIZE = 32;
  float[BUFFER_SIZE] buffer;
  int n = BUFFER_SIZE;
  //check_endianness();
  mixin(declfcvar!"temp");
  // work in chunks of BUFFER_SIZE samples; `o` is the offset of the current chunk
  for (uint o = 0; o < len; o += BUFFER_SIZE) {
    // start every chunk from silence (a slice fill needs no libc import)
    buffer[] = 0.0f;
    // the last chunk may be shorter
    if (o+n > len) n = len-o;
    foreach (immutable j; 0..num_c) {
      if (channel_position[num_c].ptr[j]&mask) foreach (immutable i; 0..n) buffer.ptr[i] += data[j][d_offset+o+i];
    }
    foreach (immutable i; 0..n) {
      mixin(FAST_SCALED_FLOAT_TO_INT!("buffer[i]", "15"));
      // clamp to the signed 16-bit range
      if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
      output[o+i] = cast(short)v; //k8
    }
  }
}
3870 
// Downmixes `num_c` source channels to interleaved stereo and stores the result
// as clamped signed 16-bit samples.
//   output   -- receives 2*len samples (left, right, left, right, ...)
//   num_c    -- number of source channels (indexes channel_position, so at most 6)
//   data     -- per-channel float sample arrays
//   d_offset -- first sample to read in each source channel
//   len      -- number of sample frames to produce
private void compute_stereo_samples (short* output, int num_c, float** data, int d_offset, int len) {
  enum BUFFER_SIZE = 32;
  float[BUFFER_SIZE] buffer;
  // the buffer holds interleaved pairs, so one chunk is half of it
  int n = BUFFER_SIZE>>1;
  // o is the offset in the source data
  //check_endianness();
  mixin(declfcvar!"temp");
  for (uint o = 0; o < len; o += BUFFER_SIZE>>1) {
    // o2 is the offset in the output data
    int o2 = o<<1;
    // start every chunk from silence (a slice fill needs no libc import)
    buffer[] = 0.0f;
    // the last chunk may be shorter
    if (o+n > len) n = len-o;
    foreach (immutable j; 0..num_c) {
      // which of the two outputs does source channel j feed?
      int m = channel_position[num_c].ptr[j]&(PLAYBACK_LEFT|PLAYBACK_RIGHT);
      if (m == (PLAYBACK_LEFT|PLAYBACK_RIGHT)) {
        foreach (immutable i; 0..n) {
          buffer.ptr[i*2+0] += data[j][d_offset+o+i];
          buffer.ptr[i*2+1] += data[j][d_offset+o+i];
        }
      } else if (m == PLAYBACK_LEFT) {
        foreach (immutable i; 0..n) buffer.ptr[i*2+0] += data[j][d_offset+o+i];
      } else if (m == PLAYBACK_RIGHT) {
        foreach (immutable i; 0..n) buffer.ptr[i*2+1] += data[j][d_offset+o+i];
      }
    }
    foreach (immutable i; 0..n<<1) {
      mixin(FAST_SCALED_FLOAT_TO_INT!("buffer[i]", "15"));
      // clamp to the signed 16-bit range
      if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
      output[o2+i] = cast(short)v; //k8
    }
  }
}
3905 
// Converts decoded float channels into separate (planar) signed 16-bit buffers.
//   buf_c    -- number of destination channels
//   buffer   -- destination channel pointers
//   b_offset -- first sample to write in each destination channel
//   data_c   -- number of source channels
//   data     -- source channel pointers
//   d_offset -- first sample to read in each source channel
//   samples  -- number of samples per channel
// When the channel counts differ and the destination is mono or stereo (with at
// most 6 source channels) the source is downmixed; otherwise channels are copied
// one-to-one and surplus destination channels are filled with silence.
private void convert_samples_short (int buf_c, short** buffer, int b_offset, int data_c, float** data, int d_offset, int samples) {
  import core.stdc.string : memset;

  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
    // PLAYBACK_* mask for each destination channel, indexed by [buf_c][channel]
    immutable int[2][3] channel_selector = [ [0,0], [PLAYBACK_MONO,0], [PLAYBACK_LEFT, PLAYBACK_RIGHT] ];
    foreach (immutable i; 0..buf_c) compute_samples(channel_selector[buf_c].ptr[i], buffer[i]+b_offset, data_c, data, d_offset, samples);
  } else {
    int limit = (buf_c < data_c ? buf_c : data_c);
    foreach (immutable i; 0..limit) copy_samples(buffer[i]+b_offset, data[i]+d_offset, samples);
    foreach (immutable i; limit..buf_c) memset(buffer[i]+b_offset, 0, short.sizeof*samples);
  }
}
3918 
// Converts decoded float channels into one interleaved signed 16-bit buffer.
//   buf_c    -- number of channels in the interleaved destination
//   buffer   -- destination; receives len*buf_c samples
//   data_c   -- number of source channels
//   data     -- source channel pointers
//   d_offset -- first sample to read in each source channel
//   len      -- number of sample frames
// When the channel counts differ and the destination is mono or stereo (with at
// most 6 source channels) the source is downmixed; otherwise channels are copied
// one-to-one and surplus destination channels are filled with silence.
private void convert_channels_short_interleaved (int buf_c, short* buffer, int data_c, float** data, int d_offset, int len) {
  //check_endianness();
  mixin(declfcvar!"temp");
  if (buf_c != data_c && buf_c <= 2 && data_c <= 6) {
    if (buf_c == 2) {
      // one call already produces both interleaved channels
      compute_stereo_samples(buffer, data_c, data, d_offset, len);
    } else if (buf_c == 1) {
      // a single interleaved channel is laid out like a planar one; the stereo
      // routine would write 2*len samples here and run past the buffer
      compute_samples(PLAYBACK_MONO, buffer, data_c, data, d_offset, len);
    }
  } else {
    int limit = (buf_c < data_c ? buf_c : data_c);
    foreach (immutable j; 0..len) {
      foreach (immutable i; 0..limit) {
        float f = data[i][d_offset+j];
        mixin(FAST_SCALED_FLOAT_TO_INT!("f", "15"));//data[i][d_offset+j], 15);
        // clamp to the signed 16-bit range
        if (cast(uint)(v+32768) > 65535) v = (v < 0 ? -32768 : 32767);
        *buffer++ = cast(short)v; //k8
      }
      foreach (immutable i; limit..buf_c) *buffer++ = 0;
    }
  }
}
3938 } // @nogc
3939 
3940 
3941 public class VorbisDecoder {
3942   // return # of bytes read, 0 on eof, -1 on error
3943   // if called with `buf is null`, do `close()`
3944   alias readCB = int delegate (void[] buf, uint ofs, VorbisDecoder vb) nothrow @nogc;
3945 
3946   //TODO
3947   static struct Allocator {
3948   static nothrow @nogc: // because
3949     void* alloc (uint sz, VorbisDecoder vb) {
3950       import core.stdc.stdlib : malloc;
3951       return malloc(sz);
3952     }
3953     void free (void* p, VorbisDecoder vb) {
3954       import core.stdc.stdlib : free;
3955       free(p);
3956     }
3957     void* allocTemp (uint sz, VorbisDecoder vb) {
3958       import core.stdc.stdlib : malloc;
3959       return malloc(sz);
3960     }
3961     void freeTemp (void* p, uint sz, VorbisDecoder vb) {
3962       import core.stdc.stdlib : free;
3963       free(p);
3964     }
3965     uint tempSave (VorbisDecoder vb) { return 0; }
3966     void tempRestore (uint pos, VorbisDecoder vb) {}
3967   }
3968 
3969 nothrow @nogc:
3970 private:
3971   bool isOpened;
3972   readCB stmread;
3973   uint stlastofs = uint.max;
3974   uint stst;
3975   uint stpos;
3976   uint stend;
3977   bool stclose;
3978   FILE* stfl;
3979 
3980 private:
3981   //ubyte* stream;
3982   //ubyte* stream_start;
3983   //ubyte* stream_end;
3984   //uint stream_len;
3985 
3986   /+bool push_mode;+/
3987 
3988   uint first_audio_page_offset;
3989 
3990   ProbedPage p_first, p_last;
3991 
3992   // memory management
3993   Allocator alloc;
3994   int setup_offset;
3995   int temp_offset;
3996 
3997   // run-time results
3998   bool eof = true;
3999   STBVorbisError error;
4000 
4001   // header info
4002   int[2] blocksize;
4003   int blocksize_0, blocksize_1;
4004   int codebook_count;
4005   Codebook* codebooks;
4006   int floor_count;
4007   ushort[64] floor_types; // varies
4008   Floor* floor_config;
4009   int residue_count;
4010   ushort[64] residue_types; // varies
4011   Residue* residue_config;
4012   int mapping_count;
4013   Mapping* mapping;
4014   int mode_count;
4015   Mode[64] mode_config;  // varies
4016 
4017   uint total_samples;
4018 
4019   // decode buffer
4020   float*[STB_VORBIS_MAX_CHANNELS] channel_buffers;
4021   float*[STB_VORBIS_MAX_CHANNELS] outputs;
4022 
4023   float*[STB_VORBIS_MAX_CHANNELS] previous_window;
4024   int previous_length;
4025 
4026   version(STB_VORBIS_NO_DEFER_FLOOR) {
4027     float*[STB_VORBIS_MAX_CHANNELS] floor_buffers;
4028   } else {
4029     short*[STB_VORBIS_MAX_CHANNELS] finalY;
4030   }
4031 
4032   uint current_loc; // sample location of next frame to decode
4033   int current_loc_valid;
4034 
4035   // per-blocksize precomputed data
4036 
4037   // twiddle factors
4038   float*[2] A, B, C;
4039   float*[2] window;
4040   ushort*[2] bit_reverse;
4041 
4042   // current page/packet/segment streaming info
4043   uint serial; // stream serial number for verification
4044   int last_page;
4045   int segment_count;
4046   ubyte[255] segments;
4047   ubyte page_flag;
4048   ubyte bytes_in_seg;
4049   ubyte first_decode;
4050   int next_seg;
4051   int last_seg;  // flag that we're on the last segment
4052   int last_seg_which; // what was the segment number of the last seg?
4053   uint acc;
4054   int valid_bits;
4055   int packet_bytes;
4056   int end_seg_with_known_loc;
4057   uint known_loc_for_packet;
4058   int discard_samples_deferred;
4059   uint samples_output;
4060 
4061   // push mode scanning
4062   /+
4063   int page_crc_tests; // only in push_mode: number of tests active; -1 if not searching
4064   CRCscan[STB_VORBIS_PUSHDATA_CRC_COUNT] scan;
4065   +/
4066 
4067   // sample-access
4068   int channel_buffer_start;
4069   int channel_buffer_end;
4070 
4071 private: // k8: 'cause i'm evil
4072   // user-accessible info
4073   uint sample_rate;
4074   int vrchannels;
4075 
4076   uint setup_memory_required;
4077   uint temp_memory_required;
4078   uint setup_temp_memory_required;
4079 
4080   bool read_comments;
4081   ubyte* comment_data;
4082   uint comment_size;
4083 
4084   // functions to get comment data
4085   uint comment_data_pos;
4086 
4087 private:
4088   int rawRead (void[] buf) {
4089     static if (__VERSION__ > 2067) pragma(inline, true);
4090     if (isOpened && buf.length > 0 && stpos < stend) {
4091       if (stend-stpos < buf.length) buf = buf[0..stend-stpos];
4092       auto rd = stmread(buf, stpos, this);
4093       if (rd > 0) stpos += rd;
4094       return rd;
4095     }
4096     return 0;
4097   }
4098   void rawSkip (int n) { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened && n > 0) { if ((stpos += n) > stend) stpos = stend; } }
4099   void rawSeek (int n) { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { stpos = stst+(n < 0 ? 0 : n); if (stpos > stend) stpos = stend; } }
4100   void rawClose () { static if (__VERSION__ > 2067) pragma(inline, true); if (isOpened) { isOpened = false; stmread(null, 0, this); } }
4101 
4102 final:
4103 private:
4104   void doInit () {
4105     import core.stdc..string : memset;
4106     /*
4107     if (z) {
4108       alloc = *z;
4109       alloc.alloc_buffer_length_in_bytes = (alloc.alloc_buffer_length_in_bytes+3)&~3;
4110       temp_offset = alloc.alloc_buffer_length_in_bytes;
4111     }
4112     */
4113     eof = false;
4114     error = STBVorbisError.no_error;
4115     /+stream = null;+/
4116     codebooks = null;
4117     /+page_crc_tests = -1;+/
4118   }
4119 
4120   static int stflRead (void[] buf, uint ofs, VorbisDecoder vb) {
4121     if (buf !is null) {
4122       //{ import core.stdc.stdio; printf("stflRead: ofs=%u; len=%u\n", ofs, cast(uint)buf.length); }
4123       if (vb.stlastofs != ofs) {
4124         import core.stdc.stdio : fseek, SEEK_SET;
4125         vb.stlastofs = ofs;
4126         fseek(vb.stfl, ofs, SEEK_SET);
4127       }
4128       import core.stdc.stdio : fread;
4129       return cast(int)fread(buf.ptr, 1, buf.length, vb.stfl);
4130     } else {
4131       if (vb.stclose) {
4132         import core.stdc.stdio : fclose;
4133         if (vb.stfl !is null) fclose(vb.stfl);
4134       }
4135       vb.stfl = null;
4136       return 0;
4137     }
4138   }
4139 
4140 public:
4141   this () {}
4142   ~this () { close(); }
4143 
4144   this (int asize, readCB rcb) {
4145   	assert(rcb !is null);
4146 	stend = (asize > 0 ? asize : 0);
4147 	stmread = rcb;
4148 	isOpened = true;
4149 	eof = false;
4150 	read_comments = true;
4151 	if (start_decoder(this)) {
4152 		vorbis_pump_first_frame(this);
4153 		return;
4154 	}
4155   }
4156   this (FILE* fl, bool doclose=true) { open(fl, doclose); }
4157   this (const(char)[] filename) { open(filename); }
4158 
4159   @property bool closed () { return !isOpened; }
4160 
4161   void open (FILE *fl, bool doclose=true) {
4162     import core.stdc.stdio : ftell, fseek, SEEK_SET, SEEK_END;
4163     close();
4164     if (fl is null) { error = STBVorbisError.invalid_stream; return; }
4165     stclose = doclose;
4166     stst = stpos = cast(uint)ftell(fl);
4167     fseek(fl, 0, SEEK_END);
4168     stend = cast(uint)ftell(fl);
4169     stlastofs = stlastofs.max;
4170     stclose = false;
4171     stfl = fl;
4172     import std.functional : toDelegate;
4173     stmread = toDelegate(&stflRead);
4174     isOpened = true;
4175     eof = false;
4176     read_comments = true;
4177     if (start_decoder(this)) {
4178       vorbis_pump_first_frame(this);
4179       return;
4180     }
4181     auto err = error;
4182     close();
4183     error = err;
4184   }
4185 
4186   void open (const(char)[] filename) {
4187     import core.stdc.stdio : fopen;
4188     import std.internal.cstring; // sorry
4189     close();
4190     FILE* fl = fopen(filename.tempCString, "rb");
4191     if (fl is null) { error = STBVorbisError.file_open_failure; return; }
4192     open(fl, true);
4193   }
4194 
4195   /+
4196   void openPushdata(void* data, int data_len, // the memory available for decoding
4197                     int* data_used)           // only defined on success
4198   {
4199     close();
4200     eof = false;
4201     stream = cast(ubyte*)data;
4202     stream_end = stream+data_len;
4203     push_mode = true;
4204     if (!start_decoder(this)) {
4205       auto err = error;
4206       if (eof) err = STBVorbisError.need_more_data; else close();
4207       error = err;
4208       return;
4209     }
4210     *data_used = stream-(cast(ubyte*)data);
4211     error = STBVorbisError.no_error;
4212   }
4213   +/
4214 
4215   void close () {
4216     import core.stdc..string : memset;
4217 
4218     setup_free(this, this.comment_data);
4219     if (this.residue_config) {
4220       foreach (immutable i; 0..this.residue_count) {
4221         Residue* r = this.residue_config+i;
4222         if (r.classdata) {
4223           foreach (immutable j; 0..this.codebooks[r.classbook].entries) setup_free(this, r.classdata[j]);
4224           setup_free(this, r.classdata);
4225         }
4226         setup_free(this, r.residue_books);
4227       }
4228     }
4229 
4230     if (this.codebooks) {
4231       foreach (immutable i; 0..this.codebook_count) {
4232         Codebook* c = this.codebooks+i;
4233         setup_free(this, c.codeword_lengths);
4234         setup_free(this, c.multiplicands);
4235         setup_free(this, c.codewords);
4236         setup_free(this, c.sorted_codewords);
4237         // c.sorted_values[-1] is the first entry in the array
4238         setup_free(this, c.sorted_values ? c.sorted_values-1 : null);
4239       }
4240       setup_free(this, this.codebooks);
4241     }
4242     setup_free(this, this.floor_config);
4243     setup_free(this, this.residue_config);
4244     if (this.mapping) {
4245       foreach (immutable i; 0..this.mapping_count) setup_free(this, this.mapping[i].chan);
4246       setup_free(this, this.mapping);
4247     }
4248     foreach (immutable i; 0..(this.vrchannels > STB_VORBIS_MAX_CHANNELS ? STB_VORBIS_MAX_CHANNELS : this.vrchannels)) {
4249       setup_free(this, this.channel_buffers.ptr[i]);
4250       setup_free(this, this.previous_window.ptr[i]);
4251       version(STB_VORBIS_NO_DEFER_FLOOR) setup_free(this, this.floor_buffers.ptr[i]);
4252       setup_free(this, this.finalY.ptr[i]);
4253     }
4254     foreach (immutable i; 0..2) {
4255       setup_free(this, this.A.ptr[i]);
4256       setup_free(this, this.B.ptr[i]);
4257       setup_free(this, this.C.ptr[i]);
4258       setup_free(this, this.window.ptr[i]);
4259       setup_free(this, this.bit_reverse.ptr[i]);
4260     }
4261 
4262     rawClose();
4263     isOpened = false;
4264     stmread = null;
4265     stlastofs = uint.max;
4266     stst = 0;
4267     stpos = 0;
4268     stend = 0;
4269     stclose = false;
4270     stfl = null;
4271 
4272     sample_rate = 0;
4273     vrchannels = 0;
4274 
4275     setup_memory_required = 0;
4276     temp_memory_required = 0;
4277     setup_temp_memory_required = 0;
4278 
4279     read_comments = 0;
4280     comment_data = null;
4281     comment_size = 0;
4282 
4283     comment_data_pos = 0;
4284 
4285     /+
4286     stream = null;
4287     stream_start = null;
4288     stream_end = null;
4289     +/
4290 
4291     //stream_len = 0;
4292 
4293     /+push_mode = false;+/
4294 
4295     first_audio_page_offset = 0;
4296 
4297     p_first = p_first.init;
4298     p_last = p_last.init;
4299 
4300     setup_offset = 0;
4301     temp_offset = 0;
4302 
4303     eof = true;
4304     error = STBVorbisError.no_error;
4305 
4306     blocksize[] = 0;
4307     blocksize_0 = 0;
4308     blocksize_1 = 0;
4309     codebook_count = 0;
4310     codebooks = null;
4311     floor_count = 0;
4312     floor_types[] = 0;
4313     floor_config = null;
4314     residue_count = 0;
4315     residue_types[] = 0;
4316     residue_config = null;
4317     mapping_count = 0;
4318     mapping = null;
4319     mode_count = 0;
4320     mode_config[] = Mode.init;
4321 
4322     total_samples = 0;
4323 
4324     channel_buffers[] = null;
4325     outputs[] = null;
4326 
4327     previous_window[] = null;
4328     previous_length = 0;
4329 
4330     version(STB_VORBIS_NO_DEFER_FLOOR) {
4331       floor_buffers[] = null;
4332     } else {
4333       finalY[] = null;
4334     }
4335 
4336     current_loc = 0;
4337     current_loc_valid = 0;
4338 
4339     A[] = null;
4340     B[] = null;
4341     C[] = null;
4342     window[] = null;
4343     bit_reverse = null;
4344 
4345     serial = 0;
4346     last_page = 0;
4347     segment_count = 0;
4348     segments[] = 0;
4349     page_flag = 0;
4350     bytes_in_seg = 0;
4351     first_decode = 0;
4352     next_seg = 0;
4353     last_seg = 0;
4354     last_seg_which = 0;
4355     acc = 0;
4356     valid_bits = 0;
4357     packet_bytes = 0;
4358     end_seg_with_known_loc = 0;
4359     known_loc_for_packet = 0;
4360     discard_samples_deferred = 0;
4361     samples_output = 0;
4362 
4363     /+
4364     page_crc_tests = -1;
4365     scan[] = CRCscan.init;
4366     +/
4367 
4368     channel_buffer_start = 0;
4369     channel_buffer_end = 0;
4370   }
4371 
4372   @property const pure {
4373     int getSampleOffset () { return (current_loc_valid ? current_loc : -1); }
4374 
4375     @property ubyte chans () { return (isOpened ? cast(ubyte)this.vrchannels : 0); }
4376     @property uint sampleRate () { return (isOpened ? this.sample_rate : 0); }
4377     @property uint maxFrameSize () { return (isOpened ? this.blocksize_1>>1 : 0); }
4378 
4379     @property uint getSetupMemoryRequired () { return (isOpened ? this.setup_memory_required : 0); }
4380     @property uint getSetupTempMemoryRequired () { return (isOpened ? this.setup_temp_memory_required : 0); }
4381     @property uint getTempMemoryRequired () { return (isOpened ? this.temp_memory_required : 0); }
4382   }
4383 
4384   // will clear last error
4385   @property int lastError () {
4386     int e = error;
4387     error = STBVorbisError.no_error;
4388     return e;
4389   }
4390 
4391   // PUSHDATA API
4392   /+
4393   void flushPushdata () {
4394     if (push_mode) {
4395       previous_length = 0;
4396       page_crc_tests = 0;
4397       discard_samples_deferred = 0;
4398       current_loc_valid = false;
4399       first_decode = false;
4400       samples_output = 0;
4401       channel_buffer_start = 0;
4402       channel_buffer_end = 0;
4403     }
4404   }
4405 
4406   // return value: number of bytes we used
4407   int decodeFramePushdata(
4408            void* data, int data_len, // the memory available for decoding
4409            int* channels,            // place to write number of float* buffers
4410            float*** output,          // place to write float** array of float* buffers
4411            int* samples              // place to write number of output samples
4412        )
4413   {
4414     if (!this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);
4415 
4416     if (this.page_crc_tests >= 0) {
4417       *samples = 0;
4418       return vorbis_search_for_page_pushdata(this, cast(ubyte*)data, data_len);
4419     }
4420 
4421     this.stream = cast(ubyte*)data;
4422     this.stream_end = this.stream+data_len;
4423     this.error = STBVorbisError.no_error;
4424 
4425     // check that we have the entire packet in memory
4426     if (!is_whole_packet_present(this, false)) {
4427       *samples = 0;
4428       return 0;
4429     }
4430 
4431     int len, left, right;
4432 
4433     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4434       // save the actual error we encountered
4435       STBVorbisError error = this.error;
4436       if (error == STBVorbisError.bad_packet_type) {
4437         // flush and resynch
4438         this.error = STBVorbisError.no_error;
4439         while (get8_packet(this) != EOP) if (this.eof) break;
4440         *samples = 0;
4441         return this.stream-data;
4442       }
4443       if (error == STBVorbisError.continued_packet_flag_invalid) {
4444         if (this.previous_length == 0) {
4445           // we may be resynching, in which case it's ok to hit one
4446           // of these; just discard the packet
4447           this.error = STBVorbisError.no_error;
4448           while (get8_packet(this) != EOP) if (this.eof) break;
4449           *samples = 0;
4450           return this.stream-data;
4451         }
4452       }
4453       // if we get an error while parsing, what to do?
4454       // well, it DEFINITELY won't work to continue from where we are!
4455       flushPushdata();
4456       // restore the error that actually made us bail
4457       this.error = error;
4458       *samples = 0;
4459       return 1;
4460     }
4461 
4462     // success!
4463     len = vorbis_finish_frame(this, len, left, right);
4464     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4465 
4466     if (channels) *channels = this.vrchannels;
4467     *samples = len;
4468     *output = this.outputs.ptr;
4469     return this.stream-data;
4470   }
4471   +/
4472 
4473   public uint fileOffset () {
4474     if (/+push_mode ||+/ !isOpened) return 0;
4475     /+if (stream !is null) return cast(uint)(stream-stream_start);+/
4476     return (stpos > stst ? stpos-stst : 0);
4477   }
4478 
4479   public uint stream_len () { return stend-stst; }
4480 
4481   // DATA-PULLING API
4482   public int seekFrame (uint sample_number) {
4483     uint max_frame_samples;
4484 
4485     /+if (this.push_mode) return -.error(this, STBVorbisError.invalid_api_mixing);+/
4486 
4487     // fast page-level search
4488     if (!seek_to_sample_coarse(this, sample_number)) return 0;
4489 
4490     assert(this.current_loc_valid);
4491     assert(this.current_loc <= sample_number);
4492 
4493     // linear search for the relevant packet
4494     max_frame_samples = (this.blocksize_1*3-this.blocksize_0)>>2;
4495     while (this.current_loc < sample_number) {
4496       int left_start, left_end, right_start, right_end, mode, frame_samples;
4497       if (!peek_decode_initial(this, &left_start, &left_end, &right_start, &right_end, &mode)) return .error(this, STBVorbisError.seek_failed);
4498       // calculate the number of samples returned by the next frame
4499       frame_samples = right_start-left_start;
4500       if (this.current_loc+frame_samples > sample_number) {
4501         return 1; // the next frame will contain the sample
4502       } else if (this.current_loc+frame_samples+max_frame_samples > sample_number) {
4503         // there's a chance the frame after this could contain the sample
4504         vorbis_pump_first_frame(this);
4505       } else {
4506         // this frame is too early to be relevant
4507         this.current_loc += frame_samples;
4508         this.previous_length = 0;
4509         maybe_start_packet(this);
4510         flush_packet(this);
4511       }
4512     }
4513     // the next frame will start with the sample
4514     assert(this.current_loc == sample_number);
4515     return 1;
4516   }
4517 
4518   public int seek (uint sample_number) {
4519     if (!seekFrame(sample_number)) return 0;
4520     if (sample_number != this.current_loc) {
4521       int n;
4522       uint frame_start = this.current_loc;
4523       getFrameFloat(&n, null);
4524       assert(sample_number > frame_start);
4525       assert(this.channel_buffer_start+cast(int)(sample_number-frame_start) <= this.channel_buffer_end);
4526       this.channel_buffer_start += (sample_number-frame_start);
4527     }
4528     return 1;
4529   }
4530 
4531   public bool seekStart () {
4532     /+if (push_mode) { .error(this, STBVorbisError.invalid_api_mixing); return; }+/
4533     set_file_offset(this, first_audio_page_offset);
4534     previous_length = 0;
4535     first_decode = true;
4536     next_seg = -1;
4537     return vorbis_pump_first_frame(this);
4538   }
4539 
4540   public uint streamLengthInSamples () {
4541     uint restore_offset, previous_safe;
4542     uint end, last_page_loc;
4543 
4544     /+if (this.push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
4545     if (!this.total_samples) {
4546       uint last;
4547       uint lo, hi;
4548       char[6] header;
4549 
4550       // first, store the current decode position so we can restore it
4551       restore_offset = fileOffset;
4552 
4553       // now we want to seek back 64K from the end (the last page must
4554       // be at most a little less than 64K, but let's allow a little slop)
4555       if (this.stream_len >= 65536 && this.stream_len-65536 >= this.first_audio_page_offset) {
4556         previous_safe = this.stream_len-65536;
4557       } else {
4558         previous_safe = this.first_audio_page_offset;
4559       }
4560 
4561       set_file_offset(this, previous_safe);
4562       // previous_safe is now our candidate 'earliest known place that seeking
4563       // to will lead to the final page'
4564 
4565       if (!vorbis_find_page(this, &end, &last)) {
4566         // if we can't find a page, we're hosed!
4567         this.error = STBVorbisError.cant_find_last_page;
4568         this.total_samples = 0xffffffff;
4569         goto done;
4570       }
4571 
4572       // check if there are more pages
4573       last_page_loc = fileOffset;
4574 
4575       // stop when the last_page flag is set, not when we reach eof;
4576       // this allows us to stop short of a 'file_section' end without
4577       // explicitly checking the length of the section
4578       while (!last) {
4579         set_file_offset(this, end);
4580         if (!vorbis_find_page(this, &end, &last)) {
4581           // the last page we found didn't have the 'last page' flag set. whoops!
4582           break;
4583         }
4584         previous_safe = last_page_loc+1;
4585         last_page_loc = fileOffset;
4586       }
4587 
4588       set_file_offset(this, last_page_loc);
4589 
4590       // parse the header
4591       getn(this, cast(ubyte*)header, 6);
4592       // extract the absolute granule position
4593       lo = get32(this);
4594       hi = get32(this);
4595       if (lo == 0xffffffff && hi == 0xffffffff) {
4596         this.error = STBVorbisError.cant_find_last_page;
4597         this.total_samples = SAMPLE_unknown;
4598         goto done;
4599       }
4600       if (hi) lo = 0xfffffffe; // saturate
4601       this.total_samples = lo;
4602 
4603       this.p_last.page_start = last_page_loc;
4604       this.p_last.page_end = end;
4605       this.p_last.last_decoded_sample = lo;
4606 
4607      done:
4608       set_file_offset(this, restore_offset);
4609     }
4610     return (this.total_samples == SAMPLE_unknown ? 0 : this.total_samples);
4611   }
4612 
4613   public float streamLengthInSeconds () {
4614     return (isOpened ? streamLengthInSamples()/cast(float)sample_rate : 0.0f);
4615   }
4616 
4617   public int getFrameFloat (int* channels, float*** output) {
4618     int len, right, left;
4619     /+if (push_mode) return .error(this, STBVorbisError.invalid_api_mixing);+/
4620 
4621     if (!vorbis_decode_packet(this, &len, &left, &right)) {
4622       channel_buffer_start = channel_buffer_end = 0;
4623       return 0;
4624     }
4625 
4626     len = vorbis_finish_frame(this, len, left, right);
4627     foreach (immutable i; 0..this.vrchannels) this.outputs.ptr[i] = this.channel_buffers.ptr[i]+left;
4628 
4629     channel_buffer_start = left;
4630     channel_buffer_end = left+len;
4631 
4632     if (channels) *channels = this.vrchannels;
4633     if (output) *output = this.outputs.ptr;
4634     return len;
4635   }
4636 
4637   /+
4638   public VorbisDecoder stb_vorbis_open_memory (const(void)* data, int len, int* error=null, stb_vorbis_alloc* alloc=null) {
4639     VorbisDecoder this;
4640     stb_vorbis_ctx p = void;
4641     if (data is null) return null;
4642     vorbis_init(&p, alloc);
4643     p.stream = cast(ubyte*)data;
4644     p.stream_end = cast(ubyte*)data+len;
4645     p.stream_start = cast(ubyte*)p.stream;
4646     p.stream_len = len;
4647     p.push_mode = false;
4648     if (start_decoder(&p)) {
4649       this = vorbis_alloc(&p);
4650       if (this) {
4651         *this = p;
4652         vorbis_pump_first_frame(this);
4653         return this;
4654       }
4655     }
4656     if (error) *error = p.error;
4657     vorbis_deinit(&p);
4658     return null;
4659   }
4660   +/
4661 
4662   // s16 samples API
4663   int getFrameShort (int num_c, short** buffer, int num_samples) {
4664     float** output;
4665     int len = getFrameFloat(null, &output);
4666     if (len > num_samples) len = num_samples;
4667     if (len) convert_samples_short(num_c, buffer, 0, vrchannels, output, 0, len);
4668     return len;
4669   }
4670 
4671   int getFrameShortInterleaved (int num_c, short* buffer, int num_shorts) {
4672     float** output;
4673     int len;
4674     if (num_c == 1) return getFrameShort(num_c, &buffer, num_shorts);
4675     len = getFrameFloat(null, &output);
4676     if (len) {
4677       if (len*num_c > num_shorts) len = num_shorts/num_c;
4678       convert_channels_short_interleaved(num_c, buffer, vrchannels, output, 0, len);
4679     }
4680     return len;
4681   }
4682 
4683   int getSamplesShortInterleaved (int channels, short* buffer, int num_shorts) {
4684     float** outputs;
4685     int len = num_shorts/channels;
4686     int n = 0;
4687     int z = this.vrchannels;
4688     if (z > channels) z = channels;
4689     while (n < len) {
4690       int k = channel_buffer_end-channel_buffer_start;
4691       if (n+k >= len) k = len-n;
4692       if (k) convert_channels_short_interleaved(channels, buffer, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4693       buffer += k*channels;
4694       n += k;
4695       channel_buffer_start += k;
4696       if (n == len) break;
4697       if (!getFrameFloat(null, &outputs)) break;
4698     }
4699     return n;
4700   }
4701 
4702   int getSamplesShort (int channels, short** buffer, int len) {
4703     float** outputs;
4704     int n = 0;
4705     int z = this.vrchannels;
4706     if (z > channels) z = channels;
4707     while (n < len) {
4708       int k = channel_buffer_end-channel_buffer_start;
4709       if (n+k >= len) k = len-n;
4710       if (k) convert_samples_short(channels, buffer, n, vrchannels, channel_buffers.ptr, channel_buffer_start, k);
4711       n += k;
4712       channel_buffer_start += k;
4713       if (n == len) break;
4714       if (!getFrameFloat(null, &outputs)) break;
4715     }
4716     return n;
4717   }
4718 
4719   /+
4720   public int stb_vorbis_decode_filename (string filename, int* channels, int* sample_rate, short** output) {
4721     import core.stdc.stdlib : malloc, realloc;
4722 
4723     int data_len, offset, total, limit, error;
4724     short* data;
4725     VorbisDecoder v = stb_vorbis_open_filename(filename, &error, null);
4726     if (v is null) return -1;
4727     limit = v.vrchannels*4096;
4728     *channels = v.vrchannels;
4729     if (sample_rate) *sample_rate = v.sample_rate;
4730     offset = data_len = 0;
4731     total = limit;
4732     data = cast(short*)malloc(total*(*data).sizeof);
4733     if (data is null) {
4734       stb_vorbis_close(v);
4735       return -2;
4736     }
4737     for (;;) {
4738       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4739       if (n == 0) break;
4740       data_len += n;
4741       offset += n*v.vrchannels;
4742       if (offset+limit > total) {
4743         short *data2;
4744         total *= 2;
4745         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4746         if (data2 is null) {
4747           import core.stdc.stdlib : free;
4748           free(data);
4749           stb_vorbis_close(v);
4750           return -2;
4751         }
4752         data = data2;
4753       }
4754     }
4755     *output = data;
4756     stb_vorbis_close(v);
4757     return data_len;
4758   }
4759 
4760   public int stb_vorbis_decode_memory (const(void)* mem, int len, int* channels, int* sample_rate, short** output) {
4761     import core.stdc.stdlib : malloc, realloc;
4762 
4763     int data_len, offset, total, limit, error;
4764     short* data;
4765     VorbisDecoder v = stb_vorbis_open_memory(mem, len, &error, null);
4766     if (v is null) return -1;
4767     limit = v.vrchannels*4096;
4768     *channels = v.vrchannels;
4769     if (sample_rate) *sample_rate = v.sample_rate;
4770     offset = data_len = 0;
4771     total = limit;
4772     data = cast(short*)malloc(total*(*data).sizeof);
4773     if (data is null) {
4774       stb_vorbis_close(v);
4775       return -2;
4776     }
4777     for (;;) {
4778       int n = stb_vorbis_get_frame_short_interleaved(v, v.vrchannels, data+offset, total-offset);
4779       if (n == 0) break;
4780       data_len += n;
4781       offset += n*v.vrchannels;
4782       if (offset+limit > total) {
4783         short *data2;
4784         total *= 2;
4785         data2 = cast(short*)realloc(data, total*(*data).sizeof);
4786         if (data2 is null) {
4787           import core.stdc.stdlib : free;
4788           free(data);
4789           stb_vorbis_close(v);
4790           return -2;
4791         }
4792         data = data2;
4793       }
4794     }
4795     *output = data;
4796     stb_vorbis_close(v);
4797     return data_len;
4798   }
4799 
4800   public int stb_vorbis_get_samples_float_interleaved (VorbisDecoder this, int channels, float* buffer, int num_floats) {
4801     float** outputs;
4802     int len = num_floats/channels;
4803     int n = 0;
4804     int z = this.vrchannels;
4805     if (z > channels) z = channels;
4806     while (n < len) {
4807       int k = this.channel_buffer_end-this.channel_buffer_start;
4808       if (n+k >= len) k = len-n;
4809       foreach (immutable j; 0..k) {
4810         foreach (immutable i; 0..z) *buffer++ = (this.channel_buffers.ptr[i])[this.channel_buffer_start+j];
4811         foreach (immutable i; z..channels) *buffer++ = 0;
4812       }
4813       n += k;
4814       this.channel_buffer_start += k;
4815       if (n == len) break;
4816       if (!stb_vorbis_get_frame_float(this, null, &outputs)) break;
4817     }
4818     return n;
4819   }
4820   +/
4821 
4822   public int getSamplesFloat (int achans, float** buffer, int num_samples) {
4823     import core.stdc..string : memcpy, memset;
4824     float** outputs;
4825     int n = 0;
4826     int z = vrchannels;
4827     if (z > achans) z = achans;
4828     while (n < num_samples) {
4829       int k = channel_buffer_end-channel_buffer_start;
4830       if (n+k >= num_samples) k = num_samples-n;
4831       if (k) {
4832         foreach (immutable i; 0..z) memcpy(buffer[i]+n, channel_buffers.ptr[i]+channel_buffer_start, float.sizeof*k);
4833         foreach (immutable i; z..achans) memset(buffer[i]+n, 0, float.sizeof*k);
4834       }
4835       n += k;
4836       channel_buffer_start += k;
4837       if (n == num_samples) break;
4838       if (!getFrameFloat(null, &outputs)) break;
4839     }
4840     return n;
4841   }
4842 
4843 private: // k8: 'cause i'm evil
4844   private enum cmt_len_size = 2;
4845   nothrow /*@trusted*/ @nogc {
4846     public @property bool comment_empty () const pure { return (comment_get_line_len == 0); }
4847 
4848     // 0: error
4849     // includes length itself
4850     private uint comment_get_line_len () const pure {
4851       if (comment_data_pos >= comment_size) return 0;
4852       if (comment_size-comment_data_pos < cmt_len_size) return 0;
4853       uint len = comment_data[comment_data_pos];
4854       len += cast(uint)comment_data[comment_data_pos+1]<<8;
4855       return (len >= cmt_len_size && comment_data_pos+len <= comment_size ? len : 0);
4856     }
4857 
4858     public bool comment_rewind () {
4859       comment_data_pos = 0;
4860       for (;;) {
4861         auto len = comment_get_line_len();
4862         if (!len) { comment_data_pos = comment_size; return false; }
4863         if (len != cmt_len_size) return true;
4864         comment_data_pos += len;
4865       }
4866     }
4867 
4868     // true: has something to read after skip
4869     public bool comment_skip () {
4870       comment_data_pos += comment_get_line_len();
4871       for (;;) {
4872         auto len = comment_get_line_len();
4873         if (!len) { comment_data_pos = comment_size; return false; }
4874         if (len != cmt_len_size) break;
4875         comment_data_pos += len;
4876       }
4877       return true;
4878     }
4879 
4880     public const(char)[] comment_line () {
4881       auto len = comment_get_line_len();
4882       if (len < cmt_len_size) return null;
4883       if (len == cmt_len_size) return "";
4884       return (cast(char*)comment_data+comment_data_pos+cmt_len_size)[0..len-cmt_len_size];
4885     }
4886 
4887     public const(char)[] comment_name () {
4888       auto line = comment_line();
4889       if (line.length == 0) return line;
4890       uint epos = 0;
4891       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4892       return (epos < line.length ? line[0..epos] : "");
4893     }
4894 
4895     public const(char)[] comment_value () {
4896       auto line = comment_line();
4897       if (line.length == 0) return line;
4898       uint epos = 0;
4899       while (epos < line.length && line.ptr[epos] != '=') ++epos;
4900       return (epos < line.length ? line[epos+1..$] : line);
4901     }
4902   }
4903 }
4904 
4905 
4906 // ////////////////////////////////////////////////////////////////////////// //
4907 private:
4908 // cool helper to translate C defines
template cmacroFixVars(T...) {
  /**
   * fasthash-style hash of a memory block
   *
   * Params:
   *   buf =  data buffer
   *   len =  number of bytes to hash
   *   seed = the seed
   *
   * Returns:
   *   32-bit or 64-bit hash (the width of `size_t`)
   */
  size_t hashOf (const(void)* buf, size_t len, size_t seed=0) pure nothrow @trusted @nogc {
    enum ulong blockMul = 0x880355f21e6d1965UL;

    // scramble step applied to every block and to the final value
    static ulong scramble (ulong v) pure nothrow @safe @nogc {
      v ^= v>>23;
      v *= 0x2127599bf4325c37UL;
      v ^= v>>47;
      return v;
    }

    // combine `count` bytes (8 at most) into one word, lowest byte first
    static ulong gather (const(ubyte)* p, size_t count) pure nothrow @trusted @nogc {
      ulong word = 0;
      foreach (immutable idx; 0..count) word |= cast(ulong)p[idx]<<(8*idx);
      return word;
    }

    auto cursor = cast(const(ubyte)*)buf;
    ulong acc = seed;

    // full 8-byte blocks
    for (size_t blocksLeft = len/8; blocksLeft != 0; --blocksLeft) {
      ulong word;
      version(HasUnalignedOps) {
        if (__ctfe) {
          // compile-time evaluation reads byte by byte
          word = gather(cursor, 8);
        } else {
          word = *cast(ulong*)cursor;
        }
      } else {
        word = gather(cursor, 8);
      }
      cursor += 8;
      acc ^= scramble(word);
      acc *= blockMul;
    }

    acc ^= len*blockMul;

    // the remaining 0..7 bytes; this step runs even when nothing is left
    acc ^= scramble(gather(cursor, len&7));
    acc *= blockMul;

    acc = scramble(acc);
    static if (size_t.sizeof == 4) {
      // 32-bit hash
      // folding the upper half into the lower one keeps information
      // from both parts of the 64-bit value
      return cast(size_t)(acc-(acc>>32));
    } else {
      return acc;
    }
  }

  // Expand `${name}` placeholders in `s`.
  //
  // Every `${X}` where X is one of the template arguments `T` is replaced by
  // the entry of `names` at the same index. `${__temp_prefix__}` is replaced by
  // an identifier prefix derived from a hash of the (trimmed) text, so the same
  // text always gets the same prefix. Leading blank lines and trailing
  // whitespace are dropped. Asserts on a placeholder with an unknown name.
  string cmacroFixVars (string s, string[] names...) {
    enum uniqueMarker = "__temp_prefix__";

    // is `name` followed by '}' found at `text[at..]`?
    static bool varAt (string text, size_t at, string name) {
      if (text.length-at < name.length+1) return false;
      if (text[at+name.length] != '}') return false;
      return (text[at..at+name.length] == name);
    }

    assert(T.length == names.length, "cmacroFixVars: names and arguments count mismatch");

    string expanded;
    string uniquePrefix; // built on first use

    // trim trailing spaces
    while (s.length > 0 && s[$-1] <= ' ') s = s[0..$-1];

    // skip empty lines (for pretty printing): start at the first line with visible text
    uint cur = 0;
    uint firstLine = 0;
    for (; cur < s.length && s[cur] <= ' '; ++cur) {
      if (s[cur] == '\n') firstLine = cur+1;
    }
    cur = firstLine;

    while (cur+2 < s.length) {
      // look for the next "${"
      int open = cur;
      while (open+2 < s.length && !(s[open] == '$' && s[open+1] == '{')) ++open;
      if (open > cur) {
        // too close to the end for a placeholder: the rest is plain text
        if (s.length-open < 3) break;
        expanded ~= s[cur..open];
        cur = open;
      }
      assert(s[cur] == '$' && s[cur+1] == '{');
      cur += 2;

      bool found = false;
      if (varAt(s, cur, uniqueMarker)) {
        if (uniquePrefix.length == 0) {
          // generate temporary prefix
          auto hash = hashOf(s.ptr, s.length);
          immutable char[16] hexChars = "0123456789abcdef";
          uniquePrefix = "_temp_macro_var_";
          foreach (immutable _; 0..size_t.sizeof*2) {
            uniquePrefix ~= hexChars[hash&0x0f];
            hash >>= 4;
          }
          uniquePrefix ~= "_";
        }
        cur += uniqueMarker.length+1;
        expanded ~= uniquePrefix;
        found = true;
      } else {
        foreach (immutable nidx, string oname; T) {
          static assert(oname.length > 0);
          if (varAt(s, cur, oname)) {
            expanded ~= names[nidx];
            cur += oname.length+1;
            found = true;
            break;
          }
        }
      }
      assert(found, "unknown variable in macro");
    }

    // whatever is left is copied as is
    if (cur < s.length) expanded ~= s[cur..$];
    return expanded;
  }
}
5045 
5046 // ////////////////////////////////////////////////////////////////////////// //
5047 /* Version history
5048     1.09    - 2016/04/04 - back out 'avoid discarding last frame' fix from previous version
5049     1.08    - 2016/04/02 - fixed multiple warnings; fix setup memory leaks;
5050                            avoid discarding last frame of audio data
5051     1.07    - 2015/01/16 - fixed some warnings, fix mingw, const-correct API
5052                            some more crash fixes when out of memory or with corrupt files
5053     1.06    - 2015/08/31 - full, correct support for seeking API (Dougall Johnson)
5054                            some crash fixes when out of memory or with corrupt files
5055     1.05    - 2015/04/19 - don't define __forceinline if it's redundant
5056     1.04    - 2014/08/27 - fix missing const-correct case in API
5057     1.03    - 2014/08/07 - Warning fixes
5058     1.02    - 2014/07/09 - Declare qsort compare function _cdecl on windows
5059     1.01    - 2014/06/18 - fix stb_vorbis_get_samples_float
5060     1.0     - 2014/05/26 - fix memory leaks; fix warnings; fix bugs in multichannel
5061                            (API change) report sample rate for decode-full-file funcs
5062     0.99996 - bracket #include <malloc.h> for macintosh compilation by Laurent Gomila
5063     0.99995 - use union instead of pointer-cast for fast-float-to-int to avoid alias-optimization problem
5064     0.99994 - change fast-float-to-int to work in single-precision FPU mode, remove endian-dependence
5065     0.99993 - remove assert that fired on legal files with empty tables
5066     0.99992 - rewind-to-start
5067     0.99991 - bugfix to stb_vorbis_get_samples_short by Bernhard Wodo
5068     0.9999 - (should have been 0.99990) fix no-CRT support, compiling as C++
5069     0.9998 - add a full-decode function with a memory source
5070     0.9997 - fix a bug in the read-from-FILE case in 0.9996 addition
5071     0.9996 - query length of vorbis stream in samples/seconds
5072     0.9995 - bugfix to another optimization that only happened in certain files
5073     0.9994 - bugfix to one of the optimizations that caused significant (but inaudible?) errors
5074     0.9993 - performance improvements; runs in 99% to 104% of time of reference implementation
5075     0.9992 - performance improvement of IMDCT; now performs close to reference implementation
5076     0.9991 - performance improvement of IMDCT
5077     0.999 - (should have been 0.9990) performance improvement of IMDCT
5078     0.998 - no-CRT support from Casey Muratori
5079     0.997 - bugfixes for bugs found by Terje Mathisen
5080     0.996 - bugfix: fast-huffman decode initialized incorrectly for sparse codebooks; fixing gives 10% speedup - found by Terje Mathisen
5081     0.995 - bugfix: fix to 'effective' overrun detection - found by Terje Mathisen
5082     0.994 - bugfix: garbage decode on final VQ symbol of a non-multiple - found by Terje Mathisen
5083     0.993 - bugfix: pushdata API required 1 extra byte for empty page (failed to consume final page if empty) - found by Terje Mathisen
5084     0.992 - fixes for MinGW warning
5085     0.991 - turn fast-float-conversion on by default
5086     0.990 - fix push-mode seek recovery if you seek into the headers
5087     0.98b - fix to bad release of 0.98
5088     0.98 - fix push-mode seek recovery; robustify float-to-int and support non-fast mode
5089     0.97 - builds under c++ (typecasting, don't use 'class' keyword)
5090     0.96 - somehow MY 0.95 was right, but the web one was wrong, so here's my 0.95 rereleased as 0.96, fixes a typo in the clamping code
5091     0.95 - clamping code for 16-bit functions
    0.94 - not publicly released
5093     0.93 - fixed all-zero-floor case (was decoding garbage)
5094     0.92 - fixed a memory leak
5095     0.91 - conditional compiles to omit parts of the API and the infrastructure to support them: STB_VORBIS_NO_PULLDATA_API, STB_VORBIS_NO_PUSHDATA_API, STB_VORBIS_NO_STDIO, STB_VORBIS_NO_INTEGER_CONVERSION
5096     0.90 - first public release
5097 */
5098 
5099 /*
5100 ------------------------------------------------------------------------------
5101 This software is available under 2 licenses -- choose whichever you prefer.
5102 ------------------------------------------------------------------------------
5103 ALTERNATIVE A - MIT License
5104 Copyright (c) 2017 Sean Barrett
5105 Permission is hereby granted, free of charge, to any person obtaining a copy of
5106 this software and associated documentation files (the "Software"), to deal in
5107 the Software without restriction, including without limitation the rights to
5108 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
5109 of the Software, and to permit persons to whom the Software is furnished to do
5110 so, subject to the following conditions:
5111 The above copyright notice and this permission notice shall be included in all
5112 copies or substantial portions of the Software.
5113 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5114 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5115 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5116 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
5117 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
5118 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
5119 SOFTWARE.
5120 ------------------------------------------------------------------------------
5121 ALTERNATIVE B - Public Domain (www.unlicense.org)
5122 This is free and unencumbered software released into the public domain.
5123 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
5124 software, either in source code form or as a compiled binary, for any purpose,
5125 commercial or non-commercial, and by any means.
5126 In jurisdictions that recognize copyright laws, the author or authors of this
5127 software dedicate any and all copyright interest in the software to the public
5128 domain. We make this dedication for the benefit of the public at large and to
5129 the detriment of our heirs and successors. We intend this dedication to be an
5130 overt act of relinquishment in perpetuity of all present and future rights to
5131 this software under copyright law.
5132 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
5133 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
5134 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
5135 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
5136 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
5137 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5138 ------------------------------------------------------------------------------
5139 */
// Suggestion Box / Bug Report