arsd.jpeg source code

1 // jpgd.h - C++ class for JPEG decompression.
2 // Rich Geldreich <richgel99@gmail.com>
3 // Alex Evans: Linear memory allocator (taken from jpge.h).
4 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
5 // D translation by Ketmar // Invisible Vector
6 //
7 // This is free and unencumbered software released into the public domain.
8 //
9 // Anyone is free to copy, modify, publish, use, compile, sell, or
10 // distribute this software, either in source code form or as a compiled
11 // binary, for any purpose, commercial or non-commercial, and by any
12 // means.
13 //
14 // In jurisdictions that recognize copyright laws, the author or authors
15 // of this software dedicate any and all copyright interest in the
16 // software to the public domain. We make this dedication for the benefit
17 // of the public at large and to the detriment of our heirs and
18 // successors. We intend this dedication to be an overt act of
19 // relinquishment in perpetuity of all present and future rights to this
20 // software under copyright law.
21 //
22 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
26 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28 // OTHER DEALINGS IN THE SOFTWARE.
29 //
30 // For more information, please refer to <http://unlicense.org/>
31 //
32 // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
33 //
34 // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
35 // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
36 // http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
37 /**
38  * Loads a JPEG image from a memory buffer or a file.
39  *
40  * req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA).
41  * On return, width/height will be set to the image's dimensions, and actual_comps will be set to either 1 (grayscale) or 3 (RGB).
42  * Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp.
43  */
44 module arsd.jpeg;
45 
46 // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
47 // This is slower, but results in higher quality on images with highly saturated colors.
48 version = JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING;
49 
50 /// Input stream interface.
51 /// This delegate is called when the internal input buffer is empty.
52 /// Parameters:
53 ///   pBuf - input buffer
54 ///   max_bytes_to_read - maximum bytes that can be written to pBuf
55 ///   pEOF_flag - set this to true if at end of stream (no more bytes remaining)
56 ///   Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
57 ///   Notes: This delegate will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
58 alias JpegStreamReadFunc = int delegate (void* pBuf, int max_bytes_to_read, bool* pEOF_flag);
59 
60 
61 // ////////////////////////////////////////////////////////////////////////// //
62 private:
/// Allocates `nSize` bytes with the C runtime `malloc` and returns its result unchanged.
void* jpgd_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  return malloc(nSize);
}
/// Hands `p` to the C runtime `free`; a null pointer is silently ignored.
void jpgd_free (void* p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
65 
66 // Success/failure error codes.
alias jpgd_status = int;
enum /*jpgd_status*/ {
  JPGD_SUCCESS = 0,
  JPGD_FAILED = -1,
  JPGD_DONE = 1,
  // Detailed error codes: numbered consecutively upwards from -256,
  // so the order of the entries below must not change.
  JPGD_BAD_DHT_COUNTS = -256,
  JPGD_BAD_DHT_INDEX,
  JPGD_BAD_DHT_MARKER,
  JPGD_BAD_DQT_MARKER,
  JPGD_BAD_DQT_TABLE,
  JPGD_BAD_PRECISION,
  JPGD_BAD_HEIGHT,
  JPGD_BAD_WIDTH,
  JPGD_TOO_MANY_COMPONENTS,
  JPGD_BAD_SOF_LENGTH,
  JPGD_BAD_VARIABLE_MARKER,
  JPGD_BAD_DRI_LENGTH,
  JPGD_BAD_SOS_LENGTH,
  JPGD_BAD_SOS_COMP_ID,
  JPGD_W_EXTRA_BYTES_BEFORE_MARKER,
  JPGD_NO_ARITHMITIC_SUPPORT,
  JPGD_UNEXPECTED_MARKER,
  JPGD_NOT_JPEG,
  JPGD_UNSUPPORTED_MARKER,
  JPGD_BAD_DQT_LENGTH,
  JPGD_TOO_MANY_BLOCKS,
  JPGD_UNDEFINED_QUANT_TABLE,
  JPGD_UNDEFINED_HUFF_TABLE,
  JPGD_NOT_SINGLE_SCAN,
  JPGD_UNSUPPORTED_COLORSPACE,
  JPGD_UNSUPPORTED_SAMP_FACTORS,
  JPGD_DECODE_ERROR,
  JPGD_BAD_RESTART_MARKER,
  JPGD_ASSERTION_ERROR,
  JPGD_BAD_SOS_SPECTRAL,
  JPGD_BAD_SOS_SUCCESSIVE,
  JPGD_STREAM_READ,
  JPGD_NOTENOUGHMEM,
}
79 
// Fixed size limits used throughout the decoder.
enum {
  JPGD_IN_BUF_SIZE = 8192,
  JPGD_MAX_BLOCKS_PER_MCU = 10,
  JPGD_MAX_HUFF_TABLES = 8,
  JPGD_MAX_QUANT_TABLES = 4,
  JPGD_MAX_COMPONENTS = 4,
  JPGD_MAX_COMPS_IN_SCAN = 4,
  JPGD_MAX_BLOCKS_PER_ROW = 8192,
  JPGD_MAX_HEIGHT = 16384,
  JPGD_MAX_WIDTH = 16384,
}
84 
85 // DCT coefficients are stored in this sequence.
static immutable int[64] g_ZAG = [
   0,  1,  8, 16,  9,  2,  3, 10,
  17, 24, 32, 25, 18, 11,  4,  5,
  12, 19, 26, 33, 40, 48, 41, 34,
  27, 20, 13,  6,  7, 14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36,
  29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46,
  53, 60, 61, 54, 47, 55, 62, 63,
];
87 
// Marker byte values recognised by the decoder (the byte that follows 0xFF in the stream).
alias JPEG_MARKER = int;
enum /*JPEG_MARKER*/ {
  // 0xC0..0xCF range
  M_SOF0  = 0xC0,
  M_SOF1  = 0xC1,
  M_SOF2  = 0xC2,
  M_SOF3  = 0xC3,
  M_SOF5  = 0xC5,
  M_SOF6  = 0xC6,
  M_SOF7  = 0xC7,
  M_JPG   = 0xC8,
  M_SOF9  = 0xC9,
  M_SOF10 = 0xCA,
  M_SOF11 = 0xCB,
  M_SOF13 = 0xCD,
  M_SOF14 = 0xCE,
  M_SOF15 = 0xCF,
  M_DHT   = 0xC4,
  M_DAC   = 0xCC,
  // 0xD0..0xD7: restart markers
  M_RST0  = 0xD0,
  M_RST1  = 0xD1,
  M_RST2  = 0xD2,
  M_RST3  = 0xD3,
  M_RST4  = 0xD4,
  M_RST5  = 0xD5,
  M_RST6  = 0xD6,
  M_RST7  = 0xD7,
  // 0xD8..0xDF range
  M_SOI   = 0xD8,
  M_EOI   = 0xD9,
  M_SOS   = 0xDA,
  M_DQT   = 0xDB,
  M_DNL   = 0xDC,
  M_DRI   = 0xDD,
  M_DHP   = 0xDE,
  M_EXP   = 0xDF,
  // 0xE0 and above
  M_APP0  = 0xE0,
  M_APP15 = 0xEF,
  M_JPG0  = 0xF0,
  M_JPG13 = 0xFD,
  M_COM   = 0xFE,
  // miscellaneous
  M_TEM   = 0x01,
  M_ERROR = 0x100,
  RST0    = 0xD0,
}
96 
// Scan layouts the decoder distinguishes; the values are consecutive starting at 0.
alias JPEG_SUBSAMPLING = int;
enum /*JPEG_SUBSAMPLING*/ {
  JPGD_GRAYSCALE = 0,
  JPGD_YH1V1 = 1,
  JPGD_YH2V1 = 2,
  JPGD_YH1V2 = 3,
  JPGD_YH2V2 = 4,
}
99 
// Fixed-point parameters of the integer IDCT below.
enum CONST_BITS = 13; // fractional bits carried by the FIX_* multipliers
enum PASS1_BITS = 2;  // extra fractional bits kept between the row and column passes
enum int SCALEDONE = 1;

// Multipliers: each is the value in its name scaled by (1 << CONST_BITS).
enum int FIX_0_298631336 = 2446;  /* FIX(0.298631336) */
enum int FIX_0_390180644 = 3196;  /* FIX(0.390180644) */
enum int FIX_0_541196100 = 4433;  /* FIX(0.541196100) */
enum int FIX_0_765366865 = 6270;  /* FIX(0.765366865) */
enum int FIX_0_899976223 = 7373;  /* FIX(0.899976223) */
enum int FIX_1_175875602 = 9633;  /* FIX(1.175875602) */
enum int FIX_1_501321110 = 12299; /* FIX(1.501321110) */
enum int FIX_1_847759065 = 15137; /* FIX(1.847759065) */
enum int FIX_1_961570560 = 16069; /* FIX(1.961570560) */
enum int FIX_2_053119869 = 16819; /* FIX(2.053119869) */
enum int FIX_2_562915447 = 20995; /* FIX(2.562915447) */
enum int FIX_3_072711026 = 25172; /* FIX(3.072711026) */
116 
// Shifts `x` right by `n` bits after adding half of the divisor, i.e. rounds instead of truncating.
int DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int half = SCALEDONE << (n - 1);
  return (x + half) >> n;
}
// Like DESCALE, but additionally adds 128 (pre-scaled by `n` bits) so the result is re-centred around 128.
int DESCALE_ZEROSHIFT() (int x, int n) {
  pragma(inline, true);
  immutable int levelShift = 128 << n;
  immutable int half = SCALEDONE << (n - 1);
  return (x + levelShift + half) >> n;
}
// Saturates `i` to the 0..255 range: negative inputs give 0, inputs above 255 give 255.
ubyte CLAMP() (int i) {
  pragma(inline, true);
  immutable bool inRange = (cast(uint)i <= 255);
  // Sign bit of ~i smeared over the low byte: 0x00 for negative i, 0xFF for positive i.
  immutable int saturated = ((~i) >> 31) & 0xFF;
  return cast(ubyte)(inRange ? i : saturated);
}
120 
121 
122 // Compiler creates a fast path 1D IDCT for X non-zero columns
struct Row(int NONZERO_COLS) {
pure nothrow @trusted @nogc:
  // Row pass of the 8x8 inverse DCT.
  // Reads up to NONZERO_COLS coefficients from pSrc (the remaining columns are
  // taken to be zero and are never read) and writes 8 intermediate values to pTemp.
  static void idct(int* pTemp, const(jpeg_decoder.jpgd_block_t)* pSrc) {
    static if (NONZERO_COLS == 0) {
      // Whole row is zero: pTemp is deliberately left untouched.
    } else static if (NONZERO_COLS == 1) {
      // Only the first coefficient is present, so all eight outputs share one value.
      immutable int flat = (pSrc[0] << PASS1_BITS);
      foreach (immutable k; 0..8) pTemp[k] = flat;
    } else {
      // Load each input once; columns known to be zero become compile-time constants
      // (`static if` does not open a scope, so c0..c7 are visible below).
      static if (0 < NONZERO_COLS) { immutable int c0 = pSrc[0]; } else { enum int c0 = 0; }
      static if (1 < NONZERO_COLS) { immutable int c1 = pSrc[1]; } else { enum int c1 = 0; }
      static if (2 < NONZERO_COLS) { immutable int c2 = pSrc[2]; } else { enum int c2 = 0; }
      static if (3 < NONZERO_COLS) { immutable int c3 = pSrc[3]; } else { enum int c3 = 0; }
      static if (4 < NONZERO_COLS) { immutable int c4 = pSrc[4]; } else { enum int c4 = 0; }
      static if (5 < NONZERO_COLS) { immutable int c5 = pSrc[5]; } else { enum int c5 = 0; }
      static if (6 < NONZERO_COLS) { immutable int c6 = pSrc[6]; } else { enum int c6 = 0; }
      static if (7 < NONZERO_COLS) { immutable int c7 = pSrc[7]; } else { enum int c7 = 0; }

      // Even-indexed inputs (0, 2, 4, 6).
      immutable int evenMix = (c2 + c6)*FIX_0_541196100;
      immutable int even2 = evenMix + c6*(-FIX_1_847759065);
      immutable int even3 = evenMix + c2*FIX_0_765366865;
      immutable int even0 = (c0 + c4) << CONST_BITS;
      immutable int even1 = (c0 - c4) << CONST_BITS;

      immutable int sum0 = even0 + even3;
      immutable int sum3 = even0 - even3;
      immutable int sum1 = even1 + even2;
      immutable int sum2 = even1 - even2;

      // Odd-indexed inputs (1, 3, 5, 7).
      immutable int p71 = c7 + c1;
      immutable int p53 = c5 + c3;
      immutable int p73 = c7 + c3;
      immutable int p51 = c5 + c1;
      immutable int oddMix = (p73 + p51)*FIX_1_175875602;

      immutable int q71 = p71*(-FIX_0_899976223);
      immutable int q53 = p53*(-FIX_2_562915447);
      immutable int q73 = p73*(-FIX_1_961570560) + oddMix;
      immutable int q51 = p51*(-FIX_0_390180644) + oddMix;

      immutable int odd7 = c7*FIX_0_298631336 + q71 + q73;
      immutable int odd5 = c5*FIX_2_053119869 + q53 + q51;
      immutable int odd3 = c3*FIX_3_072711026 + q53 + q73;
      immutable int odd1 = c1*FIX_1_501321110 + q71 + q51;

      // Combine both halves; outputs k and 7-k share the same pair of terms.
      enum int SHIFT = CONST_BITS-PASS1_BITS;
      pTemp[0] = DESCALE(sum0 + odd1, SHIFT);
      pTemp[7] = DESCALE(sum0 - odd1, SHIFT);
      pTemp[1] = DESCALE(sum1 + odd3, SHIFT);
      pTemp[6] = DESCALE(sum1 - odd3, SHIFT);
      pTemp[2] = DESCALE(sum2 + odd5, SHIFT);
      pTemp[5] = DESCALE(sum2 - odd5, SHIFT);
      pTemp[3] = DESCALE(sum3 + odd7, SHIFT);
      pTemp[4] = DESCALE(sum3 - odd7, SHIFT);
    }
  }
}
182 
183 
184 // Compiler creates a fast path 1D IDCT for X non-zero rows
struct Col (int NONZERO_ROWS) {
pure nothrow @trusted @nogc:
  // Column pass of the 8x8 inverse DCT.
  // Reads one column of pTemp (stride 8; rows at index >= NONZERO_ROWS are taken
  // to be zero and never read) and writes 8 clamped bytes to pDst_ptr (stride 8).
  static void idct(ubyte* pDst_ptr, const(int)* pTemp) {
    static assert(NONZERO_ROWS > 0);
    static if (NONZERO_ROWS == 1) {
      // Only the first row is present, so the whole column gets one value.
      int flat = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
      immutable ubyte pixel = cast(ubyte)CLAMP(flat);
      foreach (immutable k; 0..8) pDst_ptr[k*8] = pixel;
    } else {
      // Load each input once; rows known to be zero become compile-time constants
      // (`static if` does not open a scope, so r0..r7 are visible below).
      static if (0 < NONZERO_ROWS) { immutable int r0 = pTemp[0*8]; } else { enum int r0 = 0; }
      static if (1 < NONZERO_ROWS) { immutable int r1 = pTemp[1*8]; } else { enum int r1 = 0; }
      static if (2 < NONZERO_ROWS) { immutable int r2 = pTemp[2*8]; } else { enum int r2 = 0; }
      static if (3 < NONZERO_ROWS) { immutable int r3 = pTemp[3*8]; } else { enum int r3 = 0; }
      static if (4 < NONZERO_ROWS) { immutable int r4 = pTemp[4*8]; } else { enum int r4 = 0; }
      static if (5 < NONZERO_ROWS) { immutable int r5 = pTemp[5*8]; } else { enum int r5 = 0; }
      static if (6 < NONZERO_ROWS) { immutable int r6 = pTemp[6*8]; } else { enum int r6 = 0; }
      static if (7 < NONZERO_ROWS) { immutable int r7 = pTemp[7*8]; } else { enum int r7 = 0; }

      // Even-indexed inputs (0, 2, 4, 6).
      immutable int evenMix = (r2 + r6)*FIX_0_541196100;
      immutable int even2 = evenMix + r6*(-FIX_1_847759065);
      immutable int even3 = evenMix + r2*FIX_0_765366865;
      immutable int even0 = (r0 + r4) << CONST_BITS;
      immutable int even1 = (r0 - r4) << CONST_BITS;

      immutable int sum0 = even0 + even3;
      immutable int sum3 = even0 - even3;
      immutable int sum1 = even1 + even2;
      immutable int sum2 = even1 - even2;

      // Odd-indexed inputs (1, 3, 5, 7).
      immutable int p71 = r7 + r1;
      immutable int p53 = r5 + r3;
      immutable int p73 = r7 + r3;
      immutable int p51 = r5 + r1;
      immutable int oddMix = (p73 + p51)*FIX_1_175875602;

      immutable int q71 = p71*(-FIX_0_899976223);
      immutable int q53 = p53*(-FIX_2_562915447);
      immutable int q73 = p73*(-FIX_1_961570560) + oddMix;
      immutable int q51 = p51*(-FIX_0_390180644) + oddMix;

      immutable int odd7 = r7*FIX_0_298631336 + q71 + q73;
      immutable int odd5 = r5*FIX_2_053119869 + q53 + q51;
      immutable int odd3 = r3*FIX_3_072711026 + q53 + q73;
      immutable int odd1 = r1*FIX_1_501321110 + q71 + q51;

      // Descale, re-centre around 128 and clamp; outputs k and 7-k share terms.
      enum int SHIFT = CONST_BITS+PASS1_BITS+3;
      pDst_ptr[8*0] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(sum0 + odd1, SHIFT));
      pDst_ptr[8*7] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(sum0 - odd1, SHIFT));
      pDst_ptr[8*1] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(sum1 + odd3, SHIFT));
      pDst_ptr[8*6] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(sum1 - odd3, SHIFT));
      pDst_ptr[8*2] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(sum2 + odd5, SHIFT));
      pDst_ptr[8*5] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(sum2 - odd5, SHIFT));
      pDst_ptr[8*3] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(sum3 + odd7, SHIFT));
      pDst_ptr[8*4] = cast(ubyte)CLAMP(DESCALE_ZEROSHIFT(sum3 - odd7, SHIFT));
    }
  }
}
260 
261 
// Row-pass dispatch table for idct(): 64 groups of 8 entries.
// idct() picks the group at (block_max_zag - 1) * 8 and reads one entry per
// block row; each entry N (0..8) selects the Row!(N) instantiation to run for that row.
262 static immutable ubyte[512] s_idct_row_table = [
263   1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
264   4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
265   6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
266   6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
267   8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
268   8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
269   8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
270   8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
271 ];
272 
// Column-pass dispatch table for idct(): entry [block_max_zag - 1] is the
// N (1..8) of the Col!(N) instantiation used for every column of the block.
static immutable ubyte[64] s_idct_col_table = [
  1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
];
274 
// Inverse DCT of one 8x8 coefficient block.
//   pSrc_ptr      - 64 coefficients, 8 per row
//   pDst_ptr      - receives 64 output bytes, 8 per row
//   block_max_zag - value in 1..64 used to index the dispatch tables; it selects
//                   how many leading columns/rows each 1D pass has to process
void idct() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr, int block_max_zag) {
  assert(block_max_zag >= 1);
  assert(block_max_zag <= 64);

  if (block_max_zag <= 1) {
    // Only the first coefficient is used: the block is one flat value.
    int fill = CLAMP(((pSrc_ptr[0] + 4) >> 3) + 128);
    // Replicate the byte into all four lanes of a 32-bit word.
    fill |= fill << 8;
    fill |= fill << 16;
    foreach (immutable row; 0..8) {
      int* words = cast(int*)pDst_ptr;
      words[0] = fill;
      words[1] = fill;
      pDst_ptr += 8;
    }
    return;
  }

  int[64] temp;

  // Pass 1: rows. The table names the Row!(N) variant to run for each row.
  const(ubyte)* rowKinds = s_idct_row_table.ptr + (block_max_zag - 1) * 8;
  foreach (immutable row; 0..8) {
    int* rowOut = temp.ptr + row * 8;
    const(jpeg_decoder.jpgd_block_t)* rowIn = pSrc_ptr + row * 8;
    switch (rowKinds[row]) {
      case 0: Row!(0).idct(rowOut, rowIn); break;
      case 1: Row!(1).idct(rowOut, rowIn); break;
      case 2: Row!(2).idct(rowOut, rowIn); break;
      case 3: Row!(3).idct(rowOut, rowIn); break;
      case 4: Row!(4).idct(rowOut, rowIn); break;
      case 5: Row!(5).idct(rowOut, rowIn); break;
      case 6: Row!(6).idct(rowOut, rowIn); break;
      case 7: Row!(7).idct(rowOut, rowIn); break;
      case 8: Row!(8).idct(rowOut, rowIn); break;
      default: assert(0);
    }
  }

  // Pass 2: columns. Every column uses the same Col!(N) variant.
  immutable int nonzero_rows = s_idct_col_table.ptr[block_max_zag - 1];
  foreach (immutable col; 0..8) {
    ubyte* colOut = pDst_ptr + col;
    const(int)* colIn = temp.ptr + col;
    switch (nonzero_rows) {
      case 1: Col!(1).idct(colOut, colIn); break;
      case 2: Col!(2).idct(colOut, colIn); break;
      case 3: Col!(3).idct(colOut, colIn); break;
      case 4: Col!(4).idct(colOut, colIn); break;
      case 5: Col!(5).idct(colOut, colIn); break;
      case 6: Col!(6).idct(colOut, colIn); break;
      case 7: Col!(7).idct(colOut, colIn); break;
      case 8: Col!(8).idct(colOut, colIn); break;
      default: assert(0);
    }
  }
}
344 
// Inverse DCT variant that only looks at the top-left 4x4 coefficients of the
// block at pSrc_ptr, while still producing a full 8x8 block of bytes at pDst_ptr.
void idct_4x4() (const(jpeg_decoder.jpgd_block_t)* pSrc_ptr, ubyte* pDst_ptr) {
  int[64] temp;

  // Row pass over the first four rows only, four columns each.
  foreach (immutable row; 0..4) {
    Row!(4).idct(temp.ptr + row * 8, pSrc_ptr + row * 8);
  }

  // Column pass over all eight columns, reading four rows each.
  foreach (immutable col; 0..8) {
    Col!(4).idct(pDst_ptr + col, temp.ptr + col);
  }
}
365 
366 
367 // ////////////////////////////////////////////////////////////////////////// //
368 struct jpeg_decoder {
369 private import core.stdc.string : memcpy, memset;
370 private:
371   static auto JPGD_MIN(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a < b ? a : b); }
372   static auto JPGD_MAX(T) (T a, T b) pure nothrow @safe @nogc { pragma(inline, true); return (a > b ? a : b); }
373 
374   alias jpgd_quant_t = short;
375   alias jpgd_block_t = short;
376   alias pDecode_block_func = void function (ref jpeg_decoder, int, int, int);
377 
378   static struct huff_tables {
379     bool ac_table;
380     uint[256] look_up;
381     uint[256] look_up2;
382     ubyte[256] code_size;
383     uint[512] tree;
384   }
385 
386   static struct coeff_buf {
387     ubyte* pData;
388     int block_num_x, block_num_y;
389     int block_len_x, block_len_y;
390     int block_size;
391   }
392 
393   static struct mem_block {
394     mem_block* m_pNext;
395     size_t m_used_count;
396     size_t m_size;
397     char[1] m_data;
398   }
399 
400   mem_block* m_pMem_blocks;
401   int m_image_x_size;
402   int m_image_y_size;
403   JpegStreamReadFunc readfn;
404   int m_progressive_flag;
405   ubyte[JPGD_MAX_HUFF_TABLES] m_huff_ac;
406   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_num;      // pointer to number of Huffman codes per bit size
407   ubyte*[JPGD_MAX_HUFF_TABLES] m_huff_val;      // pointer to Huffman codes per bit size
408   jpgd_quant_t*[JPGD_MAX_QUANT_TABLES] m_quant; // pointer to quantization tables
409   int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
410   int m_comps_in_frame;                         // # of components in frame
411   int[JPGD_MAX_COMPONENTS] m_comp_h_samp;       // component's horizontal sampling factor
412   int[JPGD_MAX_COMPONENTS] m_comp_v_samp;       // component's vertical sampling factor
413   int[JPGD_MAX_COMPONENTS] m_comp_quant;        // component's quantization table selector
414   int[JPGD_MAX_COMPONENTS] m_comp_ident;        // component's ID
415   int[JPGD_MAX_COMPONENTS] m_comp_h_blocks;
416   int[JPGD_MAX_COMPONENTS] m_comp_v_blocks;
417   int m_comps_in_scan;                          // # of components in scan
418   int[JPGD_MAX_COMPS_IN_SCAN] m_comp_list;      // components in this scan
419   int[JPGD_MAX_COMPONENTS] m_comp_dc_tab;       // component's DC Huffman coding table selector
420   int[JPGD_MAX_COMPONENTS] m_comp_ac_tab;       // component's AC Huffman coding table selector
421   int m_spectral_start;                         // spectral selection start
422   int m_spectral_end;                           // spectral selection end
423   int m_successive_low;                         // successive approximation low
424   int m_successive_high;                        // successive approximation high
425   int m_max_mcu_x_size;                         // MCU's max. X size in pixels
426   int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
427   int m_blocks_per_mcu;
428   int m_max_blocks_per_row;
429   int m_mcus_per_row, m_mcus_per_col;
430   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_org;
431   int m_total_lines_left;                       // total # lines left in image
432   int m_mcu_lines_left;                         // total # lines left in this MCU
433   int m_real_dest_bytes_per_scan_line;
434   int m_dest_bytes_per_scan_line;               // rounded up
435   int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
436   huff_tables*[JPGD_MAX_HUFF_TABLES] m_pHuff_tabs;
437   coeff_buf*[JPGD_MAX_COMPONENTS] m_dc_coeffs;
438   coeff_buf*[JPGD_MAX_COMPONENTS] m_ac_coeffs;
439   int m_eob_run;
440   int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
441   ubyte* m_pIn_buf_ofs;
442   int m_in_buf_left;
443   int m_tem_flag;
444   bool m_eof_flag;
445   ubyte[128] m_in_buf_pad_start;
446   ubyte[JPGD_IN_BUF_SIZE+128] m_in_buf;
447   ubyte[128] m_in_buf_pad_end;
448   int m_bits_left;
449   uint m_bit_buf;
450   int m_restart_interval;
451   int m_restarts_left;
452   int m_next_restart_num;
453   int m_max_mcus_per_row;
454   int m_max_blocks_per_mcu;
455   int m_expanded_blocks_per_mcu;
456   int m_expanded_blocks_per_row;
457   int m_expanded_blocks_per_component;
458   bool m_freq_domain_chroma_upsample;
459   int m_max_mcus_per_col;
460   uint[JPGD_MAX_COMPONENTS] m_last_dc_val;
461   jpgd_block_t* m_pMCU_coefficients;
462   int[JPGD_MAX_BLOCKS_PER_MCU] m_mcu_block_max_zag;
463   ubyte* m_pSample_buf;
464   int[256] m_crr;
465   int[256] m_cbb;
466   int[256] m_crg;
467   int[256] m_cbg;
468   ubyte* m_pScan_line_0;
469   ubyte* m_pScan_line_1;
470   jpgd_status m_error_code;
471   bool m_ready_flag;
472   int m_total_bytes_read;
473 
474 public:
475   // Inspect `error_code` after constructing to determine if the stream is valid or not. You may look at the `width`, `height`, etc.
476   // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
477   this (JpegStreamReadFunc rfn) { decode_init(rfn); }
478 
479   ~this () { free_all_blocks(); }
480 
481   @disable this (this); // no copies
482 
483   // Call this method after constructing the object to begin decompression.
484   // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
485   int begin_decoding () {
486     if (m_ready_flag) return JPGD_SUCCESS;
487     if (m_error_code) return JPGD_FAILED;
488     try {
489       decode_start();
490       m_ready_flag = true;
491       return JPGD_SUCCESS;
492     } catch (Exception e) {
493       //version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("ERROR: %.*s...\n", cast(int)e.msg.length, e.msg.ptr); }}
494       version(jpegd_test) {{ import std.stdio; stderr.writeln(e.toString); }}
495     }
496     return JPGD_FAILED;
497   }
498 
499   // Returns the next scan line.
500   // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (`bytes_per_pixel` will return 1).
501   // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and `bytes_per_pixel` will return 4).
502   // Returns JPGD_SUCCESS if a scan line has been returned.
503   // Returns JPGD_DONE if all scan lines have been returned.
504   // Returns JPGD_FAILED if an error occurred. Inspect `error_code` for more info.
505   int decode (/*const void** */void** pScan_line, uint* pScan_line_len) {
506     if (m_error_code || !m_ready_flag) return JPGD_FAILED;
507     if (m_total_lines_left == 0) return JPGD_DONE;
508     try {
509       if (m_mcu_lines_left == 0) {
510         if (m_progressive_flag) load_next_row(); else decode_next_row();
511         // Find the EOI marker if that was the last row.
512         if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
513         m_mcu_lines_left = m_max_mcu_y_size;
514       }
515       if (m_freq_domain_chroma_upsample) {
516         expanded_convert();
517         *pScan_line = m_pScan_line_0;
518       } else {
519         switch (m_scan_type) {
520           case JPGD_YH2V2:
521             if ((m_mcu_lines_left & 1) == 0) {
522               H2V2Convert();
523               *pScan_line = m_pScan_line_0;
524             } else {
525               *pScan_line = m_pScan_line_1;
526             }
527             break;
528           case JPGD_YH2V1:
529             H2V1Convert();
530             *pScan_line = m_pScan_line_0;
531             break;
532           case JPGD_YH1V2:
533             if ((m_mcu_lines_left & 1) == 0) {
534               H1V2Convert();
535               *pScan_line = m_pScan_line_0;
536             } else {
537               *pScan_line = m_pScan_line_1;
538             }
539             break;
540           case JPGD_YH1V1:
541             H1V1Convert();
542             *pScan_line = m_pScan_line_0;
543             break;
544           case JPGD_GRAYSCALE:
545             gray_convert();
546             *pScan_line = m_pScan_line_0;
547             break;
548           default:
549         }
550       }
551       *pScan_line_len = m_real_dest_bytes_per_scan_line;
552       --m_mcu_lines_left;
553       --m_total_lines_left;
554       return JPGD_SUCCESS;
555     } catch (Exception) {}
556     return JPGD_FAILED;
557   }
558 
559   @property const pure nothrow @safe @nogc {
560     jpgd_status error_code () { pragma(inline, true); return m_error_code; }
561 
562     int width () { pragma(inline, true); return m_image_x_size; }
563     int height () { pragma(inline, true); return m_image_y_size; }
564 
565     int num_components () { pragma(inline, true); return m_comps_in_frame; }
566 
567     int bytes_per_pixel () { pragma(inline, true); return m_dest_bytes_per_pixel; }
568     int bytes_per_scan_line () { pragma(inline, true); return m_image_x_size * bytes_per_pixel(); }
569 
570     // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
571     int total_bytes_read () { pragma(inline, true); return m_total_bytes_read; }
572   }
573 
574 private:
575   // Retrieve one character from the input stream.
576   uint get_char () {
577     // Any bytes remaining in buffer?
578     if (!m_in_buf_left) {
579       // Try to get more bytes.
580       prep_in_buffer();
581       // Still nothing to get?
582       if (!m_in_buf_left) {
583         // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
584         int t = m_tem_flag;
585         m_tem_flag ^= 1;
586         return (t ? 0xD9 : 0xFF);
587       }
588     }
589     uint c = *m_pIn_buf_ofs++;
590     --m_in_buf_left;
591     return c;
592   }
593 
594   // Same as previous method, except can indicate if the character is a pad character or not.
595   uint get_char (bool* pPadding_flag) {
596     if (!m_in_buf_left) {
597       prep_in_buffer();
598       if (!m_in_buf_left) {
599         *pPadding_flag = true;
600         int t = m_tem_flag;
601         m_tem_flag ^= 1;
602         return (t ? 0xD9 : 0xFF);
603       }
604     }
605     *pPadding_flag = false;
606     uint c = *m_pIn_buf_ofs++;
607     --m_in_buf_left;
608     return c;
609   }
610 
611   // Inserts a previously retrieved character back into the input buffer.
612   void stuff_char (ubyte q) {
613     *(--m_pIn_buf_ofs) = q;
614     m_in_buf_left++;
615   }
616 
617   // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
618   ubyte get_octet () {
619     bool padding_flag;
620     int c = get_char(&padding_flag);
621     if (c == 0xFF) {
622       if (padding_flag) return 0xFF;
623       c = get_char(&padding_flag);
624       if (padding_flag) { stuff_char(0xFF); return 0xFF; }
625       if (c == 0x00) return 0xFF;
626       stuff_char(cast(ubyte)(c));
627       stuff_char(0xFF);
628       return 0xFF;
629     }
630     return cast(ubyte)(c);
631   }
632 
633   // Retrieves a variable number of bits from the input stream. Does not recognize markers.
634   uint get_bits (int num_bits) {
635     if (!num_bits) return 0;
636     uint i = m_bit_buf >> (32 - num_bits);
637     if ((m_bits_left -= num_bits) <= 0) {
638       m_bit_buf <<= (num_bits += m_bits_left);
639       uint c1 = get_char();
640       uint c2 = get_char();
641       m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
642       m_bit_buf <<= -m_bits_left;
643       m_bits_left += 16;
644       assert(m_bits_left >= 0);
645     } else {
646       m_bit_buf <<= num_bits;
647     }
648     return i;
649   }
650 
651   // Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
652   uint get_bits_no_markers (int num_bits) {
653     if (!num_bits) return 0;
654     uint i = m_bit_buf >> (32 - num_bits);
655     if ((m_bits_left -= num_bits) <= 0) {
656       m_bit_buf <<= (num_bits += m_bits_left);
657       if (m_in_buf_left < 2 || m_pIn_buf_ofs[0] == 0xFF || m_pIn_buf_ofs[1] == 0xFF) {
658         uint c1 = get_octet();
659         uint c2 = get_octet();
660         m_bit_buf |= (c1 << 8) | c2;
661       } else {
662         m_bit_buf |= (cast(uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
663         m_in_buf_left -= 2;
664         m_pIn_buf_ofs += 2;
665       }
666       m_bit_buf <<= -m_bits_left;
667       m_bits_left += 16;
668       assert(m_bits_left >= 0);
669     } else {
670       m_bit_buf <<= num_bits;
671     }
672     return i;
673   }
674 
675   // Decodes a Huffman encoded symbol.
676   int huff_decode (huff_tables *pH) {
677     int symbol;
678     // Check first 8-bits: do we have a complete symbol?
679     if ((symbol = pH.look_up.ptr[m_bit_buf >> 24]) < 0) {
680       // Decode more bits, use a tree traversal to find symbol.
681       int ofs = 23;
682       do {
683         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
684         --ofs;
685       } while (symbol < 0);
686       get_bits_no_markers(8 + (23 - ofs));
687     } else {
688       get_bits_no_markers(pH.code_size.ptr[symbol]);
689     }
690     return symbol;
691   }
692 
693   // Decodes a Huffman encoded symbol.
694   int huff_decode (huff_tables *pH, ref int extra_bits) {
695     int symbol;
696     // Check first 8-bits: do we have a complete symbol?
697     if ((symbol = pH.look_up2.ptr[m_bit_buf >> 24]) < 0) {
698       // Use a tree traversal to find symbol.
699       int ofs = 23;
700       do {
701         symbol = pH.tree.ptr[-cast(int)(symbol + ((m_bit_buf >> ofs) & 1))];
702         --ofs;
703       } while (symbol < 0);
704       get_bits_no_markers(8 + (23 - ofs));
705       extra_bits = get_bits_no_markers(symbol & 0xF);
706     } else {
707       assert(((symbol >> 8) & 31) == pH.code_size.ptr[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
708       if (symbol & 0x8000) {
709         get_bits_no_markers((symbol >> 8) & 31);
710         extra_bits = symbol >> 16;
711       } else {
712         int code_size = (symbol >> 8) & 31;
713         int num_extra_bits = symbol & 0xF;
714         int bits = code_size + num_extra_bits;
715         if (bits <= (m_bits_left + 16)) {
716           extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
717         } else {
718           get_bits_no_markers(code_size);
719           extra_bits = get_bits_no_markers(num_extra_bits);
720         }
721       }
722       symbol &= 0xFF;
723     }
724     return symbol;
725   }
726 
727   // Tables and macro used to fully decode the DPCM differences.
728   static immutable int[16] s_extend_test = [ 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 ];
729   static immutable int[16] s_extend_offset = [ 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1, ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1, ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1, ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 ];
730   static immutable int[18] s_extend_mask = [ 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) ];
731   // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
732   //#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
733   static JPGD_HUFF_EXTEND (int x, int s) nothrow @trusted @nogc { pragma(inline, true); return (((x) < s_extend_test.ptr[s & 15]) ? ((x) + s_extend_offset.ptr[s & 15]) : (x)); }
734 
  // Clamps a value between 0-255.
  // `clamp` is only an alias here; the work is done by CLAMP, which is defined
  // outside this part of the file (NOTE(review): confirm it matches the old
  // inline version kept below for reference).
  //static ubyte clamp (int i) { if (cast(uint)(i) > 255) i = (((~i) >> 31) & 0xFF); return cast(ubyte)(i); }
  alias clamp = CLAMP;
738 
739   static struct DCT_Upsample {
740   static:
741     static struct Matrix44 {
742     pure nothrow @trusted @nogc:
743       alias Element_Type = int;
744       enum { NUM_ROWS = 4, NUM_COLS = 4 }
745 
746       Element_Type[NUM_COLS][NUM_ROWS] v;
747 
748       this() (in auto ref Matrix44 m) {
749         foreach (immutable r; 0..NUM_ROWS) v[r][] = m.v[r][];
750       }
751 
752       //@property int rows () const { pragma(inline, true); return NUM_ROWS; }
753       //@property int cols () const { pragma(inline, true); return NUM_COLS; }
754 
755       ref inout(Element_Type) at (int r, int c) inout { pragma(inline, true); return v.ptr[r].ptr[c]; }
756 
757       ref Matrix44 opOpAssign(string op:"+") (in auto ref Matrix44 a) {
758         foreach (int r; 0..NUM_ROWS) {
759           at(r, 0) += a.at(r, 0);
760           at(r, 1) += a.at(r, 1);
761           at(r, 2) += a.at(r, 2);
762           at(r, 3) += a.at(r, 3);
763         }
764         return this;
765       }
766 
767       ref Matrix44 opOpAssign(string op:"-") (in auto ref Matrix44 a) {
768         foreach (int r; 0..NUM_ROWS) {
769           at(r, 0) -= a.at(r, 0);
770           at(r, 1) -= a.at(r, 1);
771           at(r, 2) -= a.at(r, 2);
772           at(r, 3) -= a.at(r, 3);
773         }
774         return this;
775       }
776 
777       Matrix44 opBinary(string op:"+") (in auto ref Matrix44 b) const {
778         alias a = this;
779         Matrix44 ret;
780         foreach (int r; 0..NUM_ROWS) {
781           ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
782           ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
783           ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
784           ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
785         }
786         return ret;
787       }
788 
789       Matrix44 opBinary(string op:"-") (in auto ref Matrix44 b) const {
790         alias a = this;
791         Matrix44 ret;
792         foreach (int r; 0..NUM_ROWS) {
793           ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
794           ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
795           ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
796           ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
797         }
798         return ret;
799       }
800 
801       static void add_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
802         foreach (int r; 0..4) {
803           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) + b.at(r, 0));
804           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) + b.at(r, 1));
805           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) + b.at(r, 2));
806           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) + b.at(r, 3));
807         }
808       }
809 
810       static void sub_and_store() (jpgd_block_t* pDst, in auto ref Matrix44 a, in auto ref Matrix44 b) {
811         foreach (int r; 0..4) {
812           pDst[0*8 + r] = cast(jpgd_block_t)(a.at(r, 0) - b.at(r, 0));
813           pDst[1*8 + r] = cast(jpgd_block_t)(a.at(r, 1) - b.at(r, 1));
814           pDst[2*8 + r] = cast(jpgd_block_t)(a.at(r, 2) - b.at(r, 2));
815           pDst[3*8 + r] = cast(jpgd_block_t)(a.at(r, 3) - b.at(r, 3));
816         }
817       }
818     }
819 
    // Fixed-point format used by the P_Q / R_S calculations below:
    // FRACT_BITS fractional bits, i.e. real values are scaled by SCALE (1 << 10).
    enum FRACT_BITS = 10;
    enum SCALE = 1 << FRACT_BITS;

    // Type of the intermediate values computed in P_Q.calc and R_S.calc.
    alias Temp_Type = int;
    //TODO: convert defines to mixins
    // Original C++ macros, kept for reference:
    //#define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
    //#define F(i) ((int)((i) * SCALE + .5f))
    // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
    //#define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])

    // Scales a fixed-point value back down: adds half of SCALE, then shifts right by FRACT_BITS.
    static int D(T) (T i) { pragma(inline, true); return (((i) + (SCALE >> 1)) >> FRACT_BITS); }
    // Compile-time conversion of a float constant to fixed point: i * SCALE + 0.5, cast to int.
    enum F(float i) = (cast(int)((i) * SCALE + 0.5f));
832 
    // Computes the 4x4 matrices P and Q from a coefficient block (read with a row stride of 8).
    // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
    // All arithmetic is fixed point: constants come from F!(...) and every
    // weighted sum is scaled back with D(...).
    static struct P_Q(int NUM_ROWS, int NUM_COLS) {
      // P, Q: outputs, every element is overwritten.
      // pSrc: input coefficients; only positions with c < NUM_COLS and r < NUM_ROWS are read.
      static void calc (ref Matrix44 P, ref Matrix44 Q, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text for mixin(): the literal "0" when (c, r)
        // lies outside the non-zero region, otherwise "pSrc[c+r*8]". The choice
        // is made at compile time, so out-of-region terms never touch pSrc.
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // First stage: intermediate values X0jk, where k selects the source row.
        // X00k and X02k are plain copies (columns 0 and 4); X01k and X03k are
        // weighted sums of the odd columns 1, 3, 5, 7.
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        immutable Temp_Type X000 = mixin(AT!(0, 0));
        immutable Temp_Type X001 = mixin(AT!(0, 1));
        immutable Temp_Type X002 = mixin(AT!(0, 2));
        immutable Temp_Type X003 = mixin(AT!(0, 3));
        immutable Temp_Type X004 = mixin(AT!(0, 4));
        immutable Temp_Type X005 = mixin(AT!(0, 5));
        immutable Temp_Type X006 = mixin(AT!(0, 6));
        immutable Temp_Type X007 = mixin(AT!(0, 7));
        immutable Temp_Type X010 = D(F!(0.415735f) * mixin(AT!(1, 0)) + F!(0.791065f) * mixin(AT!(3, 0)) + F!(-0.352443f) * mixin(AT!(5, 0)) + F!(0.277785f) * mixin(AT!(7, 0)));
        immutable Temp_Type X011 = D(F!(0.415735f) * mixin(AT!(1, 1)) + F!(0.791065f) * mixin(AT!(3, 1)) + F!(-0.352443f) * mixin(AT!(5, 1)) + F!(0.277785f) * mixin(AT!(7, 1)));
        immutable Temp_Type X012 = D(F!(0.415735f) * mixin(AT!(1, 2)) + F!(0.791065f) * mixin(AT!(3, 2)) + F!(-0.352443f) * mixin(AT!(5, 2)) + F!(0.277785f) * mixin(AT!(7, 2)));
        immutable Temp_Type X013 = D(F!(0.415735f) * mixin(AT!(1, 3)) + F!(0.791065f) * mixin(AT!(3, 3)) + F!(-0.352443f) * mixin(AT!(5, 3)) + F!(0.277785f) * mixin(AT!(7, 3)));
        immutable Temp_Type X014 = D(F!(0.415735f) * mixin(AT!(1, 4)) + F!(0.791065f) * mixin(AT!(3, 4)) + F!(-0.352443f) * mixin(AT!(5, 4)) + F!(0.277785f) * mixin(AT!(7, 4)));
        immutable Temp_Type X015 = D(F!(0.415735f) * mixin(AT!(1, 5)) + F!(0.791065f) * mixin(AT!(3, 5)) + F!(-0.352443f) * mixin(AT!(5, 5)) + F!(0.277785f) * mixin(AT!(7, 5)));
        immutable Temp_Type X016 = D(F!(0.415735f) * mixin(AT!(1, 6)) + F!(0.791065f) * mixin(AT!(3, 6)) + F!(-0.352443f) * mixin(AT!(5, 6)) + F!(0.277785f) * mixin(AT!(7, 6)));
        immutable Temp_Type X017 = D(F!(0.415735f) * mixin(AT!(1, 7)) + F!(0.791065f) * mixin(AT!(3, 7)) + F!(-0.352443f) * mixin(AT!(5, 7)) + F!(0.277785f) * mixin(AT!(7, 7)));
        immutable Temp_Type X020 = mixin(AT!(4, 0));
        immutable Temp_Type X021 = mixin(AT!(4, 1));
        immutable Temp_Type X022 = mixin(AT!(4, 2));
        immutable Temp_Type X023 = mixin(AT!(4, 3));
        immutable Temp_Type X024 = mixin(AT!(4, 4));
        immutable Temp_Type X025 = mixin(AT!(4, 5));
        immutable Temp_Type X026 = mixin(AT!(4, 6));
        immutable Temp_Type X027 = mixin(AT!(4, 7));
        immutable Temp_Type X030 = D(F!(0.022887f) * mixin(AT!(1, 0)) + F!(-0.097545f) * mixin(AT!(3, 0)) + F!(0.490393f) * mixin(AT!(5, 0)) + F!(0.865723f) * mixin(AT!(7, 0)));
        immutable Temp_Type X031 = D(F!(0.022887f) * mixin(AT!(1, 1)) + F!(-0.097545f) * mixin(AT!(3, 1)) + F!(0.490393f) * mixin(AT!(5, 1)) + F!(0.865723f) * mixin(AT!(7, 1)));
        immutable Temp_Type X032 = D(F!(0.022887f) * mixin(AT!(1, 2)) + F!(-0.097545f) * mixin(AT!(3, 2)) + F!(0.490393f) * mixin(AT!(5, 2)) + F!(0.865723f) * mixin(AT!(7, 2)));
        immutable Temp_Type X033 = D(F!(0.022887f) * mixin(AT!(1, 3)) + F!(-0.097545f) * mixin(AT!(3, 3)) + F!(0.490393f) * mixin(AT!(5, 3)) + F!(0.865723f) * mixin(AT!(7, 3)));
        immutable Temp_Type X034 = D(F!(0.022887f) * mixin(AT!(1, 4)) + F!(-0.097545f) * mixin(AT!(3, 4)) + F!(0.490393f) * mixin(AT!(5, 4)) + F!(0.865723f) * mixin(AT!(7, 4)));
        immutable Temp_Type X035 = D(F!(0.022887f) * mixin(AT!(1, 5)) + F!(-0.097545f) * mixin(AT!(3, 5)) + F!(0.490393f) * mixin(AT!(5, 5)) + F!(0.865723f) * mixin(AT!(7, 5)));
        immutable Temp_Type X036 = D(F!(0.022887f) * mixin(AT!(1, 6)) + F!(-0.097545f) * mixin(AT!(3, 6)) + F!(0.490393f) * mixin(AT!(5, 6)) + F!(0.865723f) * mixin(AT!(7, 6)));
        immutable Temp_Type X037 = D(F!(0.022887f) * mixin(AT!(1, 7)) + F!(-0.097545f) * mixin(AT!(3, 7)) + F!(0.490393f) * mixin(AT!(5, 7)) + F!(0.865723f) * mixin(AT!(7, 7)));

        // Second stage for P: columns 0 and 2 copy X0j0 and X0j4; columns 1 and 3
        // are weighted sums of X0j1, X0j3, X0j5, X0j7.
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        P.at(0, 0) = X000;
        P.at(0, 1) = D(X001 * F!(0.415735f) + X003 * F!(0.791065f) + X005 * F!(-0.352443f) + X007 * F!(0.277785f));
        P.at(0, 2) = X004;
        P.at(0, 3) = D(X001 * F!(0.022887f) + X003 * F!(-0.097545f) + X005 * F!(0.490393f) + X007 * F!(0.865723f));
        P.at(1, 0) = X010;
        P.at(1, 1) = D(X011 * F!(0.415735f) + X013 * F!(0.791065f) + X015 * F!(-0.352443f) + X017 * F!(0.277785f));
        P.at(1, 2) = X014;
        P.at(1, 3) = D(X011 * F!(0.022887f) + X013 * F!(-0.097545f) + X015 * F!(0.490393f) + X017 * F!(0.865723f));
        P.at(2, 0) = X020;
        P.at(2, 1) = D(X021 * F!(0.415735f) + X023 * F!(0.791065f) + X025 * F!(-0.352443f) + X027 * F!(0.277785f));
        P.at(2, 2) = X024;
        P.at(2, 3) = D(X021 * F!(0.022887f) + X023 * F!(-0.097545f) + X025 * F!(0.490393f) + X027 * F!(0.865723f));
        P.at(3, 0) = X030;
        P.at(3, 1) = D(X031 * F!(0.415735f) + X033 * F!(0.791065f) + X035 * F!(-0.352443f) + X037 * F!(0.277785f));
        P.at(3, 2) = X034;
        P.at(3, 3) = D(X031 * F!(0.022887f) + X033 * F!(-0.097545f) + X035 * F!(0.490393f) + X037 * F!(0.865723f));
        // 40 muls 24 adds

        // Second stage for Q: columns 1 and 3 copy X0j2 and X0j6; columns 0 and 2
        // are weighted sums of X0j1, X0j3, X0j5, X0j7.
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        Q.at(0, 0) = D(X001 * F!(0.906127f) + X003 * F!(-0.318190f) + X005 * F!(0.212608f) + X007 * F!(-0.180240f));
        Q.at(0, 1) = X002;
        Q.at(0, 2) = D(X001 * F!(-0.074658f) + X003 * F!(0.513280f) + X005 * F!(0.768178f) + X007 * F!(-0.375330f));
        Q.at(0, 3) = X006;
        Q.at(1, 0) = D(X011 * F!(0.906127f) + X013 * F!(-0.318190f) + X015 * F!(0.212608f) + X017 * F!(-0.180240f));
        Q.at(1, 1) = X012;
        Q.at(1, 2) = D(X011 * F!(-0.074658f) + X013 * F!(0.513280f) + X015 * F!(0.768178f) + X017 * F!(-0.375330f));
        Q.at(1, 3) = X016;
        Q.at(2, 0) = D(X021 * F!(0.906127f) + X023 * F!(-0.318190f) + X025 * F!(0.212608f) + X027 * F!(-0.180240f));
        Q.at(2, 1) = X022;
        Q.at(2, 2) = D(X021 * F!(-0.074658f) + X023 * F!(0.513280f) + X025 * F!(0.768178f) + X027 * F!(-0.375330f));
        Q.at(2, 3) = X026;
        Q.at(3, 0) = D(X031 * F!(0.906127f) + X033 * F!(-0.318190f) + X035 * F!(0.212608f) + X037 * F!(-0.180240f));
        Q.at(3, 1) = X032;
        Q.at(3, 2) = D(X031 * F!(-0.074658f) + X033 * F!(0.513280f) + X035 * F!(0.768178f) + X037 * F!(-0.375330f));
        Q.at(3, 3) = X036;
        // 40 muls 24 adds
      }
    }
913 
    // Computes the 4x4 matrices R and S from a coefficient block (read with a row stride of 8).
    // NUM_ROWS/NUM_COLS are the numbers of rows/columns of the input that are
    // read; everything outside that region is treated as 0 at compile time.
    // All arithmetic is fixed point: constants come from F!(...) and every
    // weighted sum is scaled back with D(...).
    static struct R_S(int NUM_ROWS, int NUM_COLS) {
      // R, S: outputs, every element is overwritten.
      // pSrc: input coefficients; only positions with c < NUM_COLS and r < NUM_ROWS are read.
      static void calc(ref Matrix44 R, ref Matrix44 S, const(jpgd_block_t)* pSrc) {
        //auto AT (int c, int r) nothrow @trusted @nogc { return (c >= NUM_COLS || r >= NUM_ROWS ? 0 : pSrc[c+r*8]); }
        // AT!(c, r) yields source text for mixin(): the literal "0" when (c, r)
        // lies outside the non-zero region, otherwise "pSrc[c+r*8]".
        template AT(int c, int r) {
          static if (c >= NUM_COLS || r >= NUM_ROWS) enum AT = "0"; else enum AT = "pSrc["~c.stringof~"+"~r.stringof~"*8]";
        }
        // First stage: intermediate values X1jk, where k selects the source row.
        // X11k and X13k are plain copies (columns 2 and 6); X10k and X12k are
        // weighted sums of the odd columns 1, 3, 5, 7.
        // 4x8 = 4x8 times 8x8, matrix 0 is constant
        immutable Temp_Type X100 = D(F!(0.906127f) * mixin(AT!(1, 0)) + F!(-0.318190f) * mixin(AT!(3, 0)) + F!(0.212608f) * mixin(AT!(5, 0)) + F!(-0.180240f) * mixin(AT!(7, 0)));
        immutable Temp_Type X101 = D(F!(0.906127f) * mixin(AT!(1, 1)) + F!(-0.318190f) * mixin(AT!(3, 1)) + F!(0.212608f) * mixin(AT!(5, 1)) + F!(-0.180240f) * mixin(AT!(7, 1)));
        immutable Temp_Type X102 = D(F!(0.906127f) * mixin(AT!(1, 2)) + F!(-0.318190f) * mixin(AT!(3, 2)) + F!(0.212608f) * mixin(AT!(5, 2)) + F!(-0.180240f) * mixin(AT!(7, 2)));
        immutable Temp_Type X103 = D(F!(0.906127f) * mixin(AT!(1, 3)) + F!(-0.318190f) * mixin(AT!(3, 3)) + F!(0.212608f) * mixin(AT!(5, 3)) + F!(-0.180240f) * mixin(AT!(7, 3)));
        immutable Temp_Type X104 = D(F!(0.906127f) * mixin(AT!(1, 4)) + F!(-0.318190f) * mixin(AT!(3, 4)) + F!(0.212608f) * mixin(AT!(5, 4)) + F!(-0.180240f) * mixin(AT!(7, 4)));
        immutable Temp_Type X105 = D(F!(0.906127f) * mixin(AT!(1, 5)) + F!(-0.318190f) * mixin(AT!(3, 5)) + F!(0.212608f) * mixin(AT!(5, 5)) + F!(-0.180240f) * mixin(AT!(7, 5)));
        immutable Temp_Type X106 = D(F!(0.906127f) * mixin(AT!(1, 6)) + F!(-0.318190f) * mixin(AT!(3, 6)) + F!(0.212608f) * mixin(AT!(5, 6)) + F!(-0.180240f) * mixin(AT!(7, 6)));
        immutable Temp_Type X107 = D(F!(0.906127f) * mixin(AT!(1, 7)) + F!(-0.318190f) * mixin(AT!(3, 7)) + F!(0.212608f) * mixin(AT!(5, 7)) + F!(-0.180240f) * mixin(AT!(7, 7)));
        immutable Temp_Type X110 = mixin(AT!(2, 0));
        immutable Temp_Type X111 = mixin(AT!(2, 1));
        immutable Temp_Type X112 = mixin(AT!(2, 2));
        immutable Temp_Type X113 = mixin(AT!(2, 3));
        immutable Temp_Type X114 = mixin(AT!(2, 4));
        immutable Temp_Type X115 = mixin(AT!(2, 5));
        immutable Temp_Type X116 = mixin(AT!(2, 6));
        immutable Temp_Type X117 = mixin(AT!(2, 7));
        immutable Temp_Type X120 = D(F!(-0.074658f) * mixin(AT!(1, 0)) + F!(0.513280f) * mixin(AT!(3, 0)) + F!(0.768178f) * mixin(AT!(5, 0)) + F!(-0.375330f) * mixin(AT!(7, 0)));
        immutable Temp_Type X121 = D(F!(-0.074658f) * mixin(AT!(1, 1)) + F!(0.513280f) * mixin(AT!(3, 1)) + F!(0.768178f) * mixin(AT!(5, 1)) + F!(-0.375330f) * mixin(AT!(7, 1)));
        immutable Temp_Type X122 = D(F!(-0.074658f) * mixin(AT!(1, 2)) + F!(0.513280f) * mixin(AT!(3, 2)) + F!(0.768178f) * mixin(AT!(5, 2)) + F!(-0.375330f) * mixin(AT!(7, 2)));
        immutable Temp_Type X123 = D(F!(-0.074658f) * mixin(AT!(1, 3)) + F!(0.513280f) * mixin(AT!(3, 3)) + F!(0.768178f) * mixin(AT!(5, 3)) + F!(-0.375330f) * mixin(AT!(7, 3)));
        immutable Temp_Type X124 = D(F!(-0.074658f) * mixin(AT!(1, 4)) + F!(0.513280f) * mixin(AT!(3, 4)) + F!(0.768178f) * mixin(AT!(5, 4)) + F!(-0.375330f) * mixin(AT!(7, 4)));
        immutable Temp_Type X125 = D(F!(-0.074658f) * mixin(AT!(1, 5)) + F!(0.513280f) * mixin(AT!(3, 5)) + F!(0.768178f) * mixin(AT!(5, 5)) + F!(-0.375330f) * mixin(AT!(7, 5)));
        immutable Temp_Type X126 = D(F!(-0.074658f) * mixin(AT!(1, 6)) + F!(0.513280f) * mixin(AT!(3, 6)) + F!(0.768178f) * mixin(AT!(5, 6)) + F!(-0.375330f) * mixin(AT!(7, 6)));
        immutable Temp_Type X127 = D(F!(-0.074658f) * mixin(AT!(1, 7)) + F!(0.513280f) * mixin(AT!(3, 7)) + F!(0.768178f) * mixin(AT!(5, 7)) + F!(-0.375330f) * mixin(AT!(7, 7)));
        immutable Temp_Type X130 = mixin(AT!(6, 0));
        immutable Temp_Type X131 = mixin(AT!(6, 1));
        immutable Temp_Type X132 = mixin(AT!(6, 2));
        immutable Temp_Type X133 = mixin(AT!(6, 3));
        immutable Temp_Type X134 = mixin(AT!(6, 4));
        immutable Temp_Type X135 = mixin(AT!(6, 5));
        immutable Temp_Type X136 = mixin(AT!(6, 6));
        immutable Temp_Type X137 = mixin(AT!(6, 7));
        // 80 muls 48 adds

        // Second stage for R: columns 0 and 2 copy X1j0 and X1j4; columns 1 and 3
        // are weighted sums of X1j1, X1j3, X1j5, X1j7.
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        R.at(0, 0) = X100;
        R.at(0, 1) = D(X101 * F!(0.415735f) + X103 * F!(0.791065f) + X105 * F!(-0.352443f) + X107 * F!(0.277785f));
        R.at(0, 2) = X104;
        R.at(0, 3) = D(X101 * F!(0.022887f) + X103 * F!(-0.097545f) + X105 * F!(0.490393f) + X107 * F!(0.865723f));
        R.at(1, 0) = X110;
        R.at(1, 1) = D(X111 * F!(0.415735f) + X113 * F!(0.791065f) + X115 * F!(-0.352443f) + X117 * F!(0.277785f));
        R.at(1, 2) = X114;
        R.at(1, 3) = D(X111 * F!(0.022887f) + X113 * F!(-0.097545f) + X115 * F!(0.490393f) + X117 * F!(0.865723f));
        R.at(2, 0) = X120;
        R.at(2, 1) = D(X121 * F!(0.415735f) + X123 * F!(0.791065f) + X125 * F!(-0.352443f) + X127 * F!(0.277785f));
        R.at(2, 2) = X124;
        R.at(2, 3) = D(X121 * F!(0.022887f) + X123 * F!(-0.097545f) + X125 * F!(0.490393f) + X127 * F!(0.865723f));
        R.at(3, 0) = X130;
        R.at(3, 1) = D(X131 * F!(0.415735f) + X133 * F!(0.791065f) + X135 * F!(-0.352443f) + X137 * F!(0.277785f));
        R.at(3, 2) = X134;
        R.at(3, 3) = D(X131 * F!(0.022887f) + X133 * F!(-0.097545f) + X135 * F!(0.490393f) + X137 * F!(0.865723f));
        // 40 muls 24 adds
        // Second stage for S: columns 1 and 3 copy X1j2 and X1j6; columns 0 and 2
        // are weighted sums of X1j1, X1j3, X1j5, X1j7.
        // 4x4 = 4x8 times 8x4, matrix 1 is constant
        S.at(0, 0) = D(X101 * F!(0.906127f) + X103 * F!(-0.318190f) + X105 * F!(0.212608f) + X107 * F!(-0.180240f));
        S.at(0, 1) = X102;
        S.at(0, 2) = D(X101 * F!(-0.074658f) + X103 * F!(0.513280f) + X105 * F!(0.768178f) + X107 * F!(-0.375330f));
        S.at(0, 3) = X106;
        S.at(1, 0) = D(X111 * F!(0.906127f) + X113 * F!(-0.318190f) + X115 * F!(0.212608f) + X117 * F!(-0.180240f));
        S.at(1, 1) = X112;
        S.at(1, 2) = D(X111 * F!(-0.074658f) + X113 * F!(0.513280f) + X115 * F!(0.768178f) + X117 * F!(-0.375330f));
        S.at(1, 3) = X116;
        S.at(2, 0) = D(X121 * F!(0.906127f) + X123 * F!(-0.318190f) + X125 * F!(0.212608f) + X127 * F!(-0.180240f));
        S.at(2, 1) = X122;
        S.at(2, 2) = D(X121 * F!(-0.074658f) + X123 * F!(0.513280f) + X125 * F!(0.768178f) + X127 * F!(-0.375330f));
        S.at(2, 3) = X126;
        S.at(3, 0) = D(X131 * F!(0.906127f) + X133 * F!(-0.318190f) + X135 * F!(0.212608f) + X137 * F!(-0.180240f));
        S.at(3, 1) = X132;
        S.at(3, 2) = D(X131 * F!(-0.074658f) + X133 * F!(0.513280f) + X135 * F!(0.768178f) + X137 * F!(-0.375330f));
        S.at(3, 3) = X136;
        // 40 muls 24 adds
      }
    }
993   } // end namespace DCT_Upsample
994 
995   // Unconditionally frees all allocated m_blocks.
996   void free_all_blocks () {
997     //m_pStream = null;
998     readfn = null;
999     for (mem_block *b = m_pMem_blocks; b; ) {
1000       mem_block* n = b.m_pNext;
1001       jpgd_free(b);
1002       b = n;
1003     }
1004     m_pMem_blocks = null;
1005   }
1006 
1007   // This method handles all errors. It will never return.
1008   // It could easily be changed to use C++ exceptions.
1009   /*JPGD_NORETURN*/ void stop_decoding (jpgd_status status, size_t line=__LINE__) {
1010     m_error_code = status;
1011     free_all_blocks();
1012     //longjmp(m_jmp_state, status);
1013     throw new Exception("jpeg decoding error", __FILE__, line);
1014   }
1015 
1016   void* alloc (size_t nSize, bool zero=false) {
1017     nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
1018     char *rv = null;
1019     for (mem_block *b = m_pMem_blocks; b; b = b.m_pNext)
1020     {
1021       if ((b.m_used_count + nSize) <= b.m_size)
1022       {
1023         rv = b.m_data.ptr + b.m_used_count;
1024         b.m_used_count += nSize;
1025         break;
1026       }
1027     }
1028     if (!rv)
1029     {
1030       size_t capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
1031       mem_block *b = cast(mem_block*)jpgd_malloc(mem_block.sizeof + capacity);
1032       if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
1033       b.m_pNext = m_pMem_blocks; m_pMem_blocks = b;
1034       b.m_used_count = nSize;
1035       b.m_size = capacity;
1036       rv = b.m_data.ptr;
1037     }
1038     if (zero) memset(rv, 0, nSize);
1039     return rv;
1040   }
1041 
1042   void word_clear (void *p, ushort c, uint n) {
1043     ubyte *pD = cast(ubyte*)p;
1044     immutable ubyte l = c & 0xFF, h = (c >> 8) & 0xFF;
1045     while (n)
1046     {
1047       pD[0] = l; pD[1] = h; pD += 2;
1048       n--;
1049     }
1050   }
1051 
1052   // Refill the input buffer.
1053   // This method will sit in a loop until (A) the buffer is full or (B)
1054   // the stream's read() method reports and end of file condition.
1055   void prep_in_buffer () {
1056     m_in_buf_left = 0;
1057     m_pIn_buf_ofs = m_in_buf.ptr;
1058 
1059     if (m_eof_flag)
1060       return;
1061 
1062     do
1063     {
1064       int bytes_read = readfn(m_in_buf.ptr + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
1065       if (bytes_read == -1)
1066         stop_decoding(JPGD_STREAM_READ);
1067 
1068       m_in_buf_left += bytes_read;
1069     } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
1070 
1071     m_total_bytes_read += m_in_buf_left;
1072 
1073     // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
1074     // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
1075     word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
1076   }
1077 
1078   // Read a Huffman code table.
1079   void read_dht_marker () {
1080     int i, index, count;
1081     ubyte[17] huff_num;
1082     ubyte[256] huff_val;
1083 
1084     uint num_left = get_bits(16);
1085 
1086     if (num_left < 2)
1087       stop_decoding(JPGD_BAD_DHT_MARKER);
1088 
1089     num_left -= 2;
1090 
1091     while (num_left)
1092     {
1093       index = get_bits(8);
1094 
1095       huff_num.ptr[0] = 0;
1096 
1097       count = 0;
1098 
1099       for (i = 1; i <= 16; i++)
1100       {
1101         huff_num.ptr[i] = cast(ubyte)(get_bits(8));
1102         count += huff_num.ptr[i];
1103       }
1104 
1105       if (count > 255)
1106         stop_decoding(JPGD_BAD_DHT_COUNTS);
1107 
1108       for (i = 0; i < count; i++)
1109         huff_val.ptr[i] = cast(ubyte)(get_bits(8));
1110 
1111       i = 1 + 16 + count;
1112 
1113       if (num_left < cast(uint)i)
1114         stop_decoding(JPGD_BAD_DHT_MARKER);
1115 
1116       num_left -= i;
1117 
1118       if ((index & 0x10) > 0x10)
1119         stop_decoding(JPGD_BAD_DHT_INDEX);
1120 
1121       index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
1122 
1123       if (index >= JPGD_MAX_HUFF_TABLES)
1124         stop_decoding(JPGD_BAD_DHT_INDEX);
1125 
1126       if (!m_huff_num.ptr[index])
1127         m_huff_num.ptr[index] = cast(ubyte*)alloc(17);
1128 
1129       if (!m_huff_val.ptr[index])
1130         m_huff_val.ptr[index] = cast(ubyte*)alloc(256);
1131 
1132       m_huff_ac.ptr[index] = (index & 0x10) != 0;
1133       memcpy(m_huff_num.ptr[index], huff_num.ptr, 17);
1134       memcpy(m_huff_val.ptr[index], huff_val.ptr, 256);
1135     }
1136   }
1137 
1138   // Read a quantization table.
1139   void read_dqt_marker () {
1140     int n, i, prec;
1141     uint num_left;
1142     uint temp;
1143 
1144     num_left = get_bits(16);
1145 
1146     if (num_left < 2)
1147       stop_decoding(JPGD_BAD_DQT_MARKER);
1148 
1149     num_left -= 2;
1150 
1151     while (num_left)
1152     {
1153       n = get_bits(8);
1154       prec = n >> 4;
1155       n &= 0x0F;
1156 
1157       if (n >= JPGD_MAX_QUANT_TABLES)
1158         stop_decoding(JPGD_BAD_DQT_TABLE);
1159 
1160       if (!m_quant.ptr[n])
1161         m_quant.ptr[n] = cast(jpgd_quant_t*)alloc(64 * jpgd_quant_t.sizeof);
1162 
1163       // read quantization entries, in zag order
1164       for (i = 0; i < 64; i++)
1165       {
1166         temp = get_bits(8);
1167 
1168         if (prec)
1169           temp = (temp << 8) + get_bits(8);
1170 
1171         m_quant.ptr[n][i] = cast(jpgd_quant_t)(temp);
1172       }
1173 
1174       i = 64 + 1;
1175 
1176       if (prec)
1177         i += 64;
1178 
1179       if (num_left < cast(uint)i)
1180         stop_decoding(JPGD_BAD_DQT_LENGTH);
1181 
1182       num_left -= i;
1183     }
1184   }
1185 
1186   // Read the start of frame (SOF) marker.
1187   void read_sof_marker () {
1188     int i;
1189     uint num_left;
1190 
1191     num_left = get_bits(16);
1192 
1193     if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
1194       stop_decoding(JPGD_BAD_PRECISION);
1195 
1196     m_image_y_size = get_bits(16);
1197 
1198     if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
1199       stop_decoding(JPGD_BAD_HEIGHT);
1200 
1201     m_image_x_size = get_bits(16);
1202 
1203     if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
1204       stop_decoding(JPGD_BAD_WIDTH);
1205 
1206     m_comps_in_frame = get_bits(8);
1207 
1208     if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
1209       stop_decoding(JPGD_TOO_MANY_COMPONENTS);
1210 
1211     if (num_left != cast(uint)(m_comps_in_frame * 3 + 8))
1212       stop_decoding(JPGD_BAD_SOF_LENGTH);
1213 
1214     for (i = 0; i < m_comps_in_frame; i++)
1215     {
1216       m_comp_ident.ptr[i]  = get_bits(8);
1217       m_comp_h_samp.ptr[i] = get_bits(4);
1218       m_comp_v_samp.ptr[i] = get_bits(4);
1219       m_comp_quant.ptr[i]  = get_bits(8);
1220     }
1221   }
1222 
1223   // Used to skip unrecognized markers.
1224   void skip_variable_marker () {
1225     uint num_left;
1226 
1227     num_left = get_bits(16);
1228 
1229     if (num_left < 2)
1230       stop_decoding(JPGD_BAD_VARIABLE_MARKER);
1231 
1232     num_left -= 2;
1233 
1234     while (num_left)
1235     {
1236       get_bits(8);
1237       num_left--;
1238     }
1239   }
1240 
1241   // Read a define restart interval (DRI) marker.
1242   void read_dri_marker () {
1243     if (get_bits(16) != 4)
1244       stop_decoding(JPGD_BAD_DRI_LENGTH);
1245 
1246     m_restart_interval = get_bits(16);
1247   }
1248 
1249   // Read a start of scan (SOS) marker.
1250   void read_sos_marker () {
1251     uint num_left;
1252     int i, ci, n, c, cc;
1253 
1254     num_left = get_bits(16);
1255 
1256     n = get_bits(8);
1257 
1258     m_comps_in_scan = n;
1259 
1260     num_left -= 3;
1261 
1262     if ( (num_left != cast(uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
1263       stop_decoding(JPGD_BAD_SOS_LENGTH);
1264 
1265     for (i = 0; i < n; i++)
1266     {
1267       cc = get_bits(8);
1268       c = get_bits(8);
1269       num_left -= 2;
1270 
1271       for (ci = 0; ci < m_comps_in_frame; ci++)
1272         if (cc == m_comp_ident.ptr[ci])
1273           break;
1274 
1275       if (ci >= m_comps_in_frame)
1276         stop_decoding(JPGD_BAD_SOS_COMP_ID);
1277 
1278       m_comp_list.ptr[i]    = ci;
1279       m_comp_dc_tab.ptr[ci] = (c >> 4) & 15;
1280       m_comp_ac_tab.ptr[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
1281     }
1282 
1283     m_spectral_start  = get_bits(8);
1284     m_spectral_end    = get_bits(8);
1285     m_successive_high = get_bits(4);
1286     m_successive_low  = get_bits(4);
1287 
1288     if (!m_progressive_flag)
1289     {
1290       m_spectral_start = 0;
1291       m_spectral_end = 63;
1292     }
1293 
1294     num_left -= 3;
1295 
1296     /* read past whatever is num_left */
1297     while (num_left)
1298     {
1299       get_bits(8);
1300       num_left--;
1301     }
1302   }
1303 
1304   // Finds the next marker.
1305   int next_marker () {
1306     uint c, bytes;
1307 
1308     bytes = 0;
1309 
1310     do
1311     {
1312       do
1313       {
1314         bytes++;
1315         c = get_bits(8);
1316       } while (c != 0xFF);
1317 
1318       do
1319       {
1320         c = get_bits(8);
1321       } while (c == 0xFF);
1322 
1323     } while (c == 0);
1324 
1325     // If bytes > 0 here, there where extra bytes before the marker (not good).
1326 
1327     return c;
1328   }
1329 
1330   // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
1331   // encountered.
1332   int process_markers () {
1333     int c;
1334 
1335     for ( ; ; ) {
1336       c = next_marker();
1337 
1338       switch (c)
1339       {
1340         case M_SOF0:
1341         case M_SOF1:
1342         case M_SOF2:
1343         case M_SOF3:
1344         case M_SOF5:
1345         case M_SOF6:
1346         case M_SOF7:
1347         //case M_JPG:
1348         case M_SOF9:
1349         case M_SOF10:
1350         case M_SOF11:
1351         case M_SOF13:
1352         case M_SOF14:
1353         case M_SOF15:
1354         case M_SOI:
1355         case M_EOI:
1356         case M_SOS:
1357           return c;
1358         case M_DHT:
1359           read_dht_marker();
1360           break;
1361         // No arithmitic support - dumb patents!
1362         case M_DAC:
1363           stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1364           break;
1365         case M_DQT:
1366           read_dqt_marker();
1367           break;
1368         case M_DRI:
1369           read_dri_marker();
1370           break;
1371         //case M_APP0:  /* no need to read the JFIF marker */
1372 
1373         case M_JPG:
1374         case M_RST0:    /* no parameters */
1375         case M_RST1:
1376         case M_RST2:
1377         case M_RST3:
1378         case M_RST4:
1379         case M_RST5:
1380         case M_RST6:
1381         case M_RST7:
1382         case M_TEM:
1383           stop_decoding(JPGD_UNEXPECTED_MARKER);
1384           break;
1385         default:    /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
1386           skip_variable_marker();
1387           break;
1388       }
1389     }
1390   }
1391 
1392   // Finds the start of image (SOI) marker.
1393   // This code is rather defensive: it only checks the first 512 bytes to avoid
1394   // false positives.
1395   void locate_soi_marker () {
1396     uint lastchar, thischar;
1397     uint bytesleft;
1398 
1399     lastchar = get_bits(8);
1400 
1401     thischar = get_bits(8);
1402 
1403     /* ok if it's a normal JPEG file without a special header */
1404 
1405     if ((lastchar == 0xFF) && (thischar == M_SOI))
1406       return;
1407 
1408     bytesleft = 4096; //512;
1409 
1410     for ( ; ; )
1411     {
1412       if (--bytesleft == 0)
1413         stop_decoding(JPGD_NOT_JPEG);
1414 
1415       lastchar = thischar;
1416 
1417       thischar = get_bits(8);
1418 
1419       if (lastchar == 0xFF)
1420       {
1421         if (thischar == M_SOI)
1422           break;
1423         else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end
1424           stop_decoding(JPGD_NOT_JPEG);
1425       }
1426     }
1427 
1428     // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
1429     thischar = (m_bit_buf >> 24) & 0xFF;
1430 
1431     if (thischar != 0xFF)
1432       stop_decoding(JPGD_NOT_JPEG);
1433   }
1434 
1435   // Find a start of frame (SOF) marker.
1436   void locate_sof_marker () {
1437     locate_soi_marker();
1438 
1439     int c = process_markers();
1440 
1441     switch (c)
1442     {
1443       case M_SOF2:
1444         m_progressive_flag = true;
1445         goto case;
1446       case M_SOF0:  /* baseline DCT */
1447       case M_SOF1:  /* extended sequential DCT */
1448         read_sof_marker();
1449         break;
1450       case M_SOF9:  /* Arithmitic coding */
1451         stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT);
1452         break;
1453       default:
1454         stop_decoding(JPGD_UNSUPPORTED_MARKER);
1455         break;
1456     }
1457   }
1458 
1459   // Find a start of scan (SOS) marker.
1460   int locate_sos_marker () {
1461     int c;
1462 
1463     c = process_markers();
1464 
1465     if (c == M_EOI)
1466       return false;
1467     else if (c != M_SOS)
1468       stop_decoding(JPGD_UNEXPECTED_MARKER);
1469 
1470     read_sos_marker();
1471 
1472     return true;
1473   }
1474 
1475   // Reset everything to default/uninitialized state.
1476   void initit (JpegStreamReadFunc rfn) {
1477     m_pMem_blocks = null;
1478     m_error_code = JPGD_SUCCESS;
1479     m_ready_flag = false;
1480     m_image_x_size = m_image_y_size = 0;
1481     readfn = rfn;
1482     m_progressive_flag = false;
1483 
1484     memset(m_huff_ac.ptr, 0, m_huff_ac.sizeof);
1485     memset(m_huff_num.ptr, 0, m_huff_num.sizeof);
1486     memset(m_huff_val.ptr, 0, m_huff_val.sizeof);
1487     memset(m_quant.ptr, 0, m_quant.sizeof);
1488 
1489     m_scan_type = 0;
1490     m_comps_in_frame = 0;
1491 
1492     memset(m_comp_h_samp.ptr, 0, m_comp_h_samp.sizeof);
1493     memset(m_comp_v_samp.ptr, 0, m_comp_v_samp.sizeof);
1494     memset(m_comp_quant.ptr, 0, m_comp_quant.sizeof);
1495     memset(m_comp_ident.ptr, 0, m_comp_ident.sizeof);
1496     memset(m_comp_h_blocks.ptr, 0, m_comp_h_blocks.sizeof);
1497     memset(m_comp_v_blocks.ptr, 0, m_comp_v_blocks.sizeof);
1498 
1499     m_comps_in_scan = 0;
1500     memset(m_comp_list.ptr, 0, m_comp_list.sizeof);
1501     memset(m_comp_dc_tab.ptr, 0, m_comp_dc_tab.sizeof);
1502     memset(m_comp_ac_tab.ptr, 0, m_comp_ac_tab.sizeof);
1503 
1504     m_spectral_start = 0;
1505     m_spectral_end = 0;
1506     m_successive_low = 0;
1507     m_successive_high = 0;
1508     m_max_mcu_x_size = 0;
1509     m_max_mcu_y_size = 0;
1510     m_blocks_per_mcu = 0;
1511     m_max_blocks_per_row = 0;
1512     m_mcus_per_row = 0;
1513     m_mcus_per_col = 0;
1514     m_expanded_blocks_per_component = 0;
1515     m_expanded_blocks_per_mcu = 0;
1516     m_expanded_blocks_per_row = 0;
1517     m_freq_domain_chroma_upsample = false;
1518 
1519     memset(m_mcu_org.ptr, 0, m_mcu_org.sizeof);
1520 
1521     m_total_lines_left = 0;
1522     m_mcu_lines_left = 0;
1523     m_real_dest_bytes_per_scan_line = 0;
1524     m_dest_bytes_per_scan_line = 0;
1525     m_dest_bytes_per_pixel = 0;
1526 
1527     memset(m_pHuff_tabs.ptr, 0, m_pHuff_tabs.sizeof);
1528 
1529     memset(m_dc_coeffs.ptr, 0, m_dc_coeffs.sizeof);
1530     memset(m_ac_coeffs.ptr, 0, m_ac_coeffs.sizeof);
1531     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1532 
1533     m_eob_run = 0;
1534 
1535     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
1536 
1537     m_pIn_buf_ofs = m_in_buf.ptr;
1538     m_in_buf_left = 0;
1539     m_eof_flag = false;
1540     m_tem_flag = 0;
1541 
1542     memset(m_in_buf_pad_start.ptr, 0, m_in_buf_pad_start.sizeof);
1543     memset(m_in_buf.ptr, 0, m_in_buf.sizeof);
1544     memset(m_in_buf_pad_end.ptr, 0, m_in_buf_pad_end.sizeof);
1545 
1546     m_restart_interval = 0;
1547     m_restarts_left    = 0;
1548     m_next_restart_num = 0;
1549 
1550     m_max_mcus_per_row = 0;
1551     m_max_blocks_per_mcu = 0;
1552     m_max_mcus_per_col = 0;
1553 
1554     memset(m_last_dc_val.ptr, 0, m_last_dc_val.sizeof);
1555     m_pMCU_coefficients = null;
1556     m_pSample_buf = null;
1557 
1558     m_total_bytes_read = 0;
1559 
1560     m_pScan_line_0 = null;
1561     m_pScan_line_1 = null;
1562 
1563     // Ready the input buffer.
1564     prep_in_buffer();
1565 
1566     // Prime the bit buffer.
1567     m_bits_left = 16;
1568     m_bit_buf = 0;
1569 
1570     get_bits(16);
1571     get_bits(16);
1572 
1573     for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
1574       m_mcu_block_max_zag.ptr[i] = 64;
1575   }
1576 
1577   enum SCALEBITS = 16;
1578   enum ONE_HALF = (cast(int) 1 << (SCALEBITS-1));
1579   enum FIX(float x) = (cast(int)((x) * (1L<<SCALEBITS) + 0.5f));
1580 
1581   // Create a few tables that allow us to quickly convert YCbCr to RGB.
1582   void create_look_ups () {
1583     for (int i = 0; i <= 255; i++)
1584     {
1585       int k = i - 128;
1586       m_crr.ptr[i] = ( FIX!(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
1587       m_cbb.ptr[i] = ( FIX!(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
1588       m_crg.ptr[i] = (-FIX!(0.71414f)) * k;
1589       m_cbg.ptr[i] = (-FIX!(0.34414f)) * k + ONE_HALF;
1590     }
1591   }
1592 
1593   // This method throws back into the stream any bytes that where read
1594   // into the bit buffer during initial marker scanning.
1595   void fix_in_buffer () {
1596     // In case any 0xFF's where pulled into the buffer during marker scanning.
1597     assert((m_bits_left & 7) == 0);
1598 
1599     if (m_bits_left == 16)
1600       stuff_char(cast(ubyte)(m_bit_buf & 0xFF));
1601 
1602     if (m_bits_left >= 8)
1603       stuff_char(cast(ubyte)((m_bit_buf >> 8) & 0xFF));
1604 
1605     stuff_char(cast(ubyte)((m_bit_buf >> 16) & 0xFF));
1606     stuff_char(cast(ubyte)((m_bit_buf >> 24) & 0xFF));
1607 
1608     m_bits_left = 16;
1609     get_bits_no_markers(16);
1610     get_bits_no_markers(16);
1611   }
1612 
1613   void transform_mcu (int mcu_row) {
1614     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1615     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
1616 
1617     for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1618     {
1619       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1620       pSrc_ptr += 64;
1621       pDst_ptr += 64;
1622     }
1623   }
1624 
1625   static immutable ubyte[64] s_max_rc = [
1626     17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
1627     102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
1628     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
1629     136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
1630   ];
1631 
1632   void transform_mcu_expand (int mcu_row) {
1633     jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
1634     ubyte* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
1635 
1636     // Y IDCT
1637     int mcu_block;
1638     for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
1639     {
1640       idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag.ptr[mcu_block]);
1641       pSrc_ptr += 64;
1642       pDst_ptr += 64;
1643     }
1644 
1645     // Chroma IDCT, with upsampling
1646     jpgd_block_t[64] temp_block;
1647 
1648     for (int i = 0; i < 2; i++)
1649     {
1650       DCT_Upsample.Matrix44 P, Q, R, S;
1651 
1652       assert(m_mcu_block_max_zag.ptr[mcu_block] >= 1);
1653       assert(m_mcu_block_max_zag.ptr[mcu_block] <= 64);
1654 
1655       int max_zag = m_mcu_block_max_zag.ptr[mcu_block++] - 1;
1656       if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
1657       switch (s_max_rc.ptr[max_zag])
1658       {
1659       case 1*16+1:
1660         DCT_Upsample.P_Q!(1, 1).calc(P, Q, pSrc_ptr);
1661         DCT_Upsample.R_S!(1, 1).calc(R, S, pSrc_ptr);
1662         break;
1663       case 1*16+2:
1664         DCT_Upsample.P_Q!(1, 2).calc(P, Q, pSrc_ptr);
1665         DCT_Upsample.R_S!(1, 2).calc(R, S, pSrc_ptr);
1666         break;
1667       case 2*16+2:
1668         DCT_Upsample.P_Q!(2, 2).calc(P, Q, pSrc_ptr);
1669         DCT_Upsample.R_S!(2, 2).calc(R, S, pSrc_ptr);
1670         break;
1671       case 3*16+2:
1672         DCT_Upsample.P_Q!(3, 2).calc(P, Q, pSrc_ptr);
1673         DCT_Upsample.R_S!(3, 2).calc(R, S, pSrc_ptr);
1674         break;
1675       case 3*16+3:
1676         DCT_Upsample.P_Q!(3, 3).calc(P, Q, pSrc_ptr);
1677         DCT_Upsample.R_S!(3, 3).calc(R, S, pSrc_ptr);
1678         break;
1679       case 3*16+4:
1680         DCT_Upsample.P_Q!(3, 4).calc(P, Q, pSrc_ptr);
1681         DCT_Upsample.R_S!(3, 4).calc(R, S, pSrc_ptr);
1682         break;
1683       case 4*16+4:
1684         DCT_Upsample.P_Q!(4, 4).calc(P, Q, pSrc_ptr);
1685         DCT_Upsample.R_S!(4, 4).calc(R, S, pSrc_ptr);
1686         break;
1687       case 5*16+4:
1688         DCT_Upsample.P_Q!(5, 4).calc(P, Q, pSrc_ptr);
1689         DCT_Upsample.R_S!(5, 4).calc(R, S, pSrc_ptr);
1690         break;
1691       case 5*16+5:
1692         DCT_Upsample.P_Q!(5, 5).calc(P, Q, pSrc_ptr);
1693         DCT_Upsample.R_S!(5, 5).calc(R, S, pSrc_ptr);
1694         break;
1695       case 5*16+6:
1696         DCT_Upsample.P_Q!(5, 6).calc(P, Q, pSrc_ptr);
1697         DCT_Upsample.R_S!(5, 6).calc(R, S, pSrc_ptr);
1698         break;
1699       case 6*16+6:
1700         DCT_Upsample.P_Q!(6, 6).calc(P, Q, pSrc_ptr);
1701         DCT_Upsample.R_S!(6, 6).calc(R, S, pSrc_ptr);
1702         break;
1703       case 7*16+6:
1704         DCT_Upsample.P_Q!(7, 6).calc(P, Q, pSrc_ptr);
1705         DCT_Upsample.R_S!(7, 6).calc(R, S, pSrc_ptr);
1706         break;
1707       case 7*16+7:
1708         DCT_Upsample.P_Q!(7, 7).calc(P, Q, pSrc_ptr);
1709         DCT_Upsample.R_S!(7, 7).calc(R, S, pSrc_ptr);
1710         break;
1711       case 7*16+8:
1712         DCT_Upsample.P_Q!(7, 8).calc(P, Q, pSrc_ptr);
1713         DCT_Upsample.R_S!(7, 8).calc(R, S, pSrc_ptr);
1714         break;
1715       case 8*16+8:
1716         DCT_Upsample.P_Q!(8, 8).calc(P, Q, pSrc_ptr);
1717         DCT_Upsample.R_S!(8, 8).calc(R, S, pSrc_ptr);
1718         break;
1719       default:
1720         assert(false);
1721       }
1722 
1723       auto a = DCT_Upsample.Matrix44(P + Q);
1724       P -= Q;
1725       DCT_Upsample.Matrix44* b = &P;
1726       auto c = DCT_Upsample.Matrix44(R + S);
1727       R -= S;
1728       DCT_Upsample.Matrix44* d = &R;
1729 
1730       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, a, c);
1731       idct_4x4(temp_block.ptr, pDst_ptr);
1732       pDst_ptr += 64;
1733 
1734       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, a, c);
1735       idct_4x4(temp_block.ptr, pDst_ptr);
1736       pDst_ptr += 64;
1737 
1738       DCT_Upsample.Matrix44.add_and_store(temp_block.ptr, *b, *d);
1739       idct_4x4(temp_block.ptr, pDst_ptr);
1740       pDst_ptr += 64;
1741 
1742       DCT_Upsample.Matrix44.sub_and_store(temp_block.ptr, *b, *d);
1743       idct_4x4(temp_block.ptr, pDst_ptr);
1744       pDst_ptr += 64;
1745 
1746       pSrc_ptr += 64;
1747     }
1748   }
1749 
1750   // Loads and dequantizes the next row of (already decoded) coefficients.
1751   // Progressive images only.
1752   void load_next_row () {
1753     int i;
1754     jpgd_block_t *p;
1755     jpgd_quant_t *q;
1756     int mcu_row, mcu_block, row_block = 0;
1757     int component_num, component_id;
1758     int[JPGD_MAX_COMPONENTS] block_x_mcu;
1759 
1760     memset(block_x_mcu.ptr, 0, JPGD_MAX_COMPONENTS * int.sizeof);
1761 
1762     for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1763     {
1764       int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
1765 
1766       for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
1767       {
1768         component_id = m_mcu_org.ptr[mcu_block];
1769         q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1770 
1771         p = m_pMCU_coefficients + 64 * mcu_block;
1772 
1773         jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1774         jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs.ptr[component_id], block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
1775         p[0] = pDC[0];
1776         memcpy(&p[1], &pAC[1], 63 * jpgd_block_t.sizeof);
1777 
1778         for (i = 63; i > 0; i--)
1779           if (p[g_ZAG[i]])
1780             break;
1781 
1782         m_mcu_block_max_zag.ptr[mcu_block] = i + 1;
1783 
1784         for ( ; i >= 0; i--)
1785           if (p[g_ZAG[i]])
1786             p[g_ZAG[i]] = cast(jpgd_block_t)(p[g_ZAG[i]] * q[i]);
1787 
1788         row_block++;
1789 
1790         if (m_comps_in_scan == 1)
1791           block_x_mcu.ptr[component_id]++;
1792         else
1793         {
1794           if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
1795           {
1796             block_x_mcu_ofs = 0;
1797 
1798             if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
1799             {
1800               block_y_mcu_ofs = 0;
1801 
1802               block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
1803             }
1804           }
1805         }
1806       }
1807 
1808       if (m_freq_domain_chroma_upsample)
1809         transform_mcu_expand(mcu_row);
1810       else
1811         transform_mcu(mcu_row);
1812     }
1813 
1814     if (m_comps_in_scan == 1)
1815       m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
1816     else
1817     {
1818       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
1819       {
1820         component_id = m_comp_list.ptr[component_num];
1821 
1822         m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
1823       }
1824     }
1825   }
1826 
1827   // Restart interval processing.
1828   void process_restart () {
1829     int i;
1830     int c = 0;
1831 
1832     // Align to a byte boundry
1833     // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
1834     //get_bits_no_markers(m_bits_left & 7);
1835 
1836     // Let's scan a little bit to find the marker, but not _too_ far.
1837     // 1536 is a "fudge factor" that determines how much to scan.
1838     for (i = 1536; i > 0; i--)
1839       if (get_char() == 0xFF)
1840         break;
1841 
1842     if (i == 0)
1843       stop_decoding(JPGD_BAD_RESTART_MARKER);
1844 
1845     for ( ; i > 0; i--)
1846       if ((c = get_char()) != 0xFF)
1847         break;
1848 
1849     if (i == 0)
1850       stop_decoding(JPGD_BAD_RESTART_MARKER);
1851 
1852     // Is it the expected marker? If not, something bad happened.
1853     if (c != (m_next_restart_num + M_RST0))
1854       stop_decoding(JPGD_BAD_RESTART_MARKER);
1855 
1856     // Reset each component's DC prediction values.
1857     memset(&m_last_dc_val, 0, m_comps_in_frame * uint.sizeof);
1858 
1859     m_eob_run = 0;
1860 
1861     m_restarts_left = m_restart_interval;
1862 
1863     m_next_restart_num = (m_next_restart_num + 1) & 7;
1864 
1865     // Get the bit buffer going again...
1866 
1867     m_bits_left = 16;
1868     get_bits_no_markers(16);
1869     get_bits_no_markers(16);
1870   }
1871 
1872   static int dequantize_ac (int c, int q) { pragma(inline, true); c *= q; return c; }
1873 
1874   // Decodes and dequantizes the next row of coefficients.
1875   void decode_next_row () {
1876     int row_block = 0;
1877 
1878     for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
1879     {
1880       if ((m_restart_interval) && (m_restarts_left == 0))
1881         process_restart();
1882 
1883       jpgd_block_t* p = m_pMCU_coefficients;
1884       for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
1885       {
1886         int component_id = m_mcu_org.ptr[mcu_block];
1887         jpgd_quant_t* q = m_quant.ptr[m_comp_quant.ptr[component_id]];
1888 
1889         int r, s;
1890         s = huff_decode(m_pHuff_tabs.ptr[m_comp_dc_tab.ptr[component_id]], r);
1891         s = JPGD_HUFF_EXTEND(r, s);
1892 
1893         m_last_dc_val.ptr[component_id] = (s += m_last_dc_val.ptr[component_id]);
1894 
1895         p[0] = cast(jpgd_block_t)(s * q[0]);
1896 
1897         int prev_num_set = m_mcu_block_max_zag.ptr[mcu_block];
1898 
1899         huff_tables *pH = m_pHuff_tabs.ptr[m_comp_ac_tab.ptr[component_id]];
1900 
1901         int k;
1902         for (k = 1; k < 64; k++)
1903         {
1904           int extra_bits;
1905           s = huff_decode(pH, extra_bits);
1906 
1907           r = s >> 4;
1908           s &= 15;
1909 
1910           if (s)
1911           {
1912             if (r)
1913             {
1914               if ((k + r) > 63)
1915                 stop_decoding(JPGD_DECODE_ERROR);
1916 
1917               if (k < prev_num_set)
1918               {
1919                 int n = JPGD_MIN(r, prev_num_set - k);
1920                 int kt = k;
1921                 while (n--)
1922                   p[g_ZAG[kt++]] = 0;
1923               }
1924 
1925               k += r;
1926             }
1927 
1928             s = JPGD_HUFF_EXTEND(extra_bits, s);
1929 
1930             assert(k < 64);
1931 
1932             p[g_ZAG[k]] = cast(jpgd_block_t)(dequantize_ac(s, q[k])); //s * q[k];
1933           }
1934           else
1935           {
1936             if (r == 15)
1937             {
1938               if ((k + 16) > 64)
1939                 stop_decoding(JPGD_DECODE_ERROR);
1940 
1941               if (k < prev_num_set)
1942               {
1943                 int n = JPGD_MIN(16, prev_num_set - k);
1944                 int kt = k;
1945                 while (n--)
1946                 {
1947                   assert(kt <= 63);
1948                   p[g_ZAG[kt++]] = 0;
1949                 }
1950               }
1951 
1952               k += 16 - 1; // - 1 because the loop counter is k
1953               assert(p[g_ZAG[k]] == 0);
1954             }
1955             else
1956               break;
1957           }
1958         }
1959 
1960         if (k < prev_num_set)
1961         {
1962           int kt = k;
1963           while (kt < prev_num_set)
1964             p[g_ZAG[kt++]] = 0;
1965         }
1966 
1967         m_mcu_block_max_zag.ptr[mcu_block] = k;
1968 
1969         row_block++;
1970       }
1971 
1972       if (m_freq_domain_chroma_upsample)
1973         transform_mcu_expand(mcu_row);
1974       else
1975         transform_mcu(mcu_row);
1976 
1977       m_restarts_left--;
1978     }
1979   }
1980 
1981   // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
1982   void H1V1Convert () {
1983     int row = m_max_mcu_y_size - m_mcu_lines_left;
1984     ubyte *d = m_pScan_line_0;
1985     ubyte *s = m_pSample_buf + row * 8;
1986 
1987     for (int i = m_max_mcus_per_row; i > 0; i--)
1988     {
1989       for (int j = 0; j < 8; j++)
1990       {
1991         int y = s[j];
1992         int cb = s[64+j];
1993         int cr = s[128+j];
1994 
1995         d[0] = clamp(y + m_crr.ptr[cr]);
1996         d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
1997         d[2] = clamp(y + m_cbb.ptr[cb]);
1998         d[3] = 255;
1999 
2000         d += 4;
2001       }
2002 
2003       s += 64*3;
2004     }
2005   }
2006 
2007   // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
2008   void H2V1Convert () {
2009     int row = m_max_mcu_y_size - m_mcu_lines_left;
2010     ubyte *d0 = m_pScan_line_0;
2011     ubyte *y = m_pSample_buf + row * 8;
2012     ubyte *c = m_pSample_buf + 2*64 + row * 8;
2013 
2014     for (int i = m_max_mcus_per_row; i > 0; i--)
2015     {
2016       for (int l = 0; l < 2; l++)
2017       {
2018         for (int j = 0; j < 4; j++)
2019         {
2020           int cb = c[0];
2021           int cr = c[64];
2022 
2023           int rc = m_crr.ptr[cr];
2024           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2025           int bc = m_cbb.ptr[cb];
2026 
2027           int yy = y[j<<1];
2028           d0[0] = clamp(yy+rc);
2029           d0[1] = clamp(yy+gc);
2030           d0[2] = clamp(yy+bc);
2031           d0[3] = 255;
2032 
2033           yy = y[(j<<1)+1];
2034           d0[4] = clamp(yy+rc);
2035           d0[5] = clamp(yy+gc);
2036           d0[6] = clamp(yy+bc);
2037           d0[7] = 255;
2038 
2039           d0 += 8;
2040 
2041           c++;
2042         }
2043         y += 64;
2044       }
2045 
2046       y += 64*4 - 64*2;
2047       c += 64*4 - 8;
2048     }
2049   }
2050 
2051   // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
2052   void H1V2Convert () {
2053     int row = m_max_mcu_y_size - m_mcu_lines_left;
2054     ubyte *d0 = m_pScan_line_0;
2055     ubyte *d1 = m_pScan_line_1;
2056     ubyte *y;
2057     ubyte *c;
2058 
2059     if (row < 8)
2060       y = m_pSample_buf + row * 8;
2061     else
2062       y = m_pSample_buf + 64*1 + (row & 7) * 8;
2063 
2064     c = m_pSample_buf + 64*2 + (row >> 1) * 8;
2065 
2066     for (int i = m_max_mcus_per_row; i > 0; i--)
2067     {
2068       for (int j = 0; j < 8; j++)
2069       {
2070         int cb = c[0+j];
2071         int cr = c[64+j];
2072 
2073         int rc = m_crr.ptr[cr];
2074         int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2075         int bc = m_cbb.ptr[cb];
2076 
2077         int yy = y[j];
2078         d0[0] = clamp(yy+rc);
2079         d0[1] = clamp(yy+gc);
2080         d0[2] = clamp(yy+bc);
2081         d0[3] = 255;
2082 
2083         yy = y[8+j];
2084         d1[0] = clamp(yy+rc);
2085         d1[1] = clamp(yy+gc);
2086         d1[2] = clamp(yy+bc);
2087         d1[3] = 255;
2088 
2089         d0 += 4;
2090         d1 += 4;
2091       }
2092 
2093       y += 64*4;
2094       c += 64*4;
2095     }
2096   }
2097 
2098   // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
2099   void H2V2Convert () {
2100     int row = m_max_mcu_y_size - m_mcu_lines_left;
2101     ubyte *d0 = m_pScan_line_0;
2102     ubyte *d1 = m_pScan_line_1;
2103     ubyte *y;
2104     ubyte *c;
2105 
2106     if (row < 8)
2107       y = m_pSample_buf + row * 8;
2108     else
2109       y = m_pSample_buf + 64*2 + (row & 7) * 8;
2110 
2111     c = m_pSample_buf + 64*4 + (row >> 1) * 8;
2112 
2113     for (int i = m_max_mcus_per_row; i > 0; i--)
2114     {
2115       for (int l = 0; l < 2; l++)
2116       {
2117         for (int j = 0; j < 8; j += 2)
2118         {
2119           int cb = c[0];
2120           int cr = c[64];
2121 
2122           int rc = m_crr.ptr[cr];
2123           int gc = ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16);
2124           int bc = m_cbb.ptr[cb];
2125 
2126           int yy = y[j];
2127           d0[0] = clamp(yy+rc);
2128           d0[1] = clamp(yy+gc);
2129           d0[2] = clamp(yy+bc);
2130           d0[3] = 255;
2131 
2132           yy = y[j+1];
2133           d0[4] = clamp(yy+rc);
2134           d0[5] = clamp(yy+gc);
2135           d0[6] = clamp(yy+bc);
2136           d0[7] = 255;
2137 
2138           yy = y[j+8];
2139           d1[0] = clamp(yy+rc);
2140           d1[1] = clamp(yy+gc);
2141           d1[2] = clamp(yy+bc);
2142           d1[3] = 255;
2143 
2144           yy = y[j+8+1];
2145           d1[4] = clamp(yy+rc);
2146           d1[5] = clamp(yy+gc);
2147           d1[6] = clamp(yy+bc);
2148           d1[7] = 255;
2149 
2150           d0 += 8;
2151           d1 += 8;
2152 
2153           c++;
2154         }
2155         y += 64;
2156       }
2157 
2158       y += 64*6 - 64*2;
2159       c += 64*6 - 8;
2160     }
2161   }
2162 
2163   // Y (1 block per MCU) to 8-bit grayscale
2164   void gray_convert () {
2165     int row = m_max_mcu_y_size - m_mcu_lines_left;
2166     ubyte *d = m_pScan_line_0;
2167     ubyte *s = m_pSample_buf + row * 8;
2168 
2169     for (int i = m_max_mcus_per_row; i > 0; i--)
2170     {
2171       *cast(uint*)d = *cast(uint*)s;
2172       *cast(uint*)(&d[4]) = *cast(uint*)(&s[4]);
2173 
2174       s += 64;
2175       d += 8;
2176     }
2177   }
2178 
2179   void expanded_convert () {
2180     int row = m_max_mcu_y_size - m_mcu_lines_left;
2181 
2182     ubyte* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp.ptr[0] + (row & 7) * 8;
2183 
2184     ubyte* d = m_pScan_line_0;
2185 
2186     for (int i = m_max_mcus_per_row; i > 0; i--)
2187     {
2188       for (int k = 0; k < m_max_mcu_x_size; k += 8)
2189       {
2190         immutable int Y_ofs = k * 8;
2191         immutable int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
2192         immutable int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
2193         for (int j = 0; j < 8; j++)
2194         {
2195           int y = Py[Y_ofs + j];
2196           int cb = Py[Cb_ofs + j];
2197           int cr = Py[Cr_ofs + j];
2198 
2199           d[0] = clamp(y + m_crr.ptr[cr]);
2200           d[1] = clamp(y + ((m_crg.ptr[cr] + m_cbg.ptr[cb]) >> 16));
2201           d[2] = clamp(y + m_cbb.ptr[cb]);
2202           d[3] = 255;
2203 
2204           d += 4;
2205         }
2206       }
2207 
2208       Py += 64 * m_expanded_blocks_per_mcu;
2209     }
2210   }
2211 
2212   // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
2213   void find_eoi () {
2214     if (!m_progressive_flag)
2215     {
2216       // Attempt to read the EOI marker.
2217       //get_bits_no_markers(m_bits_left & 7);
2218 
2219       // Prime the bit buffer
2220       m_bits_left = 16;
2221       get_bits(16);
2222       get_bits(16);
2223 
2224       // The next marker _should_ be EOI
2225       process_markers();
2226     }
2227 
2228     m_total_bytes_read -= m_in_buf_left;
2229   }
2230 
2231   // Creates the tables needed for efficient Huffman decoding.
2232   void make_huff_table (int index, huff_tables *pH) {
2233     int p, i, l, si;
2234     ubyte[257] huffsize;
2235     uint[257] huffcode;
2236     uint code;
2237     uint subtree;
2238     int code_size;
2239     int lastp;
2240     int nextfreeentry;
2241     int currententry;
2242 
2243     pH.ac_table = m_huff_ac.ptr[index] != 0;
2244 
2245     p = 0;
2246 
2247     for (l = 1; l <= 16; l++)
2248     {
2249       for (i = 1; i <= m_huff_num.ptr[index][l]; i++)
2250         huffsize.ptr[p++] = cast(ubyte)(l);
2251     }
2252 
2253     huffsize.ptr[p] = 0;
2254 
2255     lastp = p;
2256 
2257     code = 0;
2258     si = huffsize.ptr[0];
2259     p = 0;
2260 
2261     while (huffsize.ptr[p])
2262     {
2263       while (huffsize.ptr[p] == si)
2264       {
2265         huffcode.ptr[p++] = code;
2266         code++;
2267       }
2268 
2269       code <<= 1;
2270       si++;
2271     }
2272 
2273     memset(pH.look_up.ptr, 0, pH.look_up.sizeof);
2274     memset(pH.look_up2.ptr, 0, pH.look_up2.sizeof);
2275     memset(pH.tree.ptr, 0, pH.tree.sizeof);
2276     memset(pH.code_size.ptr, 0, pH.code_size.sizeof);
2277 
2278     nextfreeentry = -1;
2279 
2280     p = 0;
2281 
2282     while (p < lastp)
2283     {
2284       i = m_huff_val.ptr[index][p];
2285       code = huffcode.ptr[p];
2286       code_size = huffsize.ptr[p];
2287 
2288       pH.code_size.ptr[i] = cast(ubyte)(code_size);
2289 
2290       if (code_size <= 8)
2291       {
2292         code <<= (8 - code_size);
2293 
2294         for (l = 1 << (8 - code_size); l > 0; l--)
2295         {
2296           assert(i < 256);
2297 
2298           pH.look_up.ptr[code] = i;
2299 
2300           bool has_extrabits = false;
2301           int extra_bits = 0;
2302           int num_extra_bits = i & 15;
2303 
2304           int bits_to_fetch = code_size;
2305           if (num_extra_bits)
2306           {
2307             int total_codesize = code_size + num_extra_bits;
2308             if (total_codesize <= 8)
2309             {
2310               has_extrabits = true;
2311               extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
2312               assert(extra_bits <= 0x7FFF);
2313               bits_to_fetch += num_extra_bits;
2314             }
2315           }
2316 
2317           if (!has_extrabits)
2318             pH.look_up2.ptr[code] = i | (bits_to_fetch << 8);
2319           else
2320             pH.look_up2.ptr[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
2321 
2322           code++;
2323         }
2324       }
2325       else
2326       {
2327         subtree = (code >> (code_size - 8)) & 0xFF;
2328 
2329         currententry = pH.look_up.ptr[subtree];
2330 
2331         if (currententry == 0)
2332         {
2333           pH.look_up.ptr[subtree] = currententry = nextfreeentry;
2334           pH.look_up2.ptr[subtree] = currententry = nextfreeentry;
2335 
2336           nextfreeentry -= 2;
2337         }
2338 
2339         code <<= (16 - (code_size - 8));
2340 
2341         for (l = code_size; l > 9; l--)
2342         {
2343           if ((code & 0x8000) == 0)
2344             currententry--;
2345 
2346           if (pH.tree.ptr[-currententry - 1] == 0)
2347           {
2348             pH.tree.ptr[-currententry - 1] = nextfreeentry;
2349 
2350             currententry = nextfreeentry;
2351 
2352             nextfreeentry -= 2;
2353           }
2354           else
2355             currententry = pH.tree.ptr[-currententry - 1];
2356 
2357           code <<= 1;
2358         }
2359 
2360         if ((code & 0x8000) == 0)
2361           currententry--;
2362 
2363         pH.tree.ptr[-currententry - 1] = i;
2364       }
2365 
2366       p++;
2367     }
2368   }
2369 
2370   // Verifies the quantization tables needed for this scan are available.
2371   void check_quant_tables () {
2372     for (int i = 0; i < m_comps_in_scan; i++)
2373       if (m_quant.ptr[m_comp_quant.ptr[m_comp_list.ptr[i]]] == null)
2374         stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
2375   }
2376 
2377   // Verifies that all the Huffman tables needed for this scan are available.
2378   void check_huff_tables () {
2379     for (int i = 0; i < m_comps_in_scan; i++)
2380     {
2381       if ((m_spectral_start == 0) && (m_huff_num.ptr[m_comp_dc_tab.ptr[m_comp_list.ptr[i]]] == null))
2382         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2383 
2384       if ((m_spectral_end > 0) && (m_huff_num.ptr[m_comp_ac_tab.ptr[m_comp_list.ptr[i]]] == null))
2385         stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
2386     }
2387 
2388     for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
2389       if (m_huff_num.ptr[i])
2390       {
2391         if (!m_pHuff_tabs.ptr[i])
2392           m_pHuff_tabs.ptr[i] = cast(huff_tables*)alloc(huff_tables.sizeof);
2393 
2394         make_huff_table(i, m_pHuff_tabs.ptr[i]);
2395       }
2396   }
2397 
2398   // Determines the component order inside each MCU.
2399   // Also calculates how many MCUs are on each row, etc.
       // Reads the per-component sampling factors, the image size and the current
       // scan's component list (m_comp_list / m_comps_in_scan); writes
       // m_comp_h_blocks, m_comp_v_blocks, m_mcus_per_row, m_mcus_per_col,
       // m_mcu_org and m_blocks_per_mcu.
2400   void calc_mcu_block_order () {
2401     int component_num, component_id;
2402     int max_h_samp = 0, max_v_samp = 0;
2403 
         // Find the largest horizontal and vertical sampling factors in the frame.
2404     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2405     {
2406       if (m_comp_h_samp.ptr[component_id] > max_h_samp)
2407         max_h_samp = m_comp_h_samp.ptr[component_id];
2408 
2409       if (m_comp_v_samp.ptr[component_id] > max_v_samp)
2410         max_v_samp = m_comp_v_samp.ptr[component_id];
2411     }
2412 
         // Per-component block counts: scale the image size by samp/max_samp
         // (rounding up), then round up to a whole number of 8-sample blocks.
2413     for (component_id = 0; component_id < m_comps_in_frame; component_id++)
2414     {
2415       m_comp_h_blocks.ptr[component_id] = ((((m_image_x_size * m_comp_h_samp.ptr[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
2416       m_comp_v_blocks.ptr[component_id] = ((((m_image_y_size * m_comp_v_samp.ptr[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
2417     }
2418 
         // A single-component scan walks that component's own block grid;
         // otherwise the MCU grid is the 8-pixel block grid divided by the
         // maximum sampling factors, rounding up.
2419     if (m_comps_in_scan == 1)
2420     {
2421       m_mcus_per_row = m_comp_h_blocks.ptr[m_comp_list.ptr[0]];
2422       m_mcus_per_col = m_comp_v_blocks.ptr[m_comp_list.ptr[0]];
2423     }
2424     else
2425     {
2426       m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
2427       m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
2428     }
2429 
         // Build m_mcu_org: the component id of every block inside one MCU.
2430     if (m_comps_in_scan == 1)
2431     {
2432       m_mcu_org.ptr[0] = m_comp_list.ptr[0];
2433 
2434       m_blocks_per_mcu = 1;
2435     }
2436     else
2437     {
2438       m_blocks_per_mcu = 0;
2439 
2440       for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2441       {
2442         int num_blocks;
2443 
2444         component_id = m_comp_list.ptr[component_num];
2445 
             // Each component contributes h_samp * v_samp consecutive entries.
2446         num_blocks = m_comp_h_samp.ptr[component_id] * m_comp_v_samp.ptr[component_id];
2447 
2448         while (num_blocks--)
2449           m_mcu_org.ptr[m_blocks_per_mcu++] = component_id;
2450       }
2451     }
2452   }
2453 
2454   // Starts a new scan.
       // Returns false when locate_sos_marker() reports that no scan was found,
       // true once the per-scan state has been set up.
2455   int init_scan () {
2456     if (!locate_sos_marker())
2457       return false;
2458 
2459     calc_mcu_block_order();
2460 
2461     check_huff_tables();
2462 
2463     check_quant_tables();
2464 
         // Zero the DC value remembered for each frame component
         // (the byte count assumes the elements are uint-sized -- TODO confirm).
2465     memset(m_last_dc_val.ptr, 0, m_comps_in_frame * uint.sizeof);
2466 
2467     m_eob_run = 0;
2468 
         // Re-arm the restart counters only when a restart interval is set.
2469     if (m_restart_interval)
2470     {
2471       m_restarts_left = m_restart_interval;
2472       m_next_restart_num = 0;
2473     }
2474 
2475     fix_in_buffer();
2476 
2477     return true;
2478   }
2479 
2480   // Starts a frame. Determines if the number of components or sampling factors
2481   // are supported.
       // Sets m_scan_type and the maximum MCU geometry, derives the destination
       // scan-line sizes, allocates the scan-line, MCU-coefficient and sample
       // buffers through alloc(), and finally calls create_look_ups().
       // Unsupported layouts are reported through stop_decoding().
2482   void init_frame () {
2483     int i;
2484 
2485     if (m_comps_in_frame == 1)
2486     {
2487       version(jpegd_test) {{ import std.stdio; stderr.writeln("m_comp_h_samp=", m_comp_h_samp.ptr[0], "; m_comp_v_samp=", m_comp_v_samp.ptr[0]); }}
2488 
2489       //if ((m_comp_h_samp.ptr[0] != 1) || (m_comp_v_samp.ptr[0] != 1))
2490       //  stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2491 
2492       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2493       {
2494         m_scan_type = JPGD_GRAYSCALE;
2495         m_max_blocks_per_mcu = 1;
2496         m_max_mcu_x_size = 8;
2497         m_max_mcu_y_size = 8;
2498       }
2499       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2500       {
2501         //k8: i added this, and i absolutely don't know what it means; but it decoded two sample images i found
2502         m_scan_type = JPGD_GRAYSCALE;
2503         m_max_blocks_per_mcu = 4;
2504         m_max_mcu_x_size = 8;
2505         m_max_mcu_y_size = 8;
2506       }
2507       else
2508         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2509     }
2510     else if (m_comps_in_frame == 3)
2511     {
           // Components 1 and 2 must both be sampled 1x1; only component 0 may
           // use a larger factor (1x1, 2x1, 1x2 or 2x2).
2512       if ( ((m_comp_h_samp.ptr[1] != 1) || (m_comp_v_samp.ptr[1] != 1)) ||
2513            ((m_comp_h_samp.ptr[2] != 1) || (m_comp_v_samp.ptr[2] != 1)) )
2514         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2515 
2516       if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 1))
2517       {
2518         m_scan_type = JPGD_YH1V1;
2519 
2520         m_max_blocks_per_mcu = 3;
2521         m_max_mcu_x_size = 8;
2522         m_max_mcu_y_size = 8;
2523       }
2524       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 1))
2525       {
2526         m_scan_type = JPGD_YH2V1;
2527         m_max_blocks_per_mcu = 4;
2528         m_max_mcu_x_size = 16;
2529         m_max_mcu_y_size = 8;
2530       }
2531       else if ((m_comp_h_samp.ptr[0] == 1) && (m_comp_v_samp.ptr[0] == 2))
2532       {
2533         m_scan_type = JPGD_YH1V2;
2534         m_max_blocks_per_mcu = 4;
2535         m_max_mcu_x_size = 8;
2536         m_max_mcu_y_size = 16;
2537       }
2538       else if ((m_comp_h_samp.ptr[0] == 2) && (m_comp_v_samp.ptr[0] == 2))
2539       {
2540         m_scan_type = JPGD_YH2V2;
2541         m_max_blocks_per_mcu = 6;
2542         m_max_mcu_x_size = 16;
2543         m_max_mcu_y_size = 16;
2544       }
2545       else
2546         stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
2547     }
2548     else
2549       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2550 
         // Number of MCUs needed to cover the image in each direction, rounding up.
2551     m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
2552     m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
2553 
2554     // These values are for the *destination* pixels: after conversion.
2555     if (m_scan_type == JPGD_GRAYSCALE)
2556       m_dest_bytes_per_pixel = 1;
2557     else
2558       m_dest_bytes_per_pixel = 4;
2559 
         // Buffer stride: width rounded up to a multiple of 16 pixels. Note that
         // the 0xFFF0 mask also discards every bit above bit 15 of the width.
2560     m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
2561 
         // Unpadded byte length of one destination scan line.
2562     m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
2563 
2564     // Initialize two scan line buffers.
         // The second one is only allocated for the H1V2 and H2V2 scan types.
2565     m_pScan_line_0 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2566     if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
2567       m_pScan_line_1 = cast(ubyte*)alloc(m_dest_bytes_per_scan_line, true);
2568 
2569     m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
2570 
2571     // Should never happen
2572     if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW)
2573       stop_decoding(JPGD_ASSERTION_ERROR);
2574 
2575     // Allocate the coefficient buffer, enough for one MCU
2576     m_pMCU_coefficients = cast(jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * jpgd_block_t.sizeof);
2577 
2578     for (i = 0; i < m_max_blocks_per_mcu; i++)
2579       m_mcu_block_max_zag.ptr[i] = 64;
2580 
         // "Expanded" counts give every component as many blocks per MCU as
         // component 0 has (h_samp * v_samp of component 0).
2581     m_expanded_blocks_per_component = m_comp_h_samp.ptr[0] * m_comp_v_samp.ptr[0];
2582     m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
2583     m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
2584     // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
2585     m_freq_domain_chroma_upsample = false;
2586     version(JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING) {
2587       m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
2588     }
2589 
         // The sample buffer holds 64 bytes per block for one row of MCUs.
2590     if (m_freq_domain_chroma_upsample)
2591       m_pSample_buf = cast(ubyte*)alloc(m_expanded_blocks_per_row * 64);
2592     else
2593       m_pSample_buf = cast(ubyte*)alloc(m_max_blocks_per_row * 64);
2594 
2595     m_total_lines_left = m_image_y_size;
2596 
2597     m_mcu_lines_left = 0;
2598 
2599     create_look_ups();
2600   }
2601 
2602   // The coeff_buf series of methods originally stored the coefficients
2603   // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
2604   // was used to make this process more efficient. Now, we can store the entire
2605   // thing in RAM.
       //
       // Allocates a coeff_buf describing a grid of block_num_x * block_num_y
       // blocks, each holding block_len_x * block_len_y coefficients, plus the
       // data area for the whole grid. Both allocations go through alloc().
2606   coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) {
2607     coeff_buf* cb = cast(coeff_buf*)alloc(coeff_buf.sizeof);
2608 
2609     cb.block_num_x = block_num_x;
2610     cb.block_num_y = block_num_y;
2611     cb.block_len_x = block_len_x;
2612     cb.block_len_y = block_len_y;
         // Size of one block in bytes.
2613     cb.block_size = cast(int)((block_len_x * block_len_y) * jpgd_block_t.sizeof);
         // The second alloc() argument is `true` here -- presumably "zero the
         // memory"; confirm against alloc().
2614     cb.pData = cast(ubyte*)alloc(cb.block_size * block_num_x * block_num_y, true);
2615     return cb;
2616   }
2617 
       // Returns a pointer to the first coefficient of block (block_x, block_y)
       // inside `cb`. Blocks are stored row by row: each block is cb.block_size
       // bytes and each row holds cb.block_num_x blocks.
       // Only the upper bounds are asserted; negative indices are not checked.
2618   jpgd_block_t* coeff_buf_getp (coeff_buf *cb, int block_x, int block_y) {
2619     assert((block_x < cb.block_num_x) && (block_y < cb.block_num_y));
2620     return cast(jpgd_block_t*)(cb.pData + block_x * cb.block_size + block_y * (cb.block_size * cb.block_num_x));
2621   }
2622 
2623   // The following methods decode the various types of m_blocks encountered
2624   // in progressively encoded images.
       //
       // First DC scan: decodes one DC value for block (block_x, block_y) of
       // `component_id` and stores it in the component's m_dc_coeffs buffer.
2625   static void decode_block_dc_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2626     int s, r;
2627     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2628 
         // A non-zero Huffman symbol is the number of extra bits to read; the raw
         // bits are then converted by JPGD_HUFF_EXTEND (defined elsewhere).
2629     if ((s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_dc_tab.ptr[component_id]])) != 0)
2630     {
2631       r = pD.get_bits_no_markers(s);
2632       s = JPGD_HUFF_EXTEND(r, s);
2633     }
2634 
         // The decoded value is a difference: add the component's previous DC
         // value and remember the sum for the next block.
2635     pD.m_last_dc_val.ptr[component_id] = (s += pD.m_last_dc_val.ptr[component_id]);
2636 
         // Store the value shifted up by the scan's m_successive_low.
2637     p[0] = cast(jpgd_block_t)(s << pD.m_successive_low);
2638   }
2639 
       // DC refinement scan: reads one bit for the block and, if it is set, ORs
       // bit number m_successive_low into the stored DC coefficient.
2640   static void decode_block_dc_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2641     if (pD.get_bits_no_markers(1))
2642     {
2643       jpgd_block_t *p = pD.coeff_buf_getp(pD.m_dc_coeffs.ptr[component_id], block_x, block_y);
2644 
2645       p[0] |= (1 << pD.m_successive_low);
2646     }
2647   }
2648 
       // First AC scan: decodes the coefficients in the band
       // m_spectral_start..m_spectral_end for block (block_x, block_y) of
       // `component_id` into the component's m_ac_coeffs buffer.
2649   static void decode_block_ac_first (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2650     int k, s, r;
2651 
         // While an end-of-band run is pending this block gets no coefficients;
         // just consume one unit of the run.
2652     if (pD.m_eob_run)
2653     {
2654       pD.m_eob_run--;
2655       return;
2656     }
2657 
2658     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2659 
2660     for (k = pD.m_spectral_start; k <= pD.m_spectral_end; k++)
2661     {
2662       s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2663 
           // Split the symbol: high nibble -> r (run), low nibble -> s (bit count).
2664       r = s >> 4;
2665       s &= 15;
2666 
2667       if (s)
2668       {
             // Skip r positions, then decode one coefficient of s bits.
2669         if ((k += r) > 63)
2670           pD.stop_decoding(JPGD_DECODE_ERROR);
2671 
2672         r = pD.get_bits_no_markers(s);
2673         s = JPGD_HUFF_EXTEND(r, s);
2674 
             // g_ZAG maps the scan position k to the index used in the block.
2675         p[g_ZAG[k]] = cast(jpgd_block_t)(s << pD.m_successive_low);
2676       }
2677       else
2678       {
2679         if (r == 15)
2680         {
               // s == 0 with r == 15: skip 15 positions here; the loop's k++ adds one more.
2681           if ((k += 15) > 63)
2682             pD.stop_decoding(JPGD_DECODE_ERROR);
2683         }
2684         else
2685         {
               // s == 0 with r < 15 starts an end-of-band run of
               // (1 << r) + <r extra bits> blocks.
2686           pD.m_eob_run = 1 << r;
2687 
2688           if (r)
2689             pD.m_eob_run += pD.get_bits_no_markers(r);
2690 
               // The current block counts as the first block of the run.
2691           pD.m_eob_run--;
2692 
2693           break;
2694         }
2695       }
2696     }
2697   }
2698 
       // AC refinement scan: adds one more bit of precision to the coefficients
       // in the band m_spectral_start..m_spectral_end of block (block_x, block_y)
       // of `component_id`, and places coefficients that become non-zero in this
       // scan. The order of the bit reads below is significant.
2699   static void decode_block_ac_refine (ref jpeg_decoder pD, int component_id, int block_x, int block_y) {
2700     int s, k, r;
         // p1 / m1: +1 and -1 shifted to the bit position refined by this scan.
2701     int p1 = 1 << pD.m_successive_low;
2702     int m1 = (-1) << pD.m_successive_low;
2703     jpgd_block_t *p = pD.coeff_buf_getp(pD.m_ac_coeffs.ptr[component_id], block_x, block_y);
2704 
2705     assert(pD.m_spectral_end <= 63);
2706 
2707     k = pD.m_spectral_start;
2708 
         // No end-of-band run pending: decode symbols for this block.
2709     if (pD.m_eob_run == 0)
2710     {
2711       for ( ; k <= pD.m_spectral_end; k++)
2712       {
2713         s = pD.huff_decode(pD.m_pHuff_tabs.ptr[pD.m_comp_ac_tab.ptr[component_id]]);
2714 
2715         r = s >> 4;
2716         s &= 15;
2717 
2718         if (s)
2719         {
               // In a refinement scan the only valid non-zero size is 1.
2720           if (s != 1)
2721             pD.stop_decoding(JPGD_DECODE_ERROR);
2722 
               // One bit selects the sign of the new coefficient.
2723           if (pD.get_bits_no_markers(1))
2724             s = p1;
2725           else
2726             s = m1;
2727         }
2728         else
2729         {
2730           if (r != 15)
2731           {
                 // Start of an end-of-band run; the rest of this block is
                 // handled by the m_eob_run > 0 section below.
2732             pD.m_eob_run = 1 << r;
2733 
2734             if (r)
2735               pD.m_eob_run += pD.get_bits_no_markers(r);
2736 
2737             break;
2738           }
2739         }
2740 
             // Walk forward: every already non-zero coefficient consumes one
             // correction bit; zero coefficients count down the run r. The walk
             // stops at the zero coefficient found once r is exhausted.
2741         do
2742         {
2743           jpgd_block_t *this_coef = p + g_ZAG[k & 63];
2744 
2745           if (*this_coef != 0)
2746           {
2747             if (pD.get_bits_no_markers(1))
2748             {
                   // Apply the correction only if this bit is not already set,
                   // moving the value away from zero.
2749               if ((*this_coef & p1) == 0)
2750               {
2751                 if (*this_coef >= 0)
2752                   *this_coef = cast(jpgd_block_t)(*this_coef + p1);
2753                 else
2754                   *this_coef = cast(jpgd_block_t)(*this_coef + m1);
2755               }
2756             }
2757           }
2758           else
2759           {
2760             if (--r < 0)
2761               break;
2762           }
2763 
2764           k++;
2765 
2766         } while (k <= pD.m_spectral_end);
2767 
             // Place the newly non-zero coefficient at the position where the walk stopped.
2768         if ((s) && (k < 64))
2769         {
2770           p[g_ZAG[k]] = cast(jpgd_block_t)(s);
2771         }
2772       }
2773     }
2774 
         // End-of-band run active (possibly started just above): the remaining
         // positions only receive correction bits for already non-zero coefficients.
2775     if (pD.m_eob_run > 0)
2776     {
2777       for ( ; k <= pD.m_spectral_end; k++)
2778       {
2779         jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
2780 
2781         if (*this_coef != 0)
2782         {
2783           if (pD.get_bits_no_markers(1))
2784           {
2785             if ((*this_coef & p1) == 0)
2786             {
2787               if (*this_coef >= 0)
2788                 *this_coef = cast(jpgd_block_t)(*this_coef + p1);
2789               else
2790                 *this_coef = cast(jpgd_block_t)(*this_coef + m1);
2791             }
2792           }
2793         }
2794       }
2795 
2796       pD.m_eob_run--;
2797     }
2798   }
2799 
2800   // Decode a scan in a progressively encoded image.
       // Walks every MCU of the scan and calls `decode_block_func` once per block
       // in the MCU, passing the block's coordinates within its component.
2801   void decode_scan (pDecode_block_func decode_block_func) {
         // Note: despite the names, mcu_col iterates over rows of MCUs
         // (bounded by m_mcus_per_col) and mcu_row over MCUs within a row.
2802     int mcu_row, mcu_col, mcu_block;
         // Per-component block coordinates of the current MCU's top-left block.
2803     int[JPGD_MAX_COMPONENTS] block_x_mcu;
2804     int[JPGD_MAX_COMPONENTS] m_block_y_mcu;
2805 
2806     memset(m_block_y_mcu.ptr, 0, m_block_y_mcu.sizeof);
2807 
2808     for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
2809     {
2810       int component_num, component_id;
2811 
           // Every row of MCUs starts again at block column 0.
2812       memset(block_x_mcu.ptr, 0, block_x_mcu.sizeof);
2813 
2814       for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
2815       {
2816         int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
2817 
2818         if ((m_restart_interval) && (m_restarts_left == 0))
2819           process_restart();
2820 
2821         for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
2822         {
2823           component_id = m_mcu_org.ptr[mcu_block];
2824 
2825           decode_block_func(this, component_id, block_x_mcu.ptr[component_id] + block_x_mcu_ofs, m_block_y_mcu.ptr[component_id] + block_y_mcu_ofs);
2826 
2827           if (m_comps_in_scan == 1)
2828             block_x_mcu.ptr[component_id]++;
2829           else
2830           {
                 // Step through the component's h_samp x v_samp blocks inside
                 // the MCU; after the last one, advance the component's base
                 // column by h_samp blocks.
2831             if (++block_x_mcu_ofs == m_comp_h_samp.ptr[component_id])
2832             {
2833               block_x_mcu_ofs = 0;
2834 
2835               if (++block_y_mcu_ofs == m_comp_v_samp.ptr[component_id])
2836               {
2837                 block_y_mcu_ofs = 0;
2838                 block_x_mcu.ptr[component_id] += m_comp_h_samp.ptr[component_id];
2839               }
2840             }
2841           }
2842         }
2843 
             // Decremented for every MCU, whether or not a restart interval is set.
2844         m_restarts_left--;
2845       }
2846 
           // Advance each scanned component's base row for the next row of MCUs.
2847       if (m_comps_in_scan == 1)
2848         m_block_y_mcu.ptr[m_comp_list.ptr[0]]++;
2849       else
2850       {
2851         for (component_num = 0; component_num < m_comps_in_scan; component_num++)
2852         {
2853           component_id = m_comp_list.ptr[component_num];
2854           m_block_y_mcu.ptr[component_id] += m_comp_v_samp.ptr[component_id];
2855         }
2856       }
2857     }
2858   }
2859 
2860   // Decode a progressively encoded image.
       // Allocates DC and AC coefficient buffers for every frame component, then
       // decodes scans until init_scan() returns false, validating each scan's
       // spectral range and successive-approximation parameters first.
2861   void init_progressive () {
2862     int i;
2863 
2864     if (m_comps_in_frame == 4)
2865       stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
2866 
2867     // Allocate the coefficient buffers.
         // DC buffers hold one coefficient per block (1x1), AC buffers a full 8x8 block.
2868     for (i = 0; i < m_comps_in_frame; i++)
2869     {
2870       m_dc_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 1, 1);
2871       m_ac_coeffs.ptr[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp.ptr[i], m_max_mcus_per_col * m_comp_v_samp.ptr[i], 8, 8);
2872     }
2873 
2874     for ( ; ; )
2875     {
2876       int dc_only_scan, refinement_scan;
2877       pDecode_block_func decode_block_func;
2878 
           // Stop once no further scan is found.
2879       if (!init_scan())
2880         break;
2881 
2882       dc_only_scan = (m_spectral_start == 0);
2883       refinement_scan = (m_successive_high != 0);
2884 
2885       if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63))
2886         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2887 
           // A scan starting at coefficient 0 must also end at 0.
2888       if (dc_only_scan)
2889       {
2890         if (m_spectral_end)
2891           stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2892       }
2893       else if (m_comps_in_scan != 1)  /* AC scans can only contain one component */
2894         stop_decoding(JPGD_BAD_SOS_SPECTRAL);
2895 
           // A refinement scan must refine exactly one bit below the previous one.
2896       if ((refinement_scan) && (m_successive_low != m_successive_high - 1))
2897         stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
2898 
           // Pick the block decoder matching the scan type (DC/AC, first/refine).
2899       if (dc_only_scan)
2900       {
2901         if (refinement_scan)
2902           decode_block_func = &decode_block_dc_refine;
2903         else
2904           decode_block_func = &decode_block_dc_first;
2905       }
2906       else
2907       {
2908         if (refinement_scan)
2909           decode_block_func = &decode_block_ac_refine;
2910         else
2911           decode_block_func = &decode_block_ac_first;
2912       }
2913 
2914       decode_scan(decode_block_func);
2915 
           // NOTE(review): resets the bit reader between scans -- presumably
           // drops leftover bits and refills the bit buffer; confirm against get_bits().
2916       m_bits_left = 16;
2917       get_bits(16);
2918       get_bits(16);
2919     }
2920 
         // After the last scan, set up the MCU layout as if a single scan
         // contained all frame components in order.
2921     m_comps_in_scan = m_comps_in_frame;
2922 
2923     for (i = 0; i < m_comps_in_frame; i++)
2924       m_comp_list.ptr[i] = i;
2925 
2926     calc_mcu_block_order();
2927   }
2928 
       // Sets up decoding of a non-progressive image: starts the first scan and
       // reports JPGD_UNEXPECTED_MARKER through stop_decoding() if none is found.
2929   void init_sequential () {
2930     if (!init_scan())
2931       stop_decoding(JPGD_UNEXPECTED_MARKER);
2932   }
2933 
       // Initializes the frame, then continues with the progressive or the
       // sequential setup depending on m_progressive_flag.
2934   void decode_start () {
2935     init_frame();
2936 
2937     if (m_progressive_flag)
2938       init_progressive();
2939     else
2940       init_sequential();
2941   }
2942 
       // Prepares the decoder for the stream read callback `rfn` by calling
       // initit(rfn) and then locate_sof_marker().
2943   void decode_init (JpegStreamReadFunc rfn) {
2944     initit(rfn);
2945     locate_sof_marker();
2946   }
2947 }
2948 
2949 
2950 // ////////////////////////////////////////////////////////////////////////// //
2951 /// Read the JPEG image header and determine dimensions and number of components.
2952 /// Returns `false` if the image is not JPEG (i hope).
     ///
     /// `rfn` is the stream read callback handed to the decoder; a null callback
     /// yields `false`. `width`, `height` and `actual_comps` are only assigned
     /// when the decoder reports `JPGD_SUCCESS` after construction.
2953 public bool detect_jpeg_image_from_stream (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps) {
2954   if (rfn is null) return false;
2955   auto decoder = jpeg_decoder(rfn);
2956   version(jpegd_test) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }
2957   if (decoder.error_code != JPGD_SUCCESS) return false;
2958   width = decoder.width;
2959   height = decoder.height;
2960   actual_comps = decoder.num_components;
2961   return true;
2962 }
2963 
2964 
2965 // ////////////////////////////////////////////////////////////////////////// //
2966 /// Read the JPEG image header from a disk file and determine dimensions and number of components.
2967 /// Returns `false` if the image is not JPEG (i hope).
     ///
     /// Throws: `Exception` if `filename` is empty or the file cannot be opened.
2968 public bool detect_jpeg_image_from_file (const(char)[] filename, out int width, out int height, out int actual_comps) {
2969   import core.stdc.stdio;
2970 
2971   FILE* m_pFile;
2972   bool m_eof_flag, m_error_flag;
2973 
2974   if (filename.length == 0) throw new Exception("cannot open unnamed file");
       // fopen() needs a NUL-terminated C string, so the name is copied into a
       // terminated buffer: on the stack when short, on the C heap otherwise.
2975   if (filename.length < 512) {
2976     char[513] buffer;
2977     //import core.stdc.stdlib : alloca;
2978     auto tfn = buffer[0 .. filename.length + 1]; // (cast(char*)alloca(filename.length+1))[0..filename.length+1];
2979     tfn[0..filename.length] = filename[];
2980     tfn[filename.length] = 0;
2981     m_pFile = fopen(tfn.ptr, "rb");
2982   } else {
2983     import core.stdc.stdlib : malloc, free;
2984     auto tfn = (cast(char*)malloc(filename.length+1))[0..filename.length+1];
         // Test the pointer, not the slice: a slice with non-zero length is never
         // `is null`, so the old check could not detect a failed malloc().
2985     if (tfn.ptr !is null) {
2986       scope(exit) free(tfn.ptr);
           // Copy and terminate the name; previously fopen() was handed the
           // uninitialized buffer.
           tfn[0..filename.length] = filename[];
           tfn[filename.length] = 0;
2987       m_pFile = fopen(tfn.ptr, "rb");
2988     }
2989   }
2990   if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
2991   scope(exit) if (m_pFile) fclose(m_pFile);
2992 
       // Read callback: returns the number of bytes read, -1 on error, and sets
       // *pEOF_flag once a short read shows the end of the file was reached.
2993   return detect_jpeg_image_from_stream(
2994     delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
2995       if (m_pFile is null) return -1;
2996       if (m_eof_flag) {
2997         *pEOF_flag = true;
2998         return 0;
2999       }
3000       if (m_error_flag) return -1;
3001       int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
3002       if (bytes_read < max_bytes_to_read) {
3003         if (ferror(m_pFile)) {
3004           m_error_flag = true;
3005           return -1;
3006         }
3007         m_eof_flag = true;
3008         *pEOF_flag = true;
3009       }
3010       return bytes_read;
3011     },
3012     width, height, actual_comps);
3013 }
3014 
3015 
3016 // ////////////////////////////////////////////////////////////////////////// //
3017 /// Read the JPEG image header from a memory buffer and determine dimensions and number of components.
3018 /// Returns `false` if the image is not JPEG (i hope).
3019 public bool detect_jpeg_image_from_memory (const(void)[] buf, out int width, out int height, out int actual_comps) {
       // Read position inside `buf`, advanced by the callback below.
3020   size_t bufpos;
3021   return detect_jpeg_image_from_stream(
3022     delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
           // Module path fixed: it was written `core.stdc..string`, which does not compile.
3023       import core.stdc.string : memcpy;
3024       if (bufpos >= buf.length) {
3025         *pEOF_flag = true;
3026         return 0;
3027       }
           // Clamp the request to the bytes that are left in the buffer.
3028       if (buf.length-bufpos < max_bytes_to_read) max_bytes_to_read = cast(int)(buf.length-bufpos);
3029       memcpy(pBuf, (cast(const(ubyte)*)buf.ptr)+bufpos, max_bytes_to_read);
3030       bufpos += max_bytes_to_read;
3031       return max_bytes_to_read;
3032     },
3033     width, height, actual_comps);
3034 }
3035 
3036 
3037 // ////////////////////////////////////////////////////////////////////////// //
3038 /// Decompress a JPEG image read through the stream callback `rfn`.
3039 /// You can specify the required color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it as is to use the image value.
     ///
     /// Returns the pixel data (`width*req_comps` bytes per row, `height` rows),
     /// or `null` if `rfn` is null, `req_comps` is not -1, 1, 3 or 4, or the
     /// decoder reports an error. With `useMalloc` the buffer comes from
     /// `jpgd_malloc`; otherwise it is a GC-allocated array.
3040 public ubyte[] decompress_jpeg_image_from_stream(bool useMalloc=false) (scope JpegStreamReadFunc rfn, out int width, out int height, out int actual_comps, int req_comps=-1) {
       // Module path fixed: it was written `core.stdc..string`, which does not compile.
3041   import core.stdc.string : memcpy;
3042 
3043   //actual_comps = 0;
3044   if (rfn is null) return null;
3045   if (req_comps != -1 && req_comps != 1 && req_comps != 3 && req_comps != 4) return null;
3046 
3047   auto decoder = jpeg_decoder(rfn);
3048   if (decoder.error_code != JPGD_SUCCESS) return null;
3049   version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }
3050 
3051   immutable int image_width = decoder.width;
3052   immutable int image_height = decoder.height;
3053   width = image_width;
3054   height = image_height;
3055   actual_comps = decoder.num_components;
3056   if (req_comps < 0) req_comps = decoder.num_components;
3057 
3058   if (decoder.begin_decoding() != JPGD_SUCCESS) return null;
3059 
       // Bytes per row of the returned image.
3060   immutable int dst_bpl = image_width*req_comps;
3061 
3062   static if (useMalloc) {
3063     ubyte* pImage_data = cast(ubyte*)jpgd_malloc(dst_bpl*image_height);
3064     if (pImage_data is null) return null;
3065     auto idata = pImage_data[0..dst_bpl*image_height];
3066   } else {
3067     auto idata = new ubyte[](dst_bpl*image_height);
3068     auto pImage_data = idata.ptr;
3069   }
3070 
       // Release the output buffer if an exception escapes from the code below.
3071   scope(failure) {
3072     static if (useMalloc) {
3073       jpgd_free(pImage_data);
3074     } else {
3075       import core.memory : GC;
3076       GC.free(idata.ptr);
3077       idata = null;
3078     }
3079   }
3080 
3081   for (int y = 0; y < image_height; ++y) {
3082     const(ubyte)* pScan_line;
3083     uint scan_line_len;
3084     if (decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
3085       static if (useMalloc) {
3086         jpgd_free(pImage_data);
3087       } else {
3088         import core.memory : GC;
3089         GC.free(idata.ptr);
3090         idata = null;
3091       }
3092       return null;
3093     }
3094 
3095     ubyte* pDst = pImage_data+y*dst_bpl;
3096 
         // The decoder emits 1 byte per pixel for 1-component images and 4 bytes
         // per pixel for 3-component ones; when that already matches the
         // requested layout the row is copied verbatim.
3097     if ((req_comps == 1 && decoder.num_components == 1) || (req_comps == 4 && decoder.num_components == 3)) {
3098       memcpy(pDst, pScan_line, dst_bpl);
3099     } else if (decoder.num_components == 1) {
           // Grayscale source: replicate luma into R, G, B (alpha = 255 for 4 components).
3100       if (req_comps == 3) {
3101         for (int x = 0; x < image_width; ++x) {
3102           ubyte luma = pScan_line[x];
3103           pDst[0] = luma;
3104           pDst[1] = luma;
3105           pDst[2] = luma;
3106           pDst += 3;
3107         }
3108       } else {
3109         for (int x = 0; x < image_width; ++x) {
3110           ubyte luma = pScan_line[x];
3111           pDst[0] = luma;
3112           pDst[1] = luma;
3113           pDst[2] = luma;
3114           pDst[3] = 255;
3115           pDst += 4;
3116         }
3117       }
3118     } else if (decoder.num_components == 3) {
3119       if (req_comps == 1) {
             // Color to grayscale: 16.16 fixed-point weighted sum of R, G, B
             // (the three weights add up to 65536), rounded.
3120         immutable int YR = 19595, YG = 38470, YB = 7471;
3121         for (int x = 0; x < image_width; ++x) {
3122           int r = pScan_line[x*4+0];
3123           int g = pScan_line[x*4+1];
3124           int b = pScan_line[x*4+2];
3125           *pDst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
3126         }
3127       } else {
             // Color to RGB: drop the fourth byte of every decoded pixel.
3128         for (int x = 0; x < image_width; ++x) {
3129           pDst[0] = pScan_line[x*4+0];
3130           pDst[1] = pScan_line[x*4+1];
3131           pDst[2] = pScan_line[x*4+2];
3132           pDst += 3;
3133         }
3134       }
3135     }
3136   }
3137 
3138   return idata;
3139 }
3140 
3141 
3142 // ////////////////////////////////////////////////////////////////////////// //
3143 /// Decompress a JPEG image from a disk file.
3144 /// You can specify the required color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it as is to use the image value.
     ///
     /// Throws: `Exception` if `filename` is empty or the file cannot be opened.
3145 public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (const(char)[] filename, out int width, out int height, out int actual_comps, int req_comps=-1) {
3146   import core.stdc.stdio;
3147 
3148   FILE* m_pFile;
3149   bool m_eof_flag, m_error_flag;
3150 
3151   if (filename.length == 0) throw new Exception("cannot open unnamed file");
       // fopen() needs a NUL-terminated C string, so the name is copied into a
       // terminated buffer: on the stack when short, on the C heap otherwise.
3152   if (filename.length < 512) {
3153     char[513] buffer;
3154     //import core.stdc.stdlib : alloca;
3155     auto tfn = buffer[0 .. filename.length + 1]; // (cast(char*)alloca(filename.length+1))[0..filename.length+1];
3156     tfn[0..filename.length] = filename[];
3157     tfn[filename.length] = 0;
3158     m_pFile = fopen(tfn.ptr, "rb");
3159   } else {
3160     import core.stdc.stdlib : malloc, free;
3161     auto tfn = (cast(char*)malloc(filename.length+1))[0..filename.length+1];
         // Test the pointer, not the slice: a slice with non-zero length is never
         // `is null`, so the old check could not detect a failed malloc().
3162     if (tfn.ptr !is null) {
3163       scope(exit) free(tfn.ptr);
           // Copy and terminate the name; previously fopen() was handed the
           // uninitialized buffer.
           tfn[0..filename.length] = filename[];
           tfn[filename.length] = 0;
3164       m_pFile = fopen(tfn.ptr, "rb");
3165     }
3166   }
3167   if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
3168   scope(exit) if (m_pFile) fclose(m_pFile);
3169 
       // Read callback: returns the number of bytes read, -1 on error, and sets
       // *pEOF_flag once a short read shows the end of the file was reached.
3170   return decompress_jpeg_image_from_stream!useMalloc(
3171     delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
3172       if (m_pFile is null) return -1;
3173       if (m_eof_flag) {
3174         *pEOF_flag = true;
3175         return 0;
3176       }
3177       if (m_error_flag) return -1;
3178       int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
3179       if (bytes_read < max_bytes_to_read) {
3180         if (ferror(m_pFile)) {
3181           m_error_flag = true;
3182           return -1;
3183         }
3184         m_eof_flag = true;
3185         *pEOF_flag = true;
3186       }
3187       return bytes_read;
3188     },
3189     width, height, actual_comps, req_comps);
3190 }
3191 
3192 
3193 // ////////////////////////////////////////////////////////////////////////// //
3194 /// Decompress a JPEG image from a memory buffer.
3195 /// You can specify the required color components in `req_comps` (1 for grayscale, 3 for RGB or 4 for RGBA), or leave it as is to use the image value.
3196 public ubyte[] decompress_jpeg_image_from_memory(bool useMalloc=false) (const(void)[] buf, out int width, out int height, out int actual_comps, int req_comps=-1) {
       // Read position inside `buf`, advanced by the callback below.
3197   size_t bufpos;
3198   return decompress_jpeg_image_from_stream!useMalloc(
3199     delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
           // Module path fixed: it was written `core.stdc..string`, which does not compile.
3200       import core.stdc.string : memcpy;
3201       if (bufpos >= buf.length) {
3202         *pEOF_flag = true;
3203         return 0;
3204       }
           // Clamp the request to the bytes that are left in the buffer.
3205       if (buf.length-bufpos < max_bytes_to_read) max_bytes_to_read = cast(int)(buf.length-bufpos);
3206       memcpy(pBuf, (cast(const(ubyte)*)buf.ptr)+bufpos, max_bytes_to_read);
3207       bufpos += max_bytes_to_read;
3208       return max_bytes_to_read;
3209     },
3210     width, height, actual_comps, req_comps);
3211 }
3212 
3213 
3214 // ////////////////////////////////////////////////////////////////////////// //
3215 // If the "iv.vfs" module can be imported, add some handy API on top of it.
3216 static if (__traits(compiles, { import iv.vfs; })) enum JpegHasIVVFS = true; else enum JpegHasIVVFS = false;
3217 
3218 static if (JpegHasIVVFS) {
3219 import iv.vfs;
3220 
3221 // ////////////////////////////////////////////////////////////////////////// //
3222 /// Decompress a JPEG image from an already opened `VFile`.
3223 /// You can specify the required color components in `req_comps` (3 for RGB or 4 for RGBA), or leave it as is to use the image value.
3224 public ubyte[] decompress_jpeg_image_from_file(bool useMalloc=false) (VFile fl, out int width, out int height, out int actual_comps, int req_comps=-1) {
       // Read callback: -1 if the file is not open, 0 with *pEOF_flag set at end
       // of file, otherwise the number of bytes rawRead() delivered.
3225   return decompress_jpeg_image_from_stream!useMalloc(
3226     delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
3227       if (!fl.isOpen) return -1;
3228       if (fl.eof) {
3229         *pEOF_flag = true;
3230         return 0;
3231       }
3232       auto rd = fl.rawRead(pBuf[0..max_bytes_to_read]);
3233       if (fl.eof) *pEOF_flag = true;
3234       return cast(int)rd.length;
3235     },
3236     width, height, actual_comps, req_comps);
3237 }
3238 // vfs API
3239 }
3240 
3241 
3242 // ////////////////////////////////////////////////////////////////////////// //
3243 // If the "arsd.color" module can be imported, add some handy API on top of it.
3244 static if (__traits(compiles, { import arsd.color; })) enum JpegHasArsd = true; else enum JpegHasArsd = false;
3245 
3246 static if (JpegHasArsd) {
3247 import arsd.color;
3248 
3249 // ////////////////////////////////////////////////////////////////////////// //
3250 /// Decompress a JPEG image read through the stream callback `rfn` into a `TrueColorImage`.
     ///
     /// Returns `null` if `rfn` is null, the decoder reports an error, or the
     /// image width or height is smaller than 1.
3251 public MemoryImage readJpegFromStream (scope JpegStreamReadFunc rfn) {
       // Module path fixed: it was written `core.stdc..string`, which does not compile.
3252   import core.stdc.string : memcpy;
       // The output is always 4 bytes per pixel, so the `req_comps == 1` and
       // `req_comps == 3` branches below never run.
3253   enum req_comps = 4;
3254 
3255   if (rfn is null) return null;
3256 
3257   auto decoder = jpeg_decoder(rfn);
3258   if (decoder.error_code != JPGD_SUCCESS) return null;
3259   version(jpegd_test) scope(exit) { import core.stdc.stdio : printf; printf("%u bytes read.\n", cast(uint)decoder.total_bytes_read); }
3260 
3261   immutable int image_width = decoder.width;
3262   immutable int image_height = decoder.height;
3263   //width = image_width;
3264   //height = image_height;
3265   //actual_comps = decoder.num_components;
3266 
3267   version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("starting (%dx%d)...\n", image_width, image_height); }}
3268 
3269   if (decoder.begin_decoding() != JPGD_SUCCESS || image_width < 1 || image_height < 1) return null;
3270 
       // Bytes per row of the destination image.
3271   immutable int dst_bpl = image_width*req_comps;
3272   auto img = new TrueColorImage(image_width, image_height);
       // Drop the image's storage if an exception escapes from the code below.
3273   scope(failure) { img.clearInternal(); img = null; }
3274   ubyte* pImage_data = img.imageData.bytes.ptr;
3275 
3276   for (int y = 0; y < image_height; ++y) {
3277     //version(jpegd_test) {{ import core.stdc.stdio; stderr.fprintf("loading line %d...\n", y); }}
3278 
3279     const(ubyte)* pScan_line;
3280     uint scan_line_len;
3281     if (decoder.decode(/*(const void**)*/cast(void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
3282       img.clearInternal();
3283       img = null;
3284       //jpgd_free(pImage_data);
3285       return null;
3286     }
3287 
3288     ubyte* pDst = pImage_data+y*dst_bpl;
3289 
         // 3-component images are decoded at 4 bytes per pixel and copied
         // verbatim; 1-component rows are expanded to R = G = B = luma, A = 255.
3290     if ((req_comps == 1 && decoder.num_components == 1) || (req_comps == 4 && decoder.num_components == 3)) {
3291       memcpy(pDst, pScan_line, dst_bpl);
3292     } else if (decoder.num_components == 1) {
3293       if (req_comps == 3) {
3294         for (int x = 0; x < image_width; ++x) {
3295           ubyte luma = pScan_line[x];
3296           pDst[0] = luma;
3297           pDst[1] = luma;
3298           pDst[2] = luma;
3299           pDst += 3;
3300         }
3301       } else {
3302         for (int x = 0; x < image_width; ++x) {
3303           ubyte luma = pScan_line[x];
3304           pDst[0] = luma;
3305           pDst[1] = luma;
3306           pDst[2] = luma;
3307           pDst[3] = 255;
3308           pDst += 4;
3309         }
3310       }
3311     } else if (decoder.num_components == 3) {
3312       if (req_comps == 1) {
3313         immutable int YR = 19595, YG = 38470, YB = 7471;
3314         for (int x = 0; x < image_width; ++x) {
3315           int r = pScan_line[x*4+0];
3316           int g = pScan_line[x*4+1];
3317           int b = pScan_line[x*4+2];
3318           *pDst++ = cast(ubyte)((r * YR + g * YG + b * YB + 32768) >> 16);
3319         }
3320       } else {
3321         for (int x = 0; x < image_width; ++x) {
3322           pDst[0] = pScan_line[x*4+0];
3323           pDst[1] = pScan_line[x*4+1];
3324           pDst[2] = pScan_line[x*4+2];
3325           pDst += 3;
3326         }
3327       }
3328     }
3329   }
3330 
3331   return img;
3332 }
3333 
3334 
3335 // ////////////////////////////////////////////////////////////////////////// //
/// Decompress a JPEG image from a disk file.
///
/// Params:
///   filename = path of the file to read (need not be zero-terminated)
///
/// Returns: the result of `readJpegFromStream` fed with the file contents.
///
/// Throws: Exception if `filename` is empty or the file cannot be opened.
public MemoryImage readJpeg (const(char)[] filename) {
  import core.stdc.stdio;

  FILE* m_pFile;
  bool m_eof_flag, m_error_flag;

  if (filename.length == 0) throw new Exception("cannot open unnamed file");
  if (filename.length < 512) {
    // short name: build the zero-terminated copy on the stack
    char[513] buffer;
    auto tfn = buffer[0 .. filename.length + 1];
    tfn[0..filename.length] = filename[];
    tfn[filename.length] = 0;
    m_pFile = fopen(tfn.ptr, "rb");
  } else {
    // long name: build the zero-terminated copy on the C heap
    import core.stdc.stdlib : malloc, free;
    char* raw = cast(char*)malloc(filename.length+1);
    // test the pointer itself; a slice of a null pointer with non-zero length is not `null`
    if (raw !is null) {
      scope(exit) free(raw);
      raw[0..filename.length] = filename[];
      raw[filename.length] = 0;
      m_pFile = fopen(raw, "rb");
    }
  }
  if (m_pFile is null) throw new Exception("cannot open file '"~filename.idup~"'");
  scope(exit) if (m_pFile) fclose(m_pFile);

  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      if (m_pFile is null) return -1;
      if (m_eof_flag) {
        *pEOF_flag = true;
        return 0;
      }
      if (m_error_flag) return -1;
      int bytes_read = cast(int)(fread(pBuf, 1, max_bytes_to_read, m_pFile));
      if (bytes_read < max_bytes_to_read) {
        // a short read is either an I/O error or the end of the file
        if (ferror(m_pFile)) {
          m_error_flag = true;
          return -1;
        }
        m_eof_flag = true;
        *pEOF_flag = true;
      }
      return bytes_read;
    }
  );
}
3383 
/++
	Compresses `img` with `params` and writes the result to the file `filename`.

	Throws:
		Exception when `compress_image_to_jpeg_file` reports failure.

	History:
		Added January 22, 2021 (release version 9.2)
+/
public void writeJpeg(const(char)[] filename, TrueColorImage img, JpegParams params = JpegParams.init) {
	// the pixel bytes are handed over as 4 channels per pixel
	immutable bool written = compress_image_to_jpeg_file(filename, img.width, img.height, 4, img.imageData.bytes, params);
	if(written) return;
	throw new Exception("jpeg write failed"); // FIXME: check errno?
}
3392 
/++
  	Encodes an image as jpeg in memory.

	Every chunk produced by the encoder is appended to one growing array,
	which is returned once encoding has finished.

	History:
		Added January 22, 2021 (release version 9.2)
+/
public ubyte[] encodeJpeg(TrueColorImage img, JpegParams params = JpegParams.init) {
	ubyte[] encoded;

	// sink for the delegate-based overload: keep everything, never ask to stop
	bool collect(const scope ubyte[] chunk) {
		encoded ~= chunk;
		return true;
	}

	encodeJpeg(&collect, img, params);
	return encoded;
}
3408 
/// ditto
public void encodeJpeg(scope bool delegate(const scope ubyte[]) dg, TrueColorImage img, JpegParams params = JpegParams.init) {
	// The image bytes are passed as 4 channels per pixel; `dg` receives the
	// encoded output. A failed compression is turned into an exception.
	immutable bool succeeded = compress_image_to_jpeg_stream(dg, img.width, img.height, 4, img.imageData.bytes, params);
	if(!succeeded)
		throw new Exception("encode");
}
3416 
3417 
3418 // ////////////////////////////////////////////////////////////////////////// //
/// Decompress a JPEG image from a memory buffer.
///
/// Params:
///   buf = the complete compressed image
///
/// Returns: the result of `readJpegFromStream` fed with the bytes of `buf`.
public MemoryImage readJpegFromMemory (const(void)[] buf) {
  size_t bufpos; // number of bytes of `buf` already handed to the decoder
  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      import core.stdc.string : memcpy;
      if (bufpos >= buf.length) {
        *pEOF_flag = true;
        return 0;
      }
      // never hand out more than what is left in the buffer
      if (buf.length-bufpos < max_bytes_to_read) max_bytes_to_read = cast(int)(buf.length-bufpos);
      memcpy(pBuf, (cast(const(ubyte)*)buf.ptr)+bufpos, max_bytes_to_read);
      bufpos += max_bytes_to_read;
      return max_bytes_to_read;
    }
  );
}
3436 // done with arsd API
3437 }
3438 
3439 
3440 static if (JpegHasIVVFS) {
/// Decompress a JPEG image read from an already opened `VFile`.
public MemoryImage readJpeg (VFile fl) {
  return readJpegFromStream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // a closed file is reported as a read error
      if (!fl.isOpen) return -1;
      if (!fl.eof) {
        auto chunk = fl.rawRead(pBuf[0..max_bytes_to_read]);
        // flag end of stream as soon as this read reached it
        if (fl.eof) *pEOF_flag = true;
        return cast(int)chunk.length;
      }
      // nothing left to read
      *pEOF_flag = true;
      return 0;
    }
  );
}
3455 
/// Runs `detect_jpeg_image_from_stream` on an already opened `VFile`,
/// passing `width`, `height` and `actual_comps` through to it.
public bool detectJpeg (VFile fl, out int width, out int height, out int actual_comps) {
  return detect_jpeg_image_from_stream(
    delegate int (void* pBuf, int max_bytes_to_read, bool *pEOF_flag) {
      // a closed file is reported as a read error
      if (!fl.isOpen) return -1;
      if (!fl.eof) {
        auto chunk = fl.rawRead(pBuf[0..max_bytes_to_read]);
        // flag end of stream as soon as this read reached it
        if (fl.eof) *pEOF_flag = true;
        return cast(int)chunk.length;
      }
      // nothing left to read
      *pEOF_flag = true;
      return 0;
    },
    width, height, actual_comps);
}
3470 // vfs API
3471 }
3472 
3473 
3474 // ////////////////////////////////////////////////////////////////////////// //
3475 version(jpegd_test) {
3476 import arsd.color;
3477 import arsd.png;
3478 
// Test driver: decodes one JPEG (first command line argument, or "image.jpg")
// once from disk and once from memory, printing the detected dimensions and
// writing the results to z00.png and z01.png.
void main (string[] args) {
  import std.stdio;
  immutable string fname = (args.length > 1 ? args[1] : "image.jpg");
  int width, height, comps;
  {
    // keep the call outside of assert() so it still runs when asserts are compiled out
    immutable bool detected = detect_jpeg_image_from_file(fname, width, height, comps);
    assert(detected);
    writeln(width, "x", height, "x", comps);
    auto img = readJpeg(fname);
    writeln(img.width, "x", img.height);
    writePng("z00.png", img);
  }
  {
    ubyte[] file;
    {
      auto fl = File(fname);
      // size_t instead of int so the size is not truncated or made negative
      file.length = cast(size_t)fl.size;
      fl.rawRead(file[]);
    }
    immutable bool detected = detect_jpeg_image_from_memory(file[], width, height, comps);
    assert(detected);
    writeln(width, "x", height, "x", comps);
    auto img = readJpegFromMemory(file[]);
    writeln(img.width, "x", img.height);
    writePng("z01.png", img);
  }
}
3503 }
3504 
3505 // jpge.cpp - C++ class for JPEG compression.
3506 // Public domain, Rich Geldreich <richgel99@gmail.com>
3507 // Alex Evans: Added RGBA support, linear memory allocator.
3508 // v1.01, Dec. 18, 2010 - Initial release
3509 // v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
3510 // v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
3511 //                        Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
3512 // v1.04, May. 19, 2012: Forgot to set m_pFile ptr to null in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
3513 //                       Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
3514 //                       Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
3515 // D translation by Ketmar // Invisible Vector
3516 //
3517 // This is free and unencumbered software released into the public domain.
3518 //
3519 // Anyone is free to copy, modify, publish, use, compile, sell, or
3520 // distribute this software, either in source code form or as a compiled
3521 // binary, for any purpose, commercial or non-commercial, and by any
3522 // means.
3523 //
3524 // In jurisdictions that recognize copyright laws, the author or authors
3525 // of this software dedicate any and all copyright interest in the
3526 // software to the public domain. We make this dedication for the benefit
3527 // of the public at large and to the detriment of our heirs and
3528 // successors. We intend this dedication to be an overt act of
3529 // relinquishment in perpetuity of all present and future rights to this
3530 // software under copyright law.
3531 //
3532 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
3533 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
3534 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
3535 // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
3536 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
3537 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
3538 // OTHER DEALINGS IN THE SOFTWARE.
3539 //
3540 // For more information, please refer to <http://unlicense.org/>
3541 /**
3542  * Writes a JPEG image to a file or stream.
3543  * num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
3544  * note that alpha will not be stored in jpeg file.
3545  */
3546 
3547 public:
3548 // ////////////////////////////////////////////////////////////////////////// //
// JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common.
// The numeric values match the 0..3 codes documented on JpegParams.subsampling.
enum JpegSubsampling {
  Y_ONLY = 0, // grayscale, no chroma
  H1V1 = 1,   // chroma not subsampled
  H2V1 = 2,
  H2V2 = 3,
}
3551 
/// JPEG compression parameters structure.
public struct JpegParams {
  /// Quality: 1-100, higher is better. Typical values are around 50-95.
  int quality = 85;

  /// subsampling:
  /// 0 = Y (grayscale) only
  /// 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU)
  /// 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU)
  /// 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common)
  JpegSubsampling subsampling = JpegSubsampling.H2V2;

  /// Disables CbCr discrimination - only intended for testing.
  /// If true, the Y quantization table is also used for the CbCr channels.
  bool noChromaDiscrimFlag = false;

  /// Two-pass flag, on by default.
  /// NOTE(review): presumably selects two-pass encoding in jpeg_encoder - confirm there.
  bool twoPass = true;

  /// Returns true when `quality` lies in 1..100 and `subsampling` is one of
  /// the JpegSubsampling members.
  bool check () const pure nothrow @safe @nogc {
    immutable bool qualityOk = (quality >= 1 && quality <= 100);
    // the unsigned compare also rejects negative values
    immutable bool subsamplingOk = (cast(uint)subsampling <= cast(uint)JpegSubsampling.H2V2);
    return qualityOk && subsamplingOk;
  }
}
3578 
3579 
3580 // ////////////////////////////////////////////////////////////////////////// //
/// Writes JPEG image to a stream, using default-initialised `JpegParams`.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
bool compress_image_to_jpeg_stream (scope jpeg_encoder.WriteFunc wfn, int width, int height, int num_channels, const(ubyte)[] pImage_data) {
  JpegParams defaults;
  return compress_image_to_jpeg_stream(wfn, width, height, num_channels, pImage_data, defaults);
}
3585 
/// Writes JPEG image to a stream.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
///
/// Params:
///   wfn = sink that receives the encoded bytes
///   width = image width in pixels
///   height = image height in pixels
///   num_channels = bytes per source pixel
///   pImage_data = source pixels, at least width*height*num_channels bytes
///   comp_params = compression settings
///
/// Returns: true on success; false if the pixel buffer is too small, the
/// encoder cannot be set up, or a scanline fails to be processed.
bool compress_image_to_jpeg_stream (scope jpeg_encoder.WriteFunc wfn, int width, int height, int num_channels, const(ubyte)[] pImage_data, in JpegParams comp_params) {
  // Bytes per source row, in 64 bits so it cannot wrap (stays 0 for bad geometry).
  immutable ulong row_bytes = (width > 0 && num_channels > 0 ? cast(ulong)width*cast(ulong)num_channels : 0);
  // Reject a buffer that does not cover every row instead of reading past its end.
  // Division is used so that row_bytes*height is never formed and cannot overflow.
  if (row_bytes != 0 && height > 0 && cast(ulong)pImage_data.length/row_bytes < cast(ulong)height) return false;

  jpeg_encoder dst_image;
  if (!dst_image.setup(wfn, width, height, num_channels, comp_params)) return false;
  for (uint pass_index = 0; pass_index < dst_image.total_passes(); pass_index++) {
    for (int i = 0; i < height; i++) {
      // the offset fits in size_t because it lies inside the validated buffer
      const(ubyte)* pBuf = pImage_data.ptr+cast(size_t)(cast(ulong)i*row_bytes);
      if (!dst_image.process_scanline(pBuf)) return false;
    }
    // a null scanline marks the end of the pass
    if (!dst_image.process_scanline(null)) return false;
  }
  dst_image.deinit();
  return true;
}
3603 
3604 
/// Writes JPEG image to file, using default-initialised `JpegParams`.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
bool compress_image_to_jpeg_file (const(char)[] fname, int width, int height, int num_channels, const(ubyte)[] pImage_data) {
  JpegParams defaults;
  return compress_image_to_jpeg_file(fname, width, height, num_channels, pImage_data, defaults);
}
3609 
/// Writes JPEG image to file.
/// num_channels must be 1 (Y), 3 (RGB), 4 (RGBA), image pitch must be width*num_channels.
/// note that alpha will not be stored in jpeg file.
///
/// Returns: false if the file cannot be opened for writing, compression
/// fails, or closing the file reports an error; true otherwise.
bool compress_image_to_jpeg_file() (const(char)[] fname, int width, int height, int num_channels, const(ubyte)[] pImage_data, in auto ref JpegParams comp_params) {
  import std.internal.cstring;
  import core.stdc.stdio : FILE, fopen, fclose, fwrite;

  FILE* fl = fopen(fname.tempCString, "wb");
  if (fl is null) return false;
  // safety net for the failure path; the success path closes explicitly below
  scope(exit) if (fl !is null) fclose(fl);

  immutable bool compressed = compress_image_to_jpeg_stream(
    delegate bool (scope const(ubyte)[] buf) {
      // a short write counts as failure
      return (fwrite(buf.ptr, 1, buf.length, fl) == buf.length);
    }, width, height, num_channels, pImage_data, comp_params);
  if (!compressed) return false;

  // close here so that an error reported by fclose is returned to the caller
  immutable bool closedOk = (fclose(fl) == 0);
  fl = null;
  return closedOk;
}
3630 
3631 
3632 // ////////////////////////////////////////////////////////////////////////// //
3633 private:
3634 nothrow @trusted @nogc {
// Returns the smaller of `a` and `b`; `b` when they compare equal.
auto JPGE_MIN(T) (T a, T b) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (a < b) return a;
  return b;
}
// Returns the larger of `a` and `b`; `b` when they compare equal.
auto JPGE_MAX(T) (T a, T b) pure nothrow @safe @nogc {
  pragma(inline, true);
  if (a > b) return a;
  return b;
}
3637 
// Allocates `nSize` bytes with the C runtime's malloc and returns its result.
void *jpge_malloc (size_t nSize) {
  import core.stdc.stdlib : malloc;
  return malloc(nSize);
}
// Releases memory with the C runtime's free; a null pointer is ignored.
void jpge_free (void *p) {
  import core.stdc.stdlib : free;
  if (p is null) return;
  free(p);
}
3640 
3641 
// Various JPEG enums and tables.
// The *_CODES values are the element counts of the Huffman value tables
// declared below. MAX_HUFF_SYMBOLS sizes the symbol arrays used while building
// optimized tables, and MAX_HUFF_CODESIZE is the longest code length tracked
// before lengths are limited by huffman_enforce_max_code_size().
enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 }
3644 
// 64-entry index permutation. NOTE(review): presumably the zigzag scan order of an 8x8 block - confirm at the use site.
static immutable ubyte[64] s_zag = [ 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 ];
// Base quantization tables for luminance and chrominance. NOTE(review): presumably scaled by the quality setting elsewhere - confirm.
static immutable short[64] s_std_lum_quant = [ 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 ];
static immutable short[64] s_std_croma_quant = [ 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 ];
// Huffman table data in "bits"/"val" form: each *_bits array has 17 entries and
// each *_val array lists symbol values. NOTE(review): presumably bits[1..16] are
// per-code-length symbol counts, matching how emit_dht() reads its arguments - confirm.
static immutable ubyte[17] s_dc_lum_bits = [ 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 ];
static immutable ubyte[DC_LUM_CODES] s_dc_lum_val = [ 0,1,2,3,4,5,6,7,8,9,10,11 ];
static immutable ubyte[17] s_ac_lum_bits = [ 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d ];
static immutable ubyte[AC_LUM_CODES] s_ac_lum_val = [
  0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,
  0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
  0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
  0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,
  0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
  0xf9,0xfa
];
static immutable ubyte[17] s_dc_chroma_bits = [ 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 ];
static immutable ubyte[DC_CHROMA_CODES] s_dc_chroma_val = [ 0,1,2,3,4,5,6,7,8,9,10,11 ];
static immutable ubyte[17] s_ac_chroma_bits = [ 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 ];
static immutable ubyte[AC_CHROMA_CODES] s_ac_chroma_val = [
  0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,
  0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,
  0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
  0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
  0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
  0xf9,0xfa
];
3670 
3671 // Low-level helper functions.
3672 //template <class T> inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); }
3673 
// Integer weights for the RGB -> Y/Cb/Cr conversions below. They are used with
// a +32768 rounding term and a >>16, i.e. as 16-bit fixed-point fractions
// (YR+YG+YB == 65536).
enum YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; // int
3675 //ubyte clamp (int i) { if (cast(uint)(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return cast(ubyte)(i); }
// Saturates `i` to the range 0..255 and returns it as a byte.
ubyte clamp() (int i) {
  pragma(inline, true);
  if (i < 0) return 0;
  if (i > 255) return 255;
  return cast(ubyte)i;
}
3677 
// Converts `num_pixels` packed 3-byte RGB pixels at `pSrc` into packed 3-byte
// Y/Cb/Cr pixels at `pDst`, using 16-bit fixed-point weights.
void RGB_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    // read the whole source pixel before writing anything
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    immutable int luma = (red*YR+green*YG+blue*YB+32768)>>16;
    immutable int cb = 128+((red*CB_R+green*CB_G+blue*CB_B+32768)>>16);
    immutable int cr = 128+((red*CR_R+green*CR_G+blue*CR_B+32768)>>16);
    pDst[0] = cast(ubyte)luma;
    pDst[1] = clamp(cb);
    pDst[2] = clamp(cr);
    pDst += 3;
    pSrc += 3;
    --num_pixels;
  }
}
3686 
// Converts `num_pixels` packed 3-byte RGB pixels at `pSrc` into one luma byte
// each at `pDst`.
void RGB_to_Y (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    immutable int weighted = pSrc[0]*YR+pSrc[1]*YG+pSrc[2]*YB+32768;
    pDst[0] = cast(ubyte)(weighted>>16);
    ++pDst;
    pSrc += 3;
    --num_pixels;
  }
}
3692 
// Converts `num_pixels` packed 4-byte RGBA pixels at `pSrc` into packed 3-byte
// Y/Cb/Cr pixels at `pDst`; the fourth source byte is skipped.
void RGBA_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    // read the whole source pixel before writing anything
    immutable int red = pSrc[0];
    immutable int green = pSrc[1];
    immutable int blue = pSrc[2];
    immutable int luma = (red*YR+green*YG+blue*YB+32768)>>16;
    immutable int cb = 128+((red*CB_R+green*CB_G+blue*CB_B+32768)>>16);
    immutable int cr = 128+((red*CR_R+green*CR_G+blue*CR_B+32768)>>16);
    pDst[0] = cast(ubyte)luma;
    pDst[1] = clamp(cb);
    pDst[2] = clamp(cr);
    pDst += 3;
    pSrc += 4;
    --num_pixels;
  }
}
3701 
// Converts `num_pixels` packed 4-byte RGBA pixels at `pSrc` into one luma byte
// each at `pDst`; the fourth source byte is skipped.
void RGBA_to_Y (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    immutable int weighted = pSrc[0]*YR+pSrc[1]*YG+pSrc[2]*YB+32768;
    pDst[0] = cast(ubyte)(weighted>>16);
    ++pDst;
    pSrc += 4;
    --num_pixels;
  }
}
3707 
// Expands `num_pixels` luma bytes at `pSrc` into packed 3-byte pixels at
// `pDst`, filling both chroma bytes with 128.
void Y_to_YCC (ubyte* pDst, const(ubyte)* pSrc, int num_pixels) {
  while (num_pixels != 0) {
    pDst[0] = pSrc[0];
    pDst[1] = 128;
    pDst[2] = 128;
    pDst += 3;
    ++pSrc;
    --num_pixels;
  }
}
3711 
// Forward DCT - DCT derived from jfdctint.
// ROW_BITS is the number of extra fraction bits DCT2D keeps after its first
// (row) pass; the second (column) pass shifts them out again.
enum { ROW_BITS = 2 }
//#define DCT_DESCALE(x, n) (((x)+(((int)1)<<((n)-1)))>>(n))
// Shifts `x` right by `n` bits after adding half of 2^n, i.e. with rounding.
int DCT_DESCALE() (int x, int n) {
  pragma(inline, true);
  immutable int half = (cast(int)1)<<(n-1);
  return (x+half)>>n;
}
3716 //#define DCT_MUL(var, c) (cast(short)(var)*cast(int)(c))
3717 
//#define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7)
// Code fragment (token string) that is pasted into a function with
// `mixin(DCT1D)`. It reads and overwrites eight `int` variables named s0..s7,
// which must already exist in the enclosing scope; its own temporaries live in
// a nested block so the fragment can be mixed in more than once.
// On exit s0 and s4 hold plain sums/differences, while s1, s2, s3, s5, s6 and
// s7 hold sums of products with the integer constants below (the left operand
// of every product is first truncated to `short`). Any rescaling is left to
// the code that mixes the fragment in.
enum DCT1D = q{{
  int t0 = s0+s7, t7 = s0-s7, t1 = s1+s6, t6 = s1-s6, t2 = s2+s5, t5 = s2-s5, t3 = s3+s4, t4 = s3-s4;
  int t10 = t0+t3, t13 = t0-t3, t11 = t1+t2, t12 = t1-t2;
  int u1 = (cast(short)(t12+t13)*cast(int)(4433));
  s2 = u1+(cast(short)(t13)*cast(int)(6270));
  s6 = u1+(cast(short)(t12)*cast(int)(-15137));
  u1 = t4+t7;
  int u2 = t5+t6, u3 = t4+t6, u4 = t5+t7;
  int z5 = (cast(short)(u3+u4)*cast(int)(9633));
  t4 = (cast(short)(t4)*cast(int)(2446)); t5 = (cast(short)(t5)*cast(int)(16819));
  t6 = (cast(short)(t6)*cast(int)(25172)); t7 = (cast(short)(t7)*cast(int)(12299));
  u1 = (cast(short)(u1)*cast(int)(-7373)); u2 = (cast(short)(u2)*cast(int)(-20995));
  u3 = (cast(short)(u3)*cast(int)(-16069)); u4 = (cast(short)(u4)*cast(int)(-3196));
  u3 += z5; u4 += z5;
  s0 = t10+t11; s1 = t7+u1+u4; s3 = t6+u2+u3; s4 = t10-t11; s5 = t5+u2+u4; s7 = t4+u1+u3;
}};
3735 
// In-place two-dimensional forward DCT of the 64 ints at `p` (8 rows of 8).
// The DCT1D fragment is applied to each row and then to each column. After the
// row pass the values carry ROW_BITS extra fraction bits; the column pass
// descales them again together with a further 3 bits.
void DCT2D (int* p) {
  int c;
  int* q = p;
  // pass 1: rows (q advances by one row of 8 per iteration)
  for (c = 7; c >= 0; --c, q += 8) {
    int s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
    //DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
    mixin(DCT1D);
    // s0/s4 carry no constant multiplier, so they are only shifted up by ROW_BITS
    q[0] = s0<<ROW_BITS; q[1] = DCT_DESCALE(s1, CONST_BITS-ROW_BITS); q[2] = DCT_DESCALE(s2, CONST_BITS-ROW_BITS); q[3] = DCT_DESCALE(s3, CONST_BITS-ROW_BITS);
    q[4] = s4<<ROW_BITS; q[5] = DCT_DESCALE(s5, CONST_BITS-ROW_BITS); q[6] = DCT_DESCALE(s6, CONST_BITS-ROW_BITS); q[7] = DCT_DESCALE(s7, CONST_BITS-ROW_BITS);
  }
  // pass 2: columns (q advances by one element, rows are 8 apart)
  for (q = p, c = 7; c >= 0; --c, ++q) {
    int s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8];
    //DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
    mixin(DCT1D);
    q[0*8] = DCT_DESCALE(s0, ROW_BITS+3); q[1*8] = DCT_DESCALE(s1, CONST_BITS+ROW_BITS+3); q[2*8] = DCT_DESCALE(s2, CONST_BITS+ROW_BITS+3); q[3*8] = DCT_DESCALE(s3, CONST_BITS+ROW_BITS+3);
    q[4*8] = DCT_DESCALE(s4, ROW_BITS+3); q[5*8] = DCT_DESCALE(s5, CONST_BITS+ROW_BITS+3); q[6*8] = DCT_DESCALE(s6, CONST_BITS+ROW_BITS+3); q[7*8] = DCT_DESCALE(s7, CONST_BITS+ROW_BITS+3);
  }
}
3754 
// One Huffman symbol: `m_key` is the sort key (its frequency, later replaced
// by its code length), `m_sym_index` identifies the symbol.
struct sym_freq {
  uint m_key;
  uint m_sym_index;
}
3756 
// Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values.
// `pSyms0` holds the `num_syms` input entries and `pSyms1` is scratch space of
// the same size; the result points into whichever of the two received the
// last pass. Sorting is ascending, one key byte per pass, lowest byte first.
sym_freq* radix_sort_syms (uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1) {
  enum uint kPasses = 4;
  uint[256*kPasses] histogram; // one 256-bucket histogram per key byte, zero-initialised
  foreach (uint idx; 0..num_syms) {
    immutable uint key = pSyms0[idx].m_key;
    foreach (uint pass; 0..kPasses) ++histogram[256*pass+((key>>(8*pass))&0xFF)];
  }

  // drop leading passes whose key byte is zero for every symbol
  uint passes_needed = kPasses;
  while (passes_needed > 1 && num_syms == histogram[(passes_needed-1)*256]) --passes_needed;

  sym_freq* src = pSyms0;
  sym_freq* dst = pSyms1;
  uint[256] bucket_start;
  foreach (uint pass; 0..passes_needed) {
    immutable uint shift = pass*8;
    const(uint)* counts = &histogram[pass<<8];
    // prefix sums turn bucket counts into first output positions
    uint running = 0;
    foreach (uint b; 0..256) {
      bucket_start[b] = running;
      running += counts[b];
    }
    // stable scatter into the other buffer
    foreach (uint idx; 0..num_syms) {
      immutable uint bucket = (src[idx].m_key>>shift)&0xFF;
      dst[bucket_start[bucket]++] = src[idx];
    }
    sym_freq* swapped = src;
    src = dst;
    dst = swapped;
  }
  return src;
}
3782 
// calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
// Works in place on the `n` entries of `A`, touching only m_key. The caller in
// this file passes keys sorted in ascending order (frequencies) and afterwards
// reads m_key as the code length of each symbol.
// n == 0 is a no-op; n == 1 gets code length 1.
// NOTE(review): the three loops look like the usual phases of the in-place
// Moffat/Katajainen algorithm (build parent links, turn them into depths,
// assign leaf depths) - confirm against the original paper before relying on it.
void calculate_minimum_redundancy (sym_freq* A, int n) {
  int root, leaf, next, avbl, used, dpth;
  if (n == 0) return;
  if (n == 1) { A[0].m_key = 1; return; }
  A[0].m_key += A[1].m_key; root = 0; leaf = 2;
  for (next=1; next < n-1; next++)
  {
    if (leaf>=n || A[root].m_key<A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = next; } else A[next].m_key = A[leaf++].m_key;
    if (leaf>=n || (root<next && A[root].m_key<A[leaf].m_key)) { A[next].m_key += A[root].m_key; A[root++].m_key = next; } else A[next].m_key += A[leaf++].m_key;
  }
  A[n-2].m_key = 0;
  for (next=n-3; next>=0; next--) A[next].m_key = A[A[next].m_key].m_key+1;
  avbl = 1; used = dpth = 0; root = n-2; next = n-1;
  while (avbl>0)
  {
    while (root >= 0 && cast(int)A[root].m_key == dpth) { used++; root--; }
    while (avbl>used) { A[next--].m_key = dpth; avbl--; }
    avbl = 2*used; dpth++; used = 0;
  }
}
3804 
// Limits canonical Huffman code table's max code size to max_code_size.
// `pNum_codes[len]` is the number of codes of length `len`, for len up to
// MAX_HUFF_CODESIZE; the array is adjusted in place. Nothing happens when the
// code list has at most one entry.
void huffman_enforce_max_code_size (int* pNum_codes, int code_list_len, int max_code_size) {
  if (code_list_len <= 1) return;

  // fold every over-long code into the longest allowed length
  int len = max_code_size+1;
  while (len <= MAX_HUFF_CODESIZE) {
    pNum_codes[max_code_size] += pNum_codes[len];
    ++len;
  }

  // weight each length by the code space it occupies at max_code_size bits
  uint total = 0;
  for (len = max_code_size; len > 0; --len) total += ((cast(uint)pNum_codes[len])<<(max_code_size-len));

  // rebalance until the codes fill the code space exactly
  immutable ulong full = (1UL<<max_code_size);
  while (total != full) {
    --pNum_codes[max_code_size];
    for (len = max_code_size-1; len > 0; --len) {
      if (pNum_codes[len] == 0) continue;
      // lengthen one shorter code: it becomes two codes of the next length
      --pNum_codes[len];
      pNum_codes[len+1] += 2;
      break;
    }
    --total;
  }
}
3819 }
3820 
3821 
3822 // ////////////////////////////////////////////////////////////////////////// //
3823 // Lower level jpeg_encoder class - useful if more control is needed than the above helper functions.
3824 struct jpeg_encoder {
3825 public:
3826   alias WriteFunc = bool delegate (scope const(ubyte)[] buf);
3827 
3828 nothrow /*@trusted @nogc*/:
3829 private:
3830   alias sample_array_t = int;
3831 
3832   WriteFunc m_pStream;
3833   JpegParams m_params;
3834   ubyte m_num_components;
3835   ubyte[3] m_comp_h_samp;
3836   ubyte[3] m_comp_v_samp;
3837   int m_image_x, m_image_y, m_image_bpp, m_image_bpl;
3838   int m_image_x_mcu, m_image_y_mcu;
3839   int m_image_bpl_xlt, m_image_bpl_mcu;
3840   int m_mcus_per_row;
3841   int m_mcu_x, m_mcu_y;
3842   ubyte*[16] m_mcu_lines;
3843   ubyte m_mcu_y_ofs;
3844   sample_array_t[64] m_sample_array;
3845   short[64] m_coefficient_array;
3846   int[64][2] m_quantization_tables;
3847   uint[256][4] m_huff_codes;
3848   ubyte[256][4] m_huff_code_sizes;
3849   ubyte[17][4] m_huff_bits;
3850   ubyte[256][4] m_huff_val;
3851   uint[256][4] m_huff_count;
3852   int[3] m_last_dc_val;
3853   enum JPGE_OUT_BUF_SIZE = 2048;
3854   ubyte[JPGE_OUT_BUF_SIZE] m_out_buf;
3855   ubyte* m_pOut_buf;
3856   uint m_out_buf_left;
3857   uint m_bit_buffer;
3858   uint m_bits_in;
3859   ubyte m_pass_num;
3860   bool m_all_stream_writes_succeeded = true;
3861 
3862 private:
  // Generates an optimized Huffman table.
  // Reads the symbol counts in m_huff_count[table_num][0..table_len] and fills
  // m_huff_bits[table_num] (number of symbols per code size) and
  // m_huff_val[table_num] (symbol indices ordered by code size).
  void optimize_huffman_table (int table_num, int table_len) {
    sym_freq[MAX_HUFF_SYMBOLS] syms0;
    sym_freq[MAX_HUFF_SYMBOLS] syms1;
    syms0[0].m_key = 1; syms0[0].m_sym_index = 0;  // dummy symbol, assures that no valid code contains all 1's
    int num_used_syms = 1;
    const uint *pSym_count = &m_huff_count[table_num][0];
    // collect only symbols that actually occur; indices are stored +1 because slot 0 is the dummy
    for (int i = 0; i < table_len; i++) {
      if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i+1; }
    }
    sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0.ptr, syms1.ptr);
    // from here on m_key holds each symbol's code size instead of its count
    calculate_minimum_redundancy(pSyms, num_used_syms);

    // Count the # of symbols of each code size.
    int[1+MAX_HUFF_CODESIZE] num_codes;
    //clear_obj(num_codes);
    for (int i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++;

    enum JPGE_CODE_SIZE_LIMIT = 16u; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
    huffman_enforce_max_code_size(num_codes.ptr, num_used_syms, JPGE_CODE_SIZE_LIMIT);

    // Compute m_huff_bits array, which contains the # of symbols per code size.
    //clear_obj(m_huff_bits[table_num]);
    m_huff_bits[table_num][] = 0;
    for (int i = 1; i <= cast(int)JPGE_CODE_SIZE_LIMIT; i++) m_huff_bits[table_num][i] = cast(ubyte)(num_codes[i]);

    // Remove the dummy symbol added above, which must be in largest bucket.
    for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--) {
      if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; }
    }

    // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
    for (int i = num_used_syms-1; i >= 1; i--) m_huff_val[table_num][num_used_syms-1-i] = cast(ubyte)(pSyms[i].m_sym_index-1);
  }
3897 
3898   bool put_obj(T) (T v) {
3899     try {
3900       return (m_pStream !is null && m_pStream((&v)[0..1]));
3901     } catch (Exception) {}
3902     return false;
3903   }
3904 
3905   bool put_buf() (const(void)* v, uint len) {
3906     try {
3907       return (m_pStream !is null && m_pStream((cast(ubyte*)v)[0..len]));
3908     } catch (Exception) {}
3909     return false;
3910   }
3911 
3912   // JPEG marker generation.
3913   void emit_byte (ubyte i) {
3914     m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && put_obj(i);
3915   }
3916 
3917   void emit_word(uint i) {
3918     emit_byte(cast(ubyte)(i>>8));
3919     emit_byte(cast(ubyte)(i&0xFF));
3920   }
3921 
3922   void emit_marker (int marker) {
3923     emit_byte(cast(ubyte)(0xFF));
3924     emit_byte(cast(ubyte)(marker));
3925   }
3926 
3927   // Emit JFIF marker
3928   void emit_jfif_app0 () {
3929     emit_marker(M_APP0);
3930     emit_word(2+4+1+2+1+2+2+1+1);
3931     emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */
3932     emit_byte(0);
3933     emit_byte(1); /* Major version */
3934     emit_byte(1); /* Minor version */
3935     emit_byte(0); /* Density unit */
3936     emit_word(1);
3937     emit_word(1);
3938     emit_byte(0); /* No thumbnail image */
3939     emit_byte(0);
3940   }
3941 
3942   // Emit quantization tables
3943   void emit_dqt () {
3944     for (int i = 0; i < (m_num_components == 3 ? 2 : 1); i++) {
3945       emit_marker(M_DQT);
3946       emit_word(64+1+2);
3947       emit_byte(cast(ubyte)(i));
3948       for (int j = 0; j < 64; j++) emit_byte(cast(ubyte)(m_quantization_tables[i][j]));
3949     }
3950   }
3951 
3952   // Emit start of frame marker
3953   void emit_sof () {
3954     emit_marker(M_SOF0); /* baseline */
3955     emit_word(3*m_num_components+2+5+1);
3956     emit_byte(8); /* precision */
3957     emit_word(m_image_y);
3958     emit_word(m_image_x);
3959     emit_byte(m_num_components);
3960     for (int i = 0; i < m_num_components; i++) {
3961       emit_byte(cast(ubyte)(i+1)); /* component ID */
3962       emit_byte(cast(ubyte)((m_comp_h_samp[i]<<4)+m_comp_v_samp[i])); /* h and v sampling */
3963       emit_byte(i > 0); /* quant. table num */
3964     }
3965   }
3966 
3967   // Emit Huffman table.
3968   void emit_dht (ubyte* bits, ubyte* val, int index, bool ac_flag) {
3969     emit_marker(M_DHT);
3970     int length = 0;
3971     for (int i = 1; i <= 16; i++) length += bits[i];
3972     emit_word(length+2+1+16);
3973     emit_byte(cast(ubyte)(index+(ac_flag<<4)));
3974     for (int i = 1; i <= 16; i++) emit_byte(bits[i]);
3975     for (int i = 0; i < length; i++) emit_byte(val[i]);
3976   }
3977 
3978   // Emit all Huffman tables.
3979   void emit_dhts () {
3980     emit_dht(m_huff_bits[0+0].ptr, m_huff_val[0+0].ptr, 0, false);
3981     emit_dht(m_huff_bits[2+0].ptr, m_huff_val[2+0].ptr, 0, true);
3982     if (m_num_components == 3) {
3983       emit_dht(m_huff_bits[0+1].ptr, m_huff_val[0+1].ptr, 1, false);
3984       emit_dht(m_huff_bits[2+1].ptr, m_huff_val[2+1].ptr, 1, true);
3985     }
3986   }
3987 
3988   // emit start of scan
3989   void emit_sos () {
3990     emit_marker(M_SOS);
3991     emit_word(2*m_num_components+2+1+3);
3992     emit_byte(m_num_components);
3993     for (int i = 0; i < m_num_components; i++) {
3994       emit_byte(cast(ubyte)(i+1));
3995       if (i == 0)
3996         emit_byte((0<<4)+0);
3997       else
3998         emit_byte((1<<4)+1);
3999     }
4000     emit_byte(0); /* spectral selection */
4001     emit_byte(63);
4002     emit_byte(0);
4003   }
4004 
4005   // Emit all markers at beginning of image file.
4006   void emit_markers () {
4007     emit_marker(M_SOI);
4008     emit_jfif_app0();
4009     emit_dqt();
4010     emit_sof();
4011     emit_dhts();
4012     emit_sos();
4013   }
4014 
4015   // Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays.
4016   void compute_huffman_table (uint* codes, ubyte* code_sizes, ubyte* bits, ubyte* val) {
4017     import core.stdc..string : memset;
4018 
4019     int i, l, last_p, si;
4020     ubyte[257] huff_size;
4021     uint[257] huff_code;
4022     uint code;
4023 
4024     int p = 0;
4025     for (l = 1; l <= 16; l++)
4026       for (i = 1; i <= bits[l]; i++)
4027         huff_size[p++] = cast(ubyte)l;
4028 
4029     huff_size[p] = 0; last_p = p; // write sentinel
4030 
4031     code = 0; si = huff_size[0]; p = 0;
4032 
4033     while (huff_size[p])
4034     {
4035       while (huff_size[p] == si)
4036         huff_code[p++] = code++;
4037       code <<= 1;
4038       si++;
4039     }
4040 
4041     memset(codes, 0, codes[0].sizeof*256);
4042     memset(code_sizes, 0, code_sizes[0].sizeof*256);
4043     for (p = 0; p < last_p; p++)
4044     {
4045       codes[val[p]]      = huff_code[p];
4046       code_sizes[val[p]] = huff_size[p];
4047     }
4048   }
4049 
4050   // Quantization table generation.
4051   void compute_quant_table (int* pDst, const(short)* pSrc) {
4052     int q;
4053     if (m_params.quality < 50)
4054       q = 5000/m_params.quality;
4055     else
4056       q = 200-m_params.quality*2;
4057     for (int i = 0; i < 64; i++) {
4058       int j = *pSrc++; j = (j*q+50L)/100L;
4059       *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255);
4060     }
4061   }
4062 
4063   // Higher-level methods.
4064   void first_pass_init () {
4065     import core.stdc..string : memset;
4066     m_bit_buffer = 0; m_bits_in = 0;
4067     memset(m_last_dc_val.ptr, 0, 3*m_last_dc_val[0].sizeof);
4068     m_mcu_y_ofs = 0;
4069     m_pass_num = 1;
4070   }
4071 
4072   bool second_pass_init () {
4073     compute_huffman_table(&m_huff_codes[0+0][0], &m_huff_code_sizes[0+0][0], m_huff_bits[0+0].ptr, m_huff_val[0+0].ptr);
4074     compute_huffman_table(&m_huff_codes[2+0][0], &m_huff_code_sizes[2+0][0], m_huff_bits[2+0].ptr, m_huff_val[2+0].ptr);
4075     if (m_num_components > 1)
4076     {
4077       compute_huffman_table(&m_huff_codes[0+1][0], &m_huff_code_sizes[0+1][0], m_huff_bits[0+1].ptr, m_huff_val[0+1].ptr);
4078       compute_huffman_table(&m_huff_codes[2+1][0], &m_huff_code_sizes[2+1][0], m_huff_bits[2+1].ptr, m_huff_val[2+1].ptr);
4079     }
4080     first_pass_init();
4081     emit_markers();
4082     m_pass_num = 2;
4083     return true;
4084   }
4085 
4086   bool jpg_open (int p_x_res, int p_y_res, int src_channels) {
4087     m_num_components = 3;
4088     switch (m_params.subsampling) {
4089       case JpegSubsampling.Y_ONLY:
4090         m_num_components = 1;
4091         m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
4092         m_mcu_x          = 8; m_mcu_y          = 8;
4093         break;
4094       case JpegSubsampling.H1V1:
4095         m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
4096         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4097         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4098         m_mcu_x          = 8; m_mcu_y          = 8;
4099         break;
4100       case JpegSubsampling.H2V1:
4101         m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1;
4102         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4103         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4104         m_mcu_x          = 16; m_mcu_y         = 8;
4105         break;
4106       case JpegSubsampling.H2V2:
4107         m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2;
4108         m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
4109         m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
4110         m_mcu_x          = 16; m_mcu_y         = 16;
4111         break;
4112       default: assert(0);
4113     }
4114 
4115     m_image_x        = p_x_res; m_image_y = p_y_res;
4116     m_image_bpp      = src_channels;
4117     m_image_bpl      = m_image_x*src_channels;
4118     m_image_x_mcu    = (m_image_x+m_mcu_x-1)&(~(m_mcu_x-1));
4119     m_image_y_mcu    = (m_image_y+m_mcu_y-1)&(~(m_mcu_y-1));
4120     m_image_bpl_xlt  = m_image_x*m_num_components;
4121     m_image_bpl_mcu  = m_image_x_mcu*m_num_components;
4122     m_mcus_per_row   = m_image_x_mcu/m_mcu_x;
4123 
4124     if ((m_mcu_lines[0] = cast(ubyte*)(jpge_malloc(m_image_bpl_mcu*m_mcu_y))) is null) return false;
4125     for (int i = 1; i < m_mcu_y; i++)
4126       m_mcu_lines[i] = m_mcu_lines[i-1]+m_image_bpl_mcu;
4127 
4128     compute_quant_table(m_quantization_tables[0].ptr, s_std_lum_quant.ptr);
4129     compute_quant_table(m_quantization_tables[1].ptr, (m_params.noChromaDiscrimFlag ? s_std_lum_quant.ptr : s_std_croma_quant.ptr));
4130 
4131     m_out_buf_left = JPGE_OUT_BUF_SIZE;
4132     m_pOut_buf = m_out_buf.ptr;
4133 
4134     if (m_params.twoPass)
4135     {
4136       //clear_obj(m_huff_count);
4137       import core.stdc..string : memset;
4138       memset(m_huff_count.ptr, 0, m_huff_count.sizeof);
4139       first_pass_init();
4140     }
4141     else
4142     {
4143       import core.stdc..string : memcpy;
4144       memcpy(m_huff_bits[0+0].ptr, s_dc_lum_bits.ptr, 17);    memcpy(m_huff_val[0+0].ptr, s_dc_lum_val.ptr, DC_LUM_CODES);
4145       memcpy(m_huff_bits[2+0].ptr, s_ac_lum_bits.ptr, 17);    memcpy(m_huff_val[2+0].ptr, s_ac_lum_val.ptr, AC_LUM_CODES);
4146       memcpy(m_huff_bits[0+1].ptr, s_dc_chroma_bits.ptr, 17); memcpy(m_huff_val[0+1].ptr, s_dc_chroma_val.ptr, DC_CHROMA_CODES);
4147       memcpy(m_huff_bits[2+1].ptr, s_ac_chroma_bits.ptr, 17); memcpy(m_huff_val[2+1].ptr, s_ac_chroma_val.ptr, AC_CHROMA_CODES);
4148       if (!second_pass_init()) return false;   // in effect, skip over the first pass
4149     }
4150     return m_all_stream_writes_succeeded;
4151   }
4152 
4153   void load_block_8_8_grey (int x) {
4154     ubyte *pSrc;
4155     sample_array_t *pDst = m_sample_array.ptr;
4156     x <<= 3;
4157     for (int i = 0; i < 8; i++, pDst += 8)
4158     {
4159       pSrc = m_mcu_lines[i]+x;
4160       pDst[0] = pSrc[0]-128; pDst[1] = pSrc[1]-128; pDst[2] = pSrc[2]-128; pDst[3] = pSrc[3]-128;
4161       pDst[4] = pSrc[4]-128; pDst[5] = pSrc[5]-128; pDst[6] = pSrc[6]-128; pDst[7] = pSrc[7]-128;
4162     }
4163   }
4164 
4165   void load_block_8_8 (int x, int y, int c) {
4166     ubyte *pSrc;
4167     sample_array_t *pDst = m_sample_array.ptr;
4168     x = (x*(8*3))+c;
4169     y <<= 3;
4170     for (int i = 0; i < 8; i++, pDst += 8)
4171     {
4172       pSrc = m_mcu_lines[y+i]+x;
4173       pDst[0] = pSrc[0*3]-128; pDst[1] = pSrc[1*3]-128; pDst[2] = pSrc[2*3]-128; pDst[3] = pSrc[3*3]-128;
4174       pDst[4] = pSrc[4*3]-128; pDst[5] = pSrc[5*3]-128; pDst[6] = pSrc[6*3]-128; pDst[7] = pSrc[7*3]-128;
4175     }
4176   }
4177 
4178   void load_block_16_8 (int x, int c) {
4179     ubyte* pSrc1;
4180     ubyte* pSrc2;
4181     sample_array_t *pDst = m_sample_array.ptr;
4182     x = (x*(16*3))+c;
4183     int a = 0, b = 2;
4184     for (int i = 0; i < 16; i += 2, pDst += 8)
4185     {
4186       pSrc1 = m_mcu_lines[i+0]+x;
4187       pSrc2 = m_mcu_lines[i+1]+x;
4188       pDst[0] = ((pSrc1[ 0*3]+pSrc1[ 1*3]+pSrc2[ 0*3]+pSrc2[ 1*3]+a)>>2)-128; pDst[1] = ((pSrc1[ 2*3]+pSrc1[ 3*3]+pSrc2[ 2*3]+pSrc2[ 3*3]+b)>>2)-128;
4189       pDst[2] = ((pSrc1[ 4*3]+pSrc1[ 5*3]+pSrc2[ 4*3]+pSrc2[ 5*3]+a)>>2)-128; pDst[3] = ((pSrc1[ 6*3]+pSrc1[ 7*3]+pSrc2[ 6*3]+pSrc2[ 7*3]+b)>>2)-128;
4190       pDst[4] = ((pSrc1[ 8*3]+pSrc1[ 9*3]+pSrc2[ 8*3]+pSrc2[ 9*3]+a)>>2)-128; pDst[5] = ((pSrc1[10*3]+pSrc1[11*3]+pSrc2[10*3]+pSrc2[11*3]+b)>>2)-128;
4191       pDst[6] = ((pSrc1[12*3]+pSrc1[13*3]+pSrc2[12*3]+pSrc2[13*3]+a)>>2)-128; pDst[7] = ((pSrc1[14*3]+pSrc1[15*3]+pSrc2[14*3]+pSrc2[15*3]+b)>>2)-128;
4192       int temp = a; a = b; b = temp;
4193     }
4194   }
4195 
4196   void load_block_16_8_8 (int x, int c) {
4197     ubyte *pSrc1;
4198     sample_array_t *pDst = m_sample_array.ptr;
4199     x = (x*(16*3))+c;
4200     for (int i = 0; i < 8; i++, pDst += 8) {
4201       pSrc1 = m_mcu_lines[i+0]+x;
4202       pDst[0] = ((pSrc1[ 0*3]+pSrc1[ 1*3])>>1)-128; pDst[1] = ((pSrc1[ 2*3]+pSrc1[ 3*3])>>1)-128;
4203       pDst[2] = ((pSrc1[ 4*3]+pSrc1[ 5*3])>>1)-128; pDst[3] = ((pSrc1[ 6*3]+pSrc1[ 7*3])>>1)-128;
4204       pDst[4] = ((pSrc1[ 8*3]+pSrc1[ 9*3])>>1)-128; pDst[5] = ((pSrc1[10*3]+pSrc1[11*3])>>1)-128;
4205       pDst[6] = ((pSrc1[12*3]+pSrc1[13*3])>>1)-128; pDst[7] = ((pSrc1[14*3]+pSrc1[15*3])>>1)-128;
4206     }
4207   }
4208 
4209   void load_quantized_coefficients (int component_num) {
4210     int *q = m_quantization_tables[component_num > 0].ptr;
4211     short *pDst = m_coefficient_array.ptr;
4212     for (int i = 0; i < 64; i++)
4213     {
4214       sample_array_t j = m_sample_array[s_zag[i]];
4215       if (j < 0)
4216       {
4217         if ((j = -j+(*q>>1)) < *q)
4218           *pDst++ = 0;
4219         else
4220           *pDst++ = cast(short)(-(j/ *q));
4221       }
4222       else
4223       {
4224         if ((j = j+(*q>>1)) < *q)
4225           *pDst++ = 0;
4226         else
4227           *pDst++ = cast(short)((j/ *q));
4228       }
4229       q++;
4230     }
4231   }
4232 
4233   void flush_output_buffer () {
4234     if (m_out_buf_left != JPGE_OUT_BUF_SIZE) m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && put_buf(m_out_buf.ptr, JPGE_OUT_BUF_SIZE-m_out_buf_left);
4235     m_pOut_buf = m_out_buf.ptr;
4236     m_out_buf_left = JPGE_OUT_BUF_SIZE;
4237   }
4238 
4239   void put_bits (uint bits, uint len) {
4240     m_bit_buffer |= (cast(uint)bits<<(24-(m_bits_in += len)));
4241     while (m_bits_in >= 8) {
4242       ubyte c;
4243       //#define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); }
4244       //JPGE_PUT_BYTE(c = (ubyte)((m_bit_buffer>>16)&0xFF));
4245       //if (c == 0xFF) JPGE_PUT_BYTE(0);
4246       c = cast(ubyte)((m_bit_buffer>>16)&0xFF);
4247       *m_pOut_buf++ = c;
4248       if (--m_out_buf_left == 0) flush_output_buffer();
4249       if (c == 0xFF) {
4250         *m_pOut_buf++ = 0;
4251         if (--m_out_buf_left == 0) flush_output_buffer();
4252       }
4253       m_bit_buffer <<= 8;
4254       m_bits_in -= 8;
4255     }
4256   }
4257 
4258   void code_coefficients_pass_one (int component_num) {
4259     if (component_num >= 3) return; // just to shut up static analysis
4260     int i, run_len, nbits, temp1;
4261     short *src = m_coefficient_array.ptr;
4262     uint *dc_count = (component_num ? m_huff_count[0+1].ptr : m_huff_count[0+0].ptr);
4263     uint *ac_count = (component_num ? m_huff_count[2+1].ptr : m_huff_count[2+0].ptr);
4264 
4265     temp1 = src[0]-m_last_dc_val[component_num];
4266     m_last_dc_val[component_num] = src[0];
4267     if (temp1 < 0) temp1 = -temp1;
4268 
4269     nbits = 0;
4270     while (temp1)
4271     {
4272       nbits++; temp1 >>= 1;
4273     }
4274 
4275     dc_count[nbits]++;
4276     for (run_len = 0, i = 1; i < 64; i++)
4277     {
4278       if ((temp1 = m_coefficient_array[i]) == 0)
4279         run_len++;
4280       else
4281       {
4282         while (run_len >= 16)
4283         {
4284           ac_count[0xF0]++;
4285           run_len -= 16;
4286         }
4287         if (temp1 < 0) temp1 = -temp1;
4288         nbits = 1;
4289         while (temp1 >>= 1) nbits++;
4290         ac_count[(run_len<<4)+nbits]++;
4291         run_len = 0;
4292       }
4293     }
4294     if (run_len) ac_count[0]++;
4295   }
4296 
4297   void code_coefficients_pass_two (int component_num) {
4298     int i, j, run_len, nbits, temp1, temp2;
4299     short *pSrc = m_coefficient_array.ptr;
4300     uint*[2] codes;
4301     ubyte*[2] code_sizes;
4302 
4303     if (component_num == 0)
4304     {
4305       codes[0] = m_huff_codes[0+0].ptr; codes[1] = m_huff_codes[2+0].ptr;
4306       code_sizes[0] = m_huff_code_sizes[0+0].ptr; code_sizes[1] = m_huff_code_sizes[2+0].ptr;
4307     }
4308     else
4309     {
4310       codes[0] = m_huff_codes[0+1].ptr; codes[1] = m_huff_codes[2+1].ptr;
4311       code_sizes[0] = m_huff_code_sizes[0+1].ptr; code_sizes[1] = m_huff_code_sizes[2+1].ptr;
4312     }
4313 
4314     temp1 = temp2 = pSrc[0]-m_last_dc_val[component_num];
4315     m_last_dc_val[component_num] = pSrc[0];
4316 
4317     if (temp1 < 0)
4318     {
4319       temp1 = -temp1; temp2--;
4320     }
4321 
4322     nbits = 0;
4323     while (temp1)
4324     {
4325       nbits++; temp1 >>= 1;
4326     }
4327 
4328     put_bits(codes[0][nbits], code_sizes[0][nbits]);
4329     if (nbits) put_bits(temp2&((1<<nbits)-1), nbits);
4330 
4331     for (run_len = 0, i = 1; i < 64; i++)
4332     {
4333       if ((temp1 = m_coefficient_array[i]) == 0)
4334         run_len++;
4335       else
4336       {
4337         while (run_len >= 16)
4338         {
4339           put_bits(codes[1][0xF0], code_sizes[1][0xF0]);
4340           run_len -= 16;
4341         }
4342         if ((temp2 = temp1) < 0)
4343         {
4344           temp1 = -temp1;
4345           temp2--;
4346         }
4347         nbits = 1;
4348         while (temp1 >>= 1)
4349           nbits++;
4350         j = (run_len<<4)+nbits;
4351         put_bits(codes[1][j], code_sizes[1][j]);
4352         put_bits(temp2&((1<<nbits)-1), nbits);
4353         run_len = 0;
4354       }
4355     }
4356     if (run_len)
4357       put_bits(codes[1][0], code_sizes[1][0]);
4358   }
4359 
4360   void code_block (int component_num) {
4361     DCT2D(m_sample_array.ptr);
4362     load_quantized_coefficients(component_num);
4363     if (m_pass_num == 1)
4364       code_coefficients_pass_one(component_num);
4365     else
4366       code_coefficients_pass_two(component_num);
4367   }
4368 
4369   void process_mcu_row () {
4370     if (m_num_components == 1)
4371     {
4372       for (int i = 0; i < m_mcus_per_row; i++)
4373       {
4374         load_block_8_8_grey(i); code_block(0);
4375       }
4376     }
4377     else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
4378     {
4379       for (int i = 0; i < m_mcus_per_row; i++)
4380       {
4381         load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2);
4382       }
4383     }
4384     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
4385     {
4386       for (int i = 0; i < m_mcus_per_row; i++)
4387       {
4388         load_block_8_8(i*2+0, 0, 0); code_block(0); load_block_8_8(i*2+1, 0, 0); code_block(0);
4389         load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2);
4390       }
4391     }
4392     else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
4393     {
4394       for (int i = 0; i < m_mcus_per_row; i++)
4395       {
4396         load_block_8_8(i*2+0, 0, 0); code_block(0); load_block_8_8(i*2+1, 0, 0); code_block(0);
4397         load_block_8_8(i*2+0, 1, 0); code_block(0); load_block_8_8(i*2+1, 1, 0); code_block(0);
4398         load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2);
4399       }
4400     }
4401   }
4402 
4403   bool terminate_pass_one () {
4404     optimize_huffman_table(0+0, DC_LUM_CODES); optimize_huffman_table(2+0, AC_LUM_CODES);
4405     if (m_num_components > 1)
4406     {
4407       optimize_huffman_table(0+1, DC_CHROMA_CODES); optimize_huffman_table(2+1, AC_CHROMA_CODES);
4408     }
4409     return second_pass_init();
4410   }
4411 
4412   bool terminate_pass_two () {
4413     put_bits(0x7F, 7);
4414     flush_output_buffer();
4415     emit_marker(M_EOI);
4416     m_pass_num++; // purposely bump up m_pass_num, for debugging
4417     return true;
4418   }
4419 
4420   bool process_end_of_image () {
4421     if (m_mcu_y_ofs)
4422     {
4423       if (m_mcu_y_ofs < 16) // check here just to shut up static analysis
4424       {
4425         for (int i = m_mcu_y_ofs; i < m_mcu_y; i++) {
4426           import core.stdc..string : memcpy;
4427           memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs-1], m_image_bpl_mcu);
4428         }
4429       }
4430       process_mcu_row();
4431     }
4432 
4433     if (m_pass_num == 1)
4434       return terminate_pass_one();
4435     else
4436       return terminate_pass_two();
4437   }
4438 
4439   void load_mcu (const(void)* pSrc) {
4440     import core.stdc..string : memcpy;
4441     const(ubyte)* Psrc = cast(const(ubyte)*)(pSrc);
4442 
4443     ubyte* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst
4444 
4445     if (m_num_components == 1)
4446     {
4447       if (m_image_bpp == 4)
4448         RGBA_to_Y(pDst, Psrc, m_image_x);
4449       else if (m_image_bpp == 3)
4450         RGB_to_Y(pDst, Psrc, m_image_x);
4451       else
4452         memcpy(pDst, Psrc, m_image_x);
4453     }
4454     else
4455     {
4456       if (m_image_bpp == 4)
4457         RGBA_to_YCC(pDst, Psrc, m_image_x);
4458       else if (m_image_bpp == 3)
4459         RGB_to_YCC(pDst, Psrc, m_image_x);
4460       else
4461         Y_to_YCC(pDst, Psrc, m_image_x);
4462     }
4463 
4464     // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16
4465     if (m_num_components == 1) {
4466       import core.stdc..string : memset;
4467       memset(m_mcu_lines[m_mcu_y_ofs]+m_image_bpl_xlt, pDst[m_image_bpl_xlt-1], m_image_x_mcu-m_image_x);
4468     } else
4469     {
4470       const ubyte y = pDst[m_image_bpl_xlt-3+0], cb = pDst[m_image_bpl_xlt-3+1], cr = pDst[m_image_bpl_xlt-3+2];
4471       ubyte *q = m_mcu_lines[m_mcu_y_ofs]+m_image_bpl_xlt;
4472       for (int i = m_image_x; i < m_image_x_mcu; i++)
4473       {
4474         *q++ = y; *q++ = cb; *q++ = cr;
4475       }
4476     }
4477 
4478     if (++m_mcu_y_ofs == m_mcu_y)
4479     {
4480       process_mcu_row();
4481       m_mcu_y_ofs = 0;
4482     }
4483   }
4484 
4485   void clear() {
4486     m_mcu_lines[0] = null;
4487     m_pass_num = 0;
4488     m_all_stream_writes_succeeded = true;
4489   }
4490 
4491 
4492 public:
4493   //this () { clear(); }
4494   ~this () { deinit(); }
4495 
  // Copying is disabled. NOTE(review): presumably because a copy would share
  // the line buffer that the destructor frees -- confirm.
  @disable this (this); // no copies
4497 
4498   // Initializes the compressor.
4499   // pStream: The stream object to use for writing compressed data.
4500   // comp_params - Compression parameters structure, defined above.
4501   // width, height  - Image dimensions.
4502   // channels - May be 1, or 3. 1 indicates grayscale, 3 indicates RGB source data.
4503   // Returns false on out of memory or if a stream write fails.
4504   bool setup() (WriteFunc pStream, int width, int height, int src_channels, in auto ref JpegParams comp_params) {
4505     deinit();
4506     if ((pStream is null || width < 1 || height < 1) || (src_channels != 1 && src_channels != 3 && src_channels != 4) || !comp_params.check()) return false;
4507     m_pStream = pStream;
4508     m_params = comp_params;
4509     return jpg_open(width, height, src_channels);
4510   }
4511 
4512   bool setup() (WriteFunc pStream, int width, int height, int src_channels) { return setup(pStream, width, height, src_channels, JpegParams()); }
4513 
4514   @property ref inout(JpegParams) params () return inout pure nothrow @safe @nogc { pragma(inline, true); return m_params; }
4515 
4516   // Deinitializes the compressor, freeing any allocated memory. May be called at any time.
4517   void deinit () {
4518     jpge_free(m_mcu_lines[0]);
4519     clear();
4520   }
4521 
4522   @property uint total_passes () const pure nothrow @safe @nogc { pragma(inline, true); return (m_params.twoPass ? 2 : 1); }
4523   @property uint cur_pass () const pure nothrow @safe @nogc { pragma(inline, true); return m_pass_num; }
4524 
4525   // Call this method with each source scanline.
4526   // width*src_channels bytes per scanline is expected (RGB or Y format).
4527   // You must call with null after all scanlines are processed to finish compression.
4528   // Returns false on out of memory or if a stream write fails.
4529   bool process_scanline (const(void)* pScanline) {
4530     if (m_pass_num < 1 || m_pass_num > 2) return false;
4531     if (m_all_stream_writes_succeeded) {
4532       if (pScanline is null) {
4533         if (!process_end_of_image()) return false;
4534       } else {
4535         load_mcu(pScanline);
4536       }
4537     }
4538     return m_all_stream_writes_succeeded;
4539   }
4540 }