Basic image/jpeg decoder.

This is not a complete JPEG implementation (e.g. it does not handle progressive JPEGs or restart markers), but I was able to take a photo with my phone, and view the resultant JPEG in pure Go. The decoder is simple, but slow. The Huffman decoder in particular should be easily improvable, but optimization is left to future changelists. Being able to inline functions in the inner loop should also help performance. The output is not pixel-for-pixel identical to libjpeg, although identical behavior isn't necessarily a goal, since JPEG is a lossy codec. There are at least two reasons for the discrepancy. First, the inverse DCT algorithm used is the same as Plan9's src/cmd/jpg, which has different rounding errors from libjpeg's default IDCT implementation. Note that libjpeg actually has three different IDCT implementations: one floating point, and two fixed point. Out of those four, Plan9's seemed the simplest to understand, partly because it has no #ifdef's or C macros. Second, for 4:2:2 or 4:2:0 chroma sampling, this implementation does nearest neighbor upsampling, compared to libjpeg's triangle filter (e.g. see h2v1_fancy_upsample in jdsample.c). The difference from the first reason is typically zero, but sometimes 1 (out of 256) in YCbCr space, or double that in RGB space. The difference from the second reason can be as large as 8/256 in YCbCr space, in regions of steep chroma gradients. Informal eyeballing suggests that the net difference is typically imperceptible, though. R=r CC=golang-dev, rsc https://golang.org/cl/164056

Basic image/jpeg decoder.
This is not a complete JPEG implementation (e.g. it does not handle progressive JPEGs or restart markers), but I was able to take a photo with my phone, and view the resultant JPEG in pure Go. The decoder is simple, but slow. The Huffman decoder in particular should be easily improvable, but optimization is left to future changelists. Being able to inline functions in the inner loop should also help performance. The output is not pixel-for-pixel identical to libjpeg, although identical behavior isn't necessarily a goal, since JPEG is a lossy codec. There are at least two reasons for the discrepancy. First, the inverse DCT algorithm used is the same as Plan9's src/cmd/jpg, which has different rounding errors from libjpeg's default IDCT implementation. Note that libjpeg actually has three different IDCT implementations: one floating point, and two fixed point. Out of those four, Plan9's seemed the simplest to understand, partly because it has no #ifdef's or C macros. Second, for 4:2:2 or 4:2:0 chroma sampling, this implementation does nearest neighbor upsampling, compared to libjpeg's triangle filter (e.g. see h2v1_fancy_upsample in jdsample.c). The difference from the first reason is typically zero, but sometimes 1 (out of 256) in YCbCr space, or double that in RGB space. The difference from the second reason can be as large as 8/256 in YCbCr space, in regions of steep chroma gradients. Informal eyeballing suggests that the net difference is typically imperceptible, though. R=r CC=golang-dev, rsc https://golang.org/cl/164056
8bf58725 · Nigel Tao · 2e5a7206 · 8bf58725 · 8bf58725 · 8bf58725
Commit 8bf58725 authored Dec 17, 2009 by Nigel Tao
5 changed files
--- a/src/pkg/Makefile
+++ b/src/pkg/Makefile
@@ -72,6 +72,7 @@ DIRS=\
 	hash/crc32\
 	http\
 	image\
+	image/jpeg\
 	image/png\
 	io\
 	io/ioutil\
@@ -117,6 +118,7 @@ NOTEST=\
 	go/token\
 	hash\
 	image\
+	image/jpeg\
 	malloc\
 	rand\
 	runtime\

--- a/src/pkg/image/jpeg/Makefile
+++ b/src/pkg/image/jpeg/Makefile
+# Copyright 2009 The Go Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style
+# license that can be found in the LICENSE file.
+
+include $(GOROOT)/src/Make.$(GOARCH)
+
+TARG=image/jpeg
+GOFILES=\
+	huffman.go\
+	idct.go\
+	reader.go\
+
+include $(GOROOT)/src/Make.pkg
--- a/src/pkg/image/jpeg/huffman.go
+++ b/src/pkg/image/jpeg/huffman.go
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package jpeg
+
+import (
+	"io"
+	"os"
+)
+
+// Each code is at most 16 bits long.
+const maxCodeLength = 16
+
+// Each decoded value is a uint8, so there are at most 256 such values.
+const maxNumValues = 256
+
+// Bit stream for the Huffman decoder.
+// The n least significant bits of a form the unread bits, to be read in MSB to LSB order.
+type bits struct {
+	a int // accumulator.
+	n int // the number of unread bits in a.
+	m int // mask. m==1<<(n-1) when n>0, with m==0 when n==0.
+}
+
+// Huffman table decoder, specified in section C.
+type huffman struct {
+	l        [maxCodeLength]int
+	length   int                 // sum of l[i].
+	val      [maxNumValues]uint8 // the decoded values, as sorted by their encoding.
+	size     [maxNumValues]int   // size[i] is the number of bits to encode val[i].
+	code     [maxNumValues]int   // code[i] is the encoding of val[i].
+	minCode  [maxCodeLength]int  // min codes of length i, or -1 if no codes of that length.
+	maxCode  [maxCodeLength]int  // max codes of length i, or -1 if no codes of that length.
+	valIndex [maxCodeLength]int  // index into val of minCode[i].
+}
+
+// Reads bytes from the io.Reader to ensure that bits.n is at least n.
+func (d *decoder) ensureNBits(n int) os.Error {
+	for d.b.n < n {
+		c, err := d.r.ReadByte()
+		if err != nil {
+			return err
+		}
+		d.b.a = d.b.a<<8 | int(c)
+		d.b.n += 8
+		if d.b.m == 0 {
+			d.b.m = 1 << 7
+		} else {
+			d.b.m <<= 8
+		}
+		// Byte stuffing, specified in section F.1.2.3.
+		if c == 0xff {
+			c, err = d.r.ReadByte()
+			if err != nil {
+				return err
+			}
+			if c != 0x00 {
+				return FormatError("missing 0xff00 sequence")
+			}
+		}
+	}
+	return nil
+}
+
+// The composition of RECEIVE and EXTEND, specified in section F.2.2.1.
+func (d *decoder) receiveExtend(t uint8) (int, os.Error) {
+	err := d.ensureNBits(int(t))
+	if err != nil {
+		return 0, err
+	}
+	d.b.n -= int(t)
+	d.b.m >>= t
+	s := 1 << t
+	x := (d.b.a >> uint8(d.b.n)) & (s - 1)
+	if x < s>>1 {
+		x += ((-1) << t) + 1
+	}
+	return x, nil
+}
+
+// Processes a Define Huffman Table marker, and initializes a huffman struct from its contents.
+// Specified in section B.2.4.2.
+func (d *decoder) processDHT(n int) os.Error {
+	for n > 0 {
+		if n < 17 {
+			return FormatError("DHT has wrong length")
+		}
+		_, err := io.ReadFull(d.r, d.tmp[0:17])
+		if err != nil {
+			return err
+		}
+		tc := d.tmp[0] >> 4
+		if tc > maxTc {
+			return FormatError("bad Tc value")
+		}
+		th := d.tmp[0] & 0x0f
+		const isBaseline = true // Progressive mode is not yet supported.
+		if th > maxTh || isBaseline && th > 1 {
+			return FormatError("bad Th value")
+		}
+		h := &d.huff[tc][th]
+
+		// Read l and val (and derive length).
+		h.length = 0
+		for i := 0; i < maxCodeLength; i++ {
+			h.l[i] = int(d.tmp[i+1])
+			h.length += h.l[i]
+		}
+		if h.length == 0 {
+			return FormatError("Huffman table has zero length")
+		}
+		if h.length > maxNumValues {
+			return FormatError("Huffman table has excessive length")
+		}
+		n -= h.length + 17
+		if n < 0 {
+			return FormatError("DHT has wrong length")
+		}
+		_, err = io.ReadFull(d.r, h.val[0:h.length])
+		if err != nil {
+			return err
+		}
+
+		// Derive size.
+		k := 0
+		for i := 0; i < maxCodeLength; i++ {
+			for j := 0; j < h.l[i]; j++ {
+				h.size[k] = i + 1
+				k++
+			}
+		}
+
+		// Derive code.
+		code := 0
+		size := h.size[0]
+		for i := 0; i < h.length; i++ {
+			if size != h.size[i] {
+				code <<= uint8(h.size[i] - size)
+				size = h.size[i]
+			}
+			h.code[i] = code
+			code++
+		}
+
+		// Derive minCode, maxCode, and valIndex.
+		k = 0
+		index := 0
+		for i := 0; i < maxCodeLength; i++ {
+			if h.l[i] == 0 {
+				h.minCode[i] = -1
+				h.maxCode[i] = -1
+				h.valIndex[i] = -1
+			} else {
+				h.minCode[i] = k
+				h.maxCode[i] = k + h.l[i] - 1
+				h.valIndex[i] = index
+				k += h.l[i]
+				index += h.l[i]
+			}
+			k <<= 1
+		}
+	}
+	return nil
+}
+
+// Returns the next Huffman-coded value from the bit stream, decoded according to h.
+// TODO(nigeltao): This decoding algorithm is simple, but slow. A lookahead table, instead of always
+// peeling off only 1 bit at at time, ought to be faster.
+func (d *decoder) decodeHuffman(h *huffman) (uint8, os.Error) {
+	if h.length == 0 {
+		return 0, FormatError("uninitialized Huffman table")
+	}
+	for i, code := 0, 0; i < maxCodeLength; i++ {
+		err := d.ensureNBits(1)
+		if err != nil {
+			return 0, err
+		}
+		if d.b.a&d.b.m != 0 {
+			code |= 1
+		}
+		d.b.n--
+		d.b.m >>= 1
+		if code <= h.maxCode[i] {
+			return h.val[h.valIndex[i]+code-h.minCode[i]], nil
+		}
+		code <<= 1
+	}
+	return 0, FormatError("bad Huffman code")
+}
--- a/src/pkg/image/jpeg/idct.go
+++ b/src/pkg/image/jpeg/idct.go
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// This is a Go translation of idct.c from
+//
+// http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/Video/verifier/mpeg2decode_960109.tar.gz
+//
+// which carries the following notice:
+
+/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
+
+/*
+ * Disclaimer of Warranty
+ *
+ * These software programs are available to the user without any license fee or
+ * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
+ * any and all warranties, whether express, implied, or statuary, including any
+ * implied warranties or merchantability or of fitness for a particular
+ * purpose.  In no event shall the copyright-holder be liable for any
+ * incidental, punitive, or consequential damages of any kind whatsoever
+ * arising from the use of these programs.
+ *
+ * This disclaimer of warranty extends to the user of these programs and user's
+ * customers, employees, agents, transferees, successors, and assigns.
+ *
+ * The MPEG Software Simulation Group does not represent or warrant that the
+ * programs furnished hereunder are free of infringement of any third-party
+ * patents.
+ *
+ * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
+ * are subject to royalty fees to patent holders.  Many of these patents are
+ * general enough such that they are unavoidable regardless of implementation
+ * design.
+ *
+ */
+
+package jpeg
+
+const (
+	w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
+	w2 = 2676 // 2048*sqrt(2)*cos(2*pi/16)
+	w3 = 2408 // 2048*sqrt(2)*cos(3*pi/16)
+	w5 = 1609 // 2048*sqrt(2)*cos(5*pi/16)
+	w6 = 1108 // 2048*sqrt(2)*cos(6*pi/16)
+	w7 = 565  // 2048*sqrt(2)*cos(7*pi/16)
+
+	w1pw7 = w1 + w7
+	w1mw7 = w1 - w7
+	w2pw6 = w2 + w6
+	w2mw6 = w2 - w6
+	w3pw5 = w3 + w5
+	w3mw5 = w3 - w5
+
+	r2 = 181 // 256/sqrt(2)
+)
+
+// 2-D Inverse Discrete Cosine Transformation, followed by a +128 level shift.
+//
+// The input coefficients should already have been multiplied by the appropriate quantization table.
+// We use fixed-point computation, with the number of bits for the fractional component varying over the
+// intermediate stages. The final values are expected to range within [0, 255], after a +128 level shift.
+//
+// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the discrete W transform and
+// for the discrete Fourier transform", IEEE Trans. on ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.
+func idct(b *[blockSize]int) {
+	// Horizontal 1-D IDCT.
+	for y := 0; y < 8; y++ {
+		// If all the AC components are zero, then the IDCT is trivial.
+		if b[y*8+1] == 0 && b[y*8+2] == 0 && b[y*8+3] == 0 &&
+			b[y*8+4] == 0 && b[y*8+5] == 0 && b[y*8+6] == 0 && b[y*8+7] == 0 {
+			dc := b[y*8+0] << 3
+			b[y*8+0] = dc
+			b[y*8+1] = dc
+			b[y*8+2] = dc
+			b[y*8+3] = dc
+			b[y*8+4] = dc
+			b[y*8+5] = dc
+			b[y*8+6] = dc
+			b[y*8+7] = dc
+			continue
+		}
+
+		// Prescale.
+		x0 := (b[y*8+0] << 11) + 128
+		x1 := b[y*8+4] << 11
+		x2 := b[y*8+6]
+		x3 := b[y*8+2]
+		x4 := b[y*8+1]
+		x5 := b[y*8+7]
+		x6 := b[y*8+5]
+		x7 := b[y*8+3]
+
+		// Stage 1.
+		x8 := w7 * (x4 + x5)
+		x4 = x8 + w1mw7*x4
+		x5 = x8 - w1pw7*x5
+		x8 = w3 * (x6 + x7)
+		x6 = x8 - w3mw5*x6
+		x7 = x8 - w3pw5*x7
+
+		// Stage 2.
+		x8 = x0 + x1
+		x0 -= x1
+		x1 = w6 * (x3 + x2)
+		x2 = x1 - w2pw6*x2
+		x3 = x1 + w2mw6*x3
+		x1 = x4 + x6
+		x4 -= x6
+		x6 = x5 + x7
+		x5 -= x7
+
+		// Stage 3.
+		x7 = x8 + x3
+		x8 -= x3
+		x3 = x0 + x2
+		x0 -= x2
+		x2 = (r2*(x4+x5) + 128) >> 8
+		x4 = (r2*(x4-x5) + 128) >> 8
+
+		// Stage 4.
+		b[8*y+0] = (x7 + x1) >> 8
+		b[8*y+1] = (x3 + x2) >> 8
+		b[8*y+2] = (x0 + x4) >> 8
+		b[8*y+3] = (x8 + x6) >> 8
+		b[8*y+4] = (x8 - x6) >> 8
+		b[8*y+5] = (x0 - x4) >> 8
+		b[8*y+6] = (x3 - x2) >> 8
+		b[8*y+7] = (x7 - x1) >> 8
+	}
+
+	// Vertical 1-D IDCT.
+	for x := 0; x < 8; x++ {
+		// Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
+		// However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
+		// we do not bother to check for the all-zero case.
+
+		// Prescale.
+		y0 := (b[8*0+x] << 8) + 8192
+		y1 := b[8*4+x] << 8
+		y2 := b[8*6+x]
+		y3 := b[8*2+x]
+		y4 := b[8*1+x]
+		y5 := b[8*7+x]
+		y6 := b[8*5+x]
+		y7 := b[8*3+x]
+
+		// Stage 1.
+		y8 := w7*(y4+y5) + 4
+		y4 = (y8 + w1mw7*y4) >> 3
+		y5 = (y8 - w1pw7*y5) >> 3
+		y8 = w3*(y6+y7) + 4
+		y6 = (y8 - w3mw5*y6) >> 3
+		y7 = (y8 - w3pw5*y7) >> 3
+
+		// Stage 2.
+		y8 = y0 + y1
+		y0 -= y1
+		y1 = w6*(y3+y2) + 4
+		y2 = (y1 - w2pw6*y2) >> 3
+		y3 = (y1 + w2mw6*y3) >> 3
+		y1 = y4 + y6
+		y4 -= y6
+		y6 = y5 + y7
+		y5 -= y7
+
+		// Stage 3.
+		y7 = y8 + y3
+		y8 -= y3
+		y3 = y0 + y2
+		y0 -= y2
+		y2 = (r2*(y4+y5) + 128) >> 8
+		y4 = (r2*(y4-y5) + 128) >> 8
+
+		// Stage 4.
+		b[8*0+x] = (y7 + y1) >> 14
+		b[8*1+x] = (y3 + y2) >> 14
+		b[8*2+x] = (y0 + y4) >> 14
+		b[8*3+x] = (y8 + y6) >> 14
+		b[8*4+x] = (y8 - y6) >> 14
+		b[8*5+x] = (y0 - y4) >> 14
+		b[8*6+x] = (y3 - y2) >> 14
+		b[8*7+x] = (y7 - y1) >> 14
+	}
+
+	// Level shift.
+	for i := range *b {
+		b[i] += 128
+	}
+}
--- a/src/pkg/image/jpeg/reader.go
+++ b/src/pkg/image/jpeg/reader.go