[image] font/sfnt: parse the CFF table.

Nigel Tao (Gerrit)

unread,

Nov 27, 2016, 6:03:12 AM (11/27/16)

to Dave Day, Ian Lance Taylor, golang-co...@googlegroups.com

Nigel Tao would like Dave Day to review this change.

font/sfnt: parse the CFF table.

Parsing the per-glyph Charstrings will be a follow-up commit.

Change-Id: I52f849a77dd7fa14b6e07420820bdfb4347c2438
---
A font/sfnt/postscript.go
M font/sfnt/sfnt.go
M font/sfnt/sfnt_test.go
A font/testdata/CFFTest.otf
A font/testdata/CFFTest.sfd
A font/testdata/README
6 files changed, 600 insertions(+), 15 deletions(-)

diff --git a/font/sfnt/postscript.go b/font/sfnt/postscript.go
new file mode 100644
index 0000000..b6cf431
--- /dev/null
+++ b/font/sfnt/postscript.go
@@ -0,0 +1,375 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sfnt
+
+// Compact Font Format (CFF) fonts are written in PostScript, a stack-based
+// programming language.
+//
+// A fundamental concept is a DICT, or a key-value map, expressed in reverse
+// Polish notation. For example, this sequence of operations:
+//	- push the number 379
+//	- version operator
+//	- push the number 392
+//	- Notice operator
+//	- etc
+//	- push the number 100
+//	- push the number 0
+//	- push the number 500
+//	- push the number 800
+//	- FontBBox operator
+//	- etc
+// defines a DICT that maps "version" to the String ID (SID) 379, the copyright
+// "Notice" to the SID 392, the font bounding box "FontBBox" to the four
+// numbers [100, 0, 500, 800], etc.
+//
+// The first 391 String IDs (starting at 0) are predefined, e.g. 379 means
+// "001.000". String ID 392 is not predefined, and is mapped by a separate
+// structure, the "String INDEX", inside the CFF data. (String ID 391 is also
+// not predefined. Specifically for ../testdata/CFFTest.otf, 391 means
+// "uni4E2D", as this font contains a glyph for U+4E2D).
+//
+// The actual glyph vectors are similarly encoded (in PostScript), in a format
+// called Type 2 Charstrings. The wire encoding is similar to but not exactly
+// the same as CFF's. For example, the byte 0x05 means FontBBox for CFF DICTs,
+// but means rlineto (relative line-to) for Type 2 Charstrings. See
+// 5176.CFF.pdf Appendix H and 5177.Type2.pdf Appendix A in the PDF files
+// referenced below.
+//
+// CFF is a stand-alone format, but CFF as used in SFNT fonts have further
+// restrictions. For example, a stand-alone CFF can contain multiple fonts, but
+// https://www.microsoft.com/typography/OTSPEC/cff.htm says that "The Name
+// INDEX in the CFF must contain only one entry; that is, there must be only
+// one font in the CFF FontSet".
+//
+// The relevant specifications are:
+// 	- http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
+// 	- http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
+
+import (
+	"fmt"
+)
+
+const (
+	// 5176.CFF.pdf section 4 "DICT Data" says that "An operator may be
+	// preceded by up to a maximum of 48 operands". Similarly, 5177.Type2.pdf
+	// Appendix B "Type 2 Charstring Implementation Limits" says that "Argument
+	// stack 48".
+	stackSize = 48
+)
+
+func bigEndian(b []byte) uint32 {
+	switch len(b) {
+	case 1:
+		return uint32(b[0])
+	case 2:
+		return uint32(b[0])<<8 | uint32(b[1])
+	case 3:
+		return uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2])
+	case 4:
+		return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
+	}
+	panic("unreachable")
+}
+
+// cffParser parses the CFF table from an SFNT font.
+type cffParser struct {
+	src    *source
+	base   int
+	offset int
+	end    int
+	buf    []byte
+	err    error
+	locBuf [2]uint32
+
+	instructions []byte
+
+	stack struct {
+		a   [stackSize]int32
+		top int32
+	}
+
+	saved struct {
+		charStrings int32
+	}
+}
+
+func (p *cffParser) parse() (locations []uint32, err error) {
+	// Parse header.
+	{
+		if !p.read(4) {
+			return nil, p.err
+		}
+		if p.buf[0] != 1 || p.buf[1] != 0 || p.buf[2] != 4 {
+			return nil, errUnsupportedCFFVersion
+		}
+	}
+
+	// Parse Name INDEX.
+	{
+		count, offSize, ok := p.parseIndexHeader()
+		if !ok {
+			return nil, p.err
+		}
+		// https://www.microsoft.com/typography/OTSPEC/cff.htm says that "The
+		// Name INDEX in the CFF must contain only one entry".
+		if count != 1 {
+			return nil, errInvalidCFFTable
+		}
+		if !p.parseIndexLocations(p.locBuf[:2], count, offSize) {
+			return nil, p.err
+		}
+		p.offset = int(p.locBuf[1])
+	}
+
+	// Parse Top DICT INDEX.
+	{
+		count, offSize, ok := p.parseIndexHeader()
+		if !ok {
+			return nil, p.err
+		}
+		// 5176.CFF.pdf section 8 "Top DICT INDEX" says that the count here
+		// should match the count of the Name INDEX, which is 1.
+		if count != 1 {
+			return nil, errInvalidCFFTable
+		}
+		if !p.parseIndexLocations(p.locBuf[:2], count, offSize) {
+			return nil, p.err
+		}
+		if !p.read(int(p.locBuf[1] - p.locBuf[0])) {
+			return nil, p.err
+		}
+
+		for p.instructions = p.buf; len(p.instructions) > 0; {
+			p.step()
+			if p.err != nil {
+				return nil, p.err
+			}
+		}
+	}
+
+	// Parse the CharStrings INDEX, whose location was found in the Top DICT.
+	if p.saved.charStrings <= 0 || int32(p.end-p.base) < p.saved.charStrings {
+		return nil, errInvalidCFFTable
+	}
+	p.offset = p.base + int(p.saved.charStrings)
+	count, offSize, ok := p.parseIndexHeader()
+	if !ok {
+		return nil, p.err
+	}
+	if count == 0 {
+		return nil, errInvalidCFFTable
+	}
+	locations = make([]uint32, count+1)
+	if !p.parseIndexLocations(locations, count, offSize) {
+		return nil, p.err
+	}
+	return locations, nil
+}
+
+func (p *cffParser) read(n int) (ok bool) {
+	if p.end-p.offset < n {
+		p.err = errInvalidCFFTable
+		return false
+	}
+	p.buf, p.err = p.src.view(p.buf, p.offset, n)
+	p.offset += n
+	return p.err == nil
+}
+
+func (p *cffParser) parseIndexHeader() (count, offSize int32, ok bool) {
+	if !p.read(2) {
+		return 0, 0, false
+	}
+	count = int32(u16(p.buf[:2]))
+	// 5176.CFF.pdf section 5 "INDEX Data" says that "An empty INDEX is
+	// represented by a count field with a 0 value and no additional fields.
+	// Thus, the total size of an empty INDEX is 2 bytes".
+	if count == 0 {
+		return count, 0, true
+	}
+	if !p.read(1) {
+		return 0, 0, false
+	}
+	offSize = int32(p.buf[0])
+	if offSize < 1 || 4 < offSize {
+		p.err = errInvalidCFFTable
+		return 0, 0, false
+	}
+	return count, offSize, true
+}
+
+func (p *cffParser) parseIndexLocations(dst []uint32, count, offSize int32) (ok bool) {
+	if count == 0 {
+		return true
+	}
+	if len(dst) != int(count+1) {
+		panic("unreachable")
+	}
+	if !p.read(len(dst) * int(offSize)) {
+		return false
+	}
+
+	prev := uint32(0)
+	for i := range dst {
+		loc := bigEndian(p.buf[int32(i+0)*offSize : int32(i+1)*offSize])
+
+		// Locations are off by 1 byte. 5176.CFF.pdf section 5 "INDEX Data"
+		// says that "Offsets in the offset array are relative to the byte that
+		// precedes the object data... This ensures that every object has a
+		// corresponding offset which is always nonzero".
+		if loc == 0 {
+			p.err = errInvalidCFFTable
+			return false
+		}
+		loc--
+
+		// In the same paragraph, "Therefore the first element of the offset
+		// array is always 1" before correcting for the off-by-1.
+		if i == 0 {
+			if loc != 0 {
+				p.err = errInvalidCFFTable
+				break
+			}
+		} else if loc <= prev { // Check that locations are increasing.
+			p.err = errInvalidCFFTable
+			break
+		}
+
+		// Check that locations are in bounds.
+		if uint32(p.end-p.offset) < loc {
+			p.err = errInvalidCFFTable
+			break
+		}
+
+		dst[i] = uint32(p.offset) + loc
+		prev = loc
+	}
+	return p.err == nil
+}
+
+// step executes a single operation, whether pushing a numeric operand onto the
+// stack or executing an operator.
+func (p *cffParser) step() {
+	if number, res := p.parseNumber(); res != prUnknown {
+		if res == prBad || p.stack.top == stackSize {
+			p.err = errInvalidCFFTable
+			return
+		}
+		p.stack.a[p.stack.top] = number
+		p.stack.top++
+		return
+	}
+
+	b0 := p.instructions[0]
+	p.instructions = p.instructions[1:]
+	if int(b0) < len(cff1ByteOperators) {
+		if op := cff1ByteOperators[b0]; op.name != "" {
+			if p.stack.top < op.numPop {
+				p.err = errInvalidCFFTable
+				return
+			}
+			if op.run != nil {
+				op.run(p)
+			}
+			if op.numPop < 0 {
+				p.stack.top = 0
+			} else {
+				p.stack.top -= op.numPop
+			}
+			return
+		}
+	}
+
+	p.err = fmt.Errorf("sfnt: unrecognized CFF 1-byte operator %d", b0)
+}
+
+type parseResult int32
+
+const (
+	prBad     parseResult = -1
+	prUnknown parseResult = +0
+	prGood    parseResult = +1
+)
+
+// See 5176.CFF.pdf section 4 "DICT Data".
+func (p *cffParser) parseNumber() (number int32, res parseResult) {
+	if len(p.instructions) > 0 {
+		switch b0 := p.instructions[0]; {
+		case b0 < 32:
+			switch b0 {
+			case 28:
+				if len(p.instructions) < 3 {
+					return 0, prBad
+				}
+				number = int32(int16(u16(p.instructions[1:])))
+				p.instructions = p.instructions[3:]
+				return number, prGood
+
+			case 29:
+				if len(p.instructions) < 5 {
+					return 0, prBad
+				}
+				number = int32(u32(p.instructions[1:]))
+				p.instructions = p.instructions[5:]
+				return number, prGood
+			}
+
+		case b0 < 247:
+			p.instructions = p.instructions[1:]
+			return int32(b0) - 139, prGood
+
+		case b0 < 251:
+			if len(p.instructions) < 2 {
+				return 0, prBad
+			}
+			b1 := p.instructions[1]
+			p.instructions = p.instructions[2:]
+			return +int32(b0-247)*256 + int32(b1) + 108, prGood
+
+		case b0 < 255:
+			if len(p.instructions) < 2 {
+				return 0, prBad
+			}
+			b1 := p.instructions[1]
+			p.instructions = p.instructions[2:]
+			return -int32(b0-251)*256 - int32(b1) - 108, prGood
+		}
+	}
+
+	return 0, prUnknown
+}
+
+type cffOperator struct {
+	// numPop is the number of stack values to pop. -1 means "array" and -2
+	// means "delta" as per 5176.CFF.pdf Table 6 "Operand Types".
+	numPop int32
+	// name is the operator name. An empty name (i.e. the zero value for the
+	// struct overall) means an unrecognized 1-byte operator.
+	name string
+	// run is the function that implements the operator. Nil means that we
+	// ignore the operator, other than popping its arguments off the stack.
+	run func(*cffParser)
+}
+
+// cff1ByteOperators encodes the subset of 5176.CFF.pdf Table 9 "Top DICT
+// Operator Entries" used by this implementation.
+var cff1ByteOperators = [...]cffOperator{
+	0:  {+1, "version", nil},
+	1:  {+1, "Notice", nil},
+	2:  {+1, "FullName", nil},
+	3:  {+1, "FamilyName", nil},
+	4:  {+1, "Weight", nil},
+	5:  {-1, "FontBBox", nil},
+	13: {+1, "UniqueID", nil},
+	14: {-1, "XUID", nil},
+	15: {+1, "charset", nil},
+	16: {+1, "Encoding", nil},
+	17: {+1, "CharStrings", func(p *cffParser) {
+		p.saved.charStrings = p.stack.a[p.stack.top-1]
+	}},
+	18: {+2, "Private", nil},
+}
+
+// TODO: 2-byte operators.
diff --git a/font/sfnt/sfnt.go b/font/sfnt/sfnt.go
index e0b977f..205031d 100644
--- a/font/sfnt/sfnt.go
+++ b/font/sfnt/sfnt.go
@@ -10,6 +10,9 @@
 // https://www.microsoft.com/en-us/Typography/OpenTypeSpecification.aspx
 // specification. Additional documentation is at
 // http://developer.apple.com/fonts/TTRefMan/
+//
+// The pyftinspect tool from https://github.com/fonttools/fonttools is useful
+// for inspecting SFNT fonts.
 
 import (
 	"errors"
@@ -26,17 +29,21 @@
 )
 
 var (
+	errGlyphIndexOutOfRange = errors.New("sfnt: glyph index out of range")
+
 	errInvalidBounds        = errors.New("sfnt: invalid bounds")
+	errInvalidCFFTable      = errors.New("sfnt: invalid CFF table")
 	errInvalidHeadTable     = errors.New("sfnt: invalid head table")
+	errInvalidLocationData  = errors.New("sfnt: invalid location data")
 	errInvalidMaxpTable     = errors.New("sfnt: invalid maxp table")
 	errInvalidSourceData    = errors.New("sfnt: invalid source data")
 	errInvalidTableOffset   = errors.New("sfnt: invalid table offset")
 	errInvalidTableTagOrder = errors.New("sfnt: invalid table tag order")
 	errInvalidVersion       = errors.New("sfnt: invalid version")
 
+	errUnsupportedCFFVersion        = errors.New("sfnt: unsupported CFF version")
 	errUnsupportedNumberOfTables    = errors.New("sfnt: unsupported number of tables")
 	errUnsupportedTableOffsetLength = errors.New("sfnt: unsupported table offset or length")
-	errUnsupportedVersion           = errors.New("sfnt: unsupported version")
 )
 
 // Units are an integral number of abstract, scalable "font units". The em
@@ -174,7 +181,8 @@
 	// https://www.microsoft.com/typography/otspec/otff.htm#otttables
 	// "Tables Related to PostScript Outlines".
 	//
-	// TODO: cff, cff2, vorg?
+	// TODO: cff2, vorg?
+	cff table
 
 	// https://www.microsoft.com/typography/otspec/otff.htm#otttables
 	// "Advanced Typographic Tables".
@@ -187,13 +195,17 @@
 	// TODO: hdmx, kern, vmtx? Others?
 
 	cached struct {
-		numGlyphs  int
-		unitsPerEm Units
+		isPostScript bool
+		unitsPerEm   Units
+
+		// The glyph data for the glyph index i is in
+		// src[locations[i+0]:locations[i+1]].
+		locations []uint32
 	}
 }
 
 // NumGlyphs returns the number of glyphs in f.
-func (f *Font) NumGlyphs() int { return f.cached.numGlyphs }
+func (f *Font) NumGlyphs() int { return len(f.cached.locations) - 1 }
 
 // UnitsPerEm returns the number of units per em for f.
 func (f *Font) UnitsPerEm() Units { return f.cached.unitsPerEm }
@@ -217,8 +229,7 @@
 	case 0x00010000:
 		// No-op.
 	case 0x4f54544f: // "OTTO".
-		// TODO: support CFF fonts.
-		return errUnsupportedVersion
+		f.cached.isPostScript = true
 	}
 	numTables := int(u16(buf[4:]))
 	if numTables > maxNumTables {
@@ -252,6 +263,8 @@
 
 		// Match the 4-byte tag as a uint32. For example, "OS/2" is 0x4f532f32.
 		switch tag {
+		case 0x43464620:
+			f.cff = table{o, n}
 		case 0x4f532f32:
 			f.os2 = table{o, n}
 		case 0x636d6170:
@@ -291,14 +304,47 @@
 	f.cached.unitsPerEm = Units(u)
 
 	// https://www.microsoft.com/typography/otspec/maxp.htm
-	if f.maxp.length != 32 {
-		return errInvalidMaxpTable
+	if f.cached.isPostScript {
+		if f.maxp.length != 6 {
+			return errInvalidMaxpTable
+		}
+	} else {
+		if f.maxp.length != 32 {
+			return errInvalidMaxpTable
+		}
 	}
 	u, err = f.src.u16(buf, f.maxp, 4)
 	if err != nil {
 		return err
 	}
-	f.cached.numGlyphs = int(u)
+	numGlyphs := int(u)
 
+	if f.cached.isPostScript {
+		p := cffParser{
+			src:    &f.src,
+			base:   int(f.cff.offset),
+			offset: int(f.cff.offset),
+			end:    int(f.cff.offset + f.cff.length),
+		}
+		f.cached.locations, err = p.parse()
+		if err != nil {
+			return err
+		}
+	} else {
+		// TODO: locaParser for TrueType fonts.
+		f.cached.locations = make([]uint32, numGlyphs+1)
+	}
+	if len(f.cached.locations) != numGlyphs+1 {
+		return errInvalidLocationData
+	}
 	return nil
 }
+
+func (f *Font) viewGlyphData(buf []byte, glyphIndex int) ([]byte, error) {
+	if glyphIndex < 0 || f.NumGlyphs() <= glyphIndex {
+		return nil, errGlyphIndexOutOfRange
+	}
+	i := f.cached.locations[glyphIndex+0]
+	j := f.cached.locations[glyphIndex+1]
+	return f.src.view(buf, int(i), int(j-i))
+}
diff --git a/font/sfnt/sfnt_test.go b/font/sfnt/sfnt_test.go
index b80e410..8c9df63 100644
--- a/font/sfnt/sfnt_test.go
+++ b/font/sfnt/sfnt_test.go
@@ -6,28 +6,30 @@
 
 import (
 	"bytes"
+	"io/ioutil"
+	"path/filepath"
 	"testing"
 
 	"golang.org/x/image/font/gofont/goregular"
 )
 
-func TestParse(t *testing.T) {
+func TestTrueTypeParse(t *testing.T) {
 	f, err := Parse(goregular.TTF)
 	if err != nil {
 		t.Fatalf("Parse: %v", err)
 	}
-	testFont(t, f)
+	testTrueType(t, f)
 }
 
-func TestParseReaderAt(t *testing.T) {
+func TestTrueTypeParseReaderAt(t *testing.T) {
 	f, err := ParseReaderAt(bytes.NewReader(goregular.TTF))
 	if err != nil {
 		t.Fatalf("ParseReaderAt: %v", err)
 	}
-	testFont(t, f)
+	testTrueType(t, f)
 }
 
-func testFont(t *testing.T, f *Font) {
+func testTrueType(t *testing.T, f *Font) {
 	if got, want := f.UnitsPerEm(), Units(2048); got != want {
 		t.Errorf("UnitsPerEm: got %d, want %d", got, want)
 	}
@@ -38,3 +40,39 @@
 		t.Errorf("NumGlyphs: got %d, want > %d", got, want)
 	}
 }
+
+func TestPostScript(t *testing.T) {
+	data, err := ioutil.ReadFile(filepath.Join("..", "testdata", "CFFTest.otf"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	f, err := Parse(data)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// TODO: replace this by a higher level test, once we parse Type 2
+	// Charstrings.
+	//
+	// As a sanity check for now, note that each string ends in '\x0e', which
+	// 5177.Type2.pdf Appendix A defines as "endchar".
+	wants := [...]string{
+		"\xf7\x63\x8b\xbd\xf8\x45\xbd\x01\xbd\xbd\xf7\xc0\xbd\x03\xbd\x16\xf8\x24\xf8\xa9\xfc\x24\x06\xbd\xfc\x77\x15\xf8\x45\xf7\xc0\xfc\x45\x07\x0e",
+		"\x8b\xef\xf8\xec\xef\x01\xef\xdb\xf7\x84\xdb\x03\xf7\xc0\xf9\x50\x15\xdb\xb3\xfb\x0c\x3b\xfb\x2a\x6d\xfb\x8e\x31\x3b\x63\xf7\x0c\xdb\xf7\x2a\xa9\xf7\x8e\xe5\x1f\xef\x04\x27\x27\xfb\x70\xfb\x48\xfb\x48\xef\xfb\x70\xef\xef\xef\xf7\x70\xf7\x48\xf7\x48\x27\xf7\x70\x27\x1f\x0e",
+		"\xf6\xa0\x76\x01\xef\xf7\x5c\x03\xef\x16\xf7\x5c\xf9\xb4\xfb\x5c\x06\x0e",
+		"\xf7\x89\xe1\x03\xf7\x21\xf8\x9c\x15\x87\xfb\x38\xf7\x00\xb7\xe1\xfc\x0a\xa3\xf8\x18\xf7\x00\x9f\x81\xf7\x4e\xfb\x04\x6f\x81\xf7\x3a\x33\x85\x83\xfb\x52\x05\x0e",
+	}
+	if ng := f.NumGlyphs(); ng != len(wants) {
+		t.Fatalf("NumGlyphs: got %d, want %d", ng, len(wants))
+	}
+	for i, want := range wants {
+		gd, err := f.viewGlyphData(nil, i)
+		if err != nil {
+			t.Errorf("i=%d: %v", i, err)
+			continue
+		}
+		if got := string(gd); got != want {
+			t.Errorf("i=%d:\ngot  % x\nwant % x", i, got, want)
+		}
+	}
+}
diff --git a/font/testdata/CFFTest.otf b/font/testdata/CFFTest.otf
new file mode 100644
index 0000000..7f21c52
--- /dev/null
+++ b/font/testdata/CFFTest.otf
Binary files differ
diff --git a/font/testdata/CFFTest.sfd b/font/testdata/CFFTest.sfd
new file mode 100644
index 0000000..deda7fe
--- /dev/null
+++ b/font/testdata/CFFTest.sfd
@@ -0,0 +1,124 @@
+SplineFontDB: 3.0
+FontName: CFFTest
+FullName: CFFTest
+FamilyName: CFFTest
+Weight: Regular
+Copyright: Copyright 2016 The Go Authors. All rights reserved.\nUse of this font is governed by a BSD-style license that can be found at https://golang.org/LICENSE.
+Version: 001.000
+ItalicAngle: 0
+UnderlinePosition: -100
+UnderlineWidth: 50
+Ascent: 800
+Descent: 200
+LayerCount: 2
+Layer: 0 0 "Back"  1
+Layer: 1 0 "Fore"  0
+XUID: [1021 367 888937226 7862908]
+FSType: 8
+OS2Version: 0
+OS2_WeightWidthSlopeOnly: 0
+OS2_UseTypoMetrics: 1
+CreationTime: 1479626795
+ModificationTime: 1480238616
+PfmFamily: 17
+TTFWeight: 400
+TTFWidth: 5
+LineGap: 90
+VLineGap: 0
+OS2TypoAscent: 0
+OS2TypoAOffset: 1
+OS2TypoDescent: 0
+OS2TypoDOffset: 1
+OS2TypoLinegap: 90
+OS2WinAscent: 0
+OS2WinAOffset: 1
+OS2WinDescent: 0
+OS2WinDOffset: 1
+HheadAscent: 0
+HheadAOffset: 1
+HheadDescent: 0
+HheadDOffset: 1
+OS2Vendor: 'PfEd'
+MarkAttachClasses: 1
+DEI: 91125
+LangName: 1033 
+Encoding: UnicodeBmp
+UnicodeInterp: none
+NameList: Adobe Glyph List
+DisplaySize: -24
+AntiAlias: 1
+FitToEm: 1
+WinInfo: 19712 32 23
+BeginPrivate: 0
+EndPrivate
+TeXData: 1 0 0 346030 173015 115343 0 1048576 115343 783286 444596 497025 792723 393216 433062 380633 303038 157286 324010 404750 52429 2506097 1059062 262144
+BeginChars: 65536 3
+
+StartChar: zero
+Encoding: 48 48 0
+Width: 600
+VWidth: 0
+HStem: 0 100<248.223 341.575> 700 100<258.425 351.777>
+VStem: 100 80<243.925 531.374> 420 80<268.627 556.075>
+LayerCount: 2
+Fore
+SplineSet
+300 700 m 0
+ 210 700 180 450 180 300 c 24
+ 180 220 220 100 300 100 c 0
+ 390 100 420 350 420 500 c 24
+ 420 580 380 700 300 700 c 0
+300 800 m 0
+ 400 800 500 580 500 400 c 0
+ 500 220 400 0 300 0 c 0
+ 200 0 100 220 100 400 c 0
+ 100 580 200 800 300 800 c 0
+EndSplineSet
+Validated: 1
+EndChar
+
+StartChar: one
+Encoding: 49 49 1
+Width: 400
+VWidth: 0
+Flags: W
+HStem: 0 21G<100 300>
+VStem: 100 200<0 800>
+LayerCount: 2
+Fore
+SplineSet
+100 0 m 25
+ 100 800 l 25
+ 300 800 l 29
+ 300 0 l 29
+ 100 0 l 25
+EndSplineSet
+Validated: 1
+EndChar
+
+StartChar: uni4E2D
+Encoding: 20013 20013 2
+Width: 600
+VWidth: 0
+Flags: W
+VStem: 245 86<641.8 752>
+LayerCount: 2
+Fore
+SplineSet
+141 520 m 25
+ 235 562 l 25
+ 243 752 l 25
+ 331 758 l 25
+ 341 592 l 25
+ 453 620 l 25
+ 463 434 l 25
+ 355 414 l 25
+ 331 26 l 25
+ 245 400 l 25
+ 137 356 l 25
+ 141 520 l 25
+EndSplineSet
+Validated: 1
+EndChar
+EndChars
+EndSplineFont
diff --git a/font/testdata/README b/font/testdata/README
new file mode 100644
index 0000000..d8737d3
--- /dev/null
+++ b/font/testdata/README
@@ -0,0 +1,2 @@
+CFFTest.sfd is a FontForge file for creating CFFTest.otf, a custom OpenType
+font for testing the golang.org/x/image/font/sfnt package's CFF support.

To view, visit this change. To unsubscribe, visit settings.

Dave Day (Gerrit)

unread,

Nov 29, 2016, 6:38:05 PM (11/29/16)

to Nigel Tao, golang-co...@googlegroups.com

Dave Day posted comments on this change.

View Change

Patch Set 1:

(7 comments)

File font/sfnt/postscript.go:
- Patch Set #1, Line 27: predefined
  Where?
- Patch Set #1, Line 104: .buf[0] != 1 || p.buf[1] != 0 || p.buf[2] != 4
  Consider if bigEndian(p.buf[:3]) != x { }
- Patch Set #1, Line 171: func (p *cffParser) read(n int) (ok bool) {
  I think read warrants a short comment regarding its behaviour (particularly wrt p.buf and p.offset)
- Patch Set #1, Line 216: loc := bigEndian(p.buf[int32(i+0)*offSize : int32(i+1)*offSize])
  Maths is hard. Is this more obvious?
```
var loc, prev uint32
buf := p.buf
for i := range dst {
	loc, buf = buf[:offSize], buf[offSize:]
```
- Patch Set #1, Line 292: prUnknown
  prNone might be more accurate
- Patch Set #1, Line 298: if len(p.instructions) > 0 {
  if len(p.instructions) == 0 { return 0, prUnknown }
- Patch Set #1, Line 301:
```
switch b0 {
			case 28:
```
  Pull these to the top switch (case b == 28) and then make case < 32 just return prNone explicitly.

To view, visit this change. To unsubscribe, visit settings.

Nigel Tao (Gerrit)

unread,

Nov 30, 2016, 3:11:01 AM (11/30/16)

to Dave Day, golang-co...@googlegroups.com

Nigel Tao posted comments on this change.

View Change

Patch Set 1:

(7 comments)

File font/sfnt/postscript.go:

- Patch Set #1, Line 27: predefined
  Where?

- Done
- Patch Set #1, Line 104: .buf[0] != 1 || p.buf[1] != 0 || p.buf[2] != 4
  Consider if bigEndian(p.buf[:3]) != x { }
  Considered, but I prefer as is.

- Patch Set #1, Line 171: func (p *cffParser) read(n int) (ok bool) {
  I think read warrants a short comment regarding its behaviour (particularly wrt p.buf and p.offset)

- Done

- Patch Set #1, Line 216: loc := bigEndian(p.buf[int32(i+0)*offSize : int32(i+1)*offSize])
  Maths is hard. Is this more obvious?

- Done

- Patch Set #1, Line 292: prUnknown
  prNone might be more accurate

- Done

- Patch Set #1, Line 298: if len(p.instructions) > 0 {
  if len(p.instructions) == 0 { return 0, prUnknown }

- Done

- Patch Set #1, Line 301:
```
switch b0 {
			case 28:
```
  Pull these to the top switch (case b == 28) and then make case < 32 just return prNone explicitly.

- Done

To view, visit this change. To unsubscribe, visit settings.

Nigel Tao (Gerrit)

unread,

Nov 30, 2016, 3:11:36 AM (11/30/16)

to Dave Day, golang-co...@googlegroups.com

Nigel Tao uploaded patch set #2 to this change.

View Change

font/sfnt: parse the CFF table.

Parsing the per-glyph Charstrings will be a follow-up commit.

Change-Id: I52f849a77dd7fa14b6e07420820bdfb4347c2438
---
A font/sfnt/postscript.go
M font/sfnt/sfnt.go
M font/sfnt/sfnt_test.go
A font/testdata/CFFTest.otf
A font/testdata/CFFTest.sfd
A font/testdata/README
6 files changed, 614 insertions(+), 15 deletions(-)

To view, visit this change. To unsubscribe, visit settings.

Dave Day (Gerrit)

unread,

Nov 30, 2016, 7:23:29 PM (11/30/16)

to Nigel Tao, golang-co...@googlegroups.com

Dave Day posted comments on this change.

View Change

Patch Set 2: Code-Review+2

To view, visit this change. To unsubscribe, visit settings.

Nigel Tao (Gerrit)

unread,

Dec 1, 2016, 2:40:51 AM (12/1/16)

to Dave Day, golang-co...@googlegroups.com

Nigel Tao uploaded patch set #3 to this change.

View Change

font/sfnt: parse the CFF table.

Parsing the per-glyph Charstrings will be a follow-up commit.

Change-Id: I52f849a77dd7fa14b6e07420820bdfb4347c2438
---
A font/sfnt/postscript.go
M font/sfnt/sfnt.go
M font/sfnt/sfnt_test.go
A font/testdata/CFFTest.otf
A font/testdata/CFFTest.sfd
A font/testdata/README
6 files changed, 614 insertions(+), 15 deletions(-)

To view, visit this change. To unsubscribe, visit settings.

Nigel Tao (Gerrit)

unread,

Dec 1, 2016, 2:44:01 AM (12/1/16)

to golang-...@googlegroups.com, Dave Day, golang-co...@googlegroups.com

Nigel Tao merged this change.

View Change

Approvals:
  Dave Day: Looks good to me, approved

font/sfnt: parse the CFF table.

Parsing the per-glyph Charstrings will be a follow-up commit.

Change-Id: I52f849a77dd7fa14b6e07420820bdfb4347c2438
Reviewed-on: https://go-review.googlesource.com/33593
Reviewed-by: Dave Day <d...@golang.org>
---
A font/sfnt/postscript.go
M font/sfnt/sfnt.go
M font/sfnt/sfnt_test.go
A font/testdata/CFFTest.otf
A font/testdata/CFFTest.sfd
A font/testdata/README
6 files changed, 614 insertions(+), 15 deletions(-)

diff --git a/font/sfnt/postscript.go b/font/sfnt/postscript.go
new file mode 100644
index 0000000..1363dd8
--- /dev/null
+++ b/font/sfnt/postscript.go
@@ -0,0 +1,389 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package sfnt
+
+// Compact Font Format (CFF) fonts are written in PostScript, a stack-based
+// programming language.
+//
+// A fundamental concept is a DICT, or a key-value map, expressed in reverse
+// Polish notation. For example, this sequence of operations:
+//	- push the number 379
+//	- version operator
+//	- push the number 392
+//	- Notice operator
+//	- etc
+//	- push the number 100
+//	- push the number 0
+//	- push the number 500
+//	- push the number 800
+//	- FontBBox operator
+//	- etc
+// defines a DICT that maps "version" to the String ID (SID) 379, the copyright
+// "Notice" to the SID 392, the font bounding box "FontBBox" to the four
+// numbers [100, 0, 500, 800], etc.
+//
+// The first 391 String IDs (starting at 0) are predefined as per the CFF spec
+// Appendix A, in 5176.CFF.pdf referenced below. For example, 379 means
+// "001.000". String ID 392 is not predefined, and is mapped by a separate
+// structure, the "String INDEX", inside the CFF data. (String ID 391 is also
+// not predefined. Specifically for ../testdata/CFFTest.otf, 391 means
+// "uni4E2D", as this font contains a glyph for U+4E2D).
+//
+// The actual glyph vectors are similarly encoded (in PostScript), in a format
+// called Type 2 Charstrings. The wire encoding is similar to but not exactly
+// the same as CFF's. For example, the byte 0x05 means FontBBox for CFF DICTs,
+// but means rlineto (relative line-to) for Type 2 Charstrings. See
+// 5176.CFF.pdf Appendix H and 5177.Type2.pdf Appendix A in the PDF files
+// referenced below.
+//
+// CFF is a stand-alone format, but CFF as used in SFNT fonts have further
+// restrictions. For example, a stand-alone CFF can contain multiple fonts, but
+// https://www.microsoft.com/typography/OTSPEC/cff.htm says that "The Name
+// INDEX in the CFF must contain only one entry; that is, there must be only
+// one font in the CFF FontSet".
+//
+// The relevant specifications are:
+// 	- http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
+// 	- http://wwwimages.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
+
+import (
+	"fmt"
+)
+
+const (
+	// psStackSize is the stack size for a PostScript interpreter. 5176.CFF.pdf
+	// section 4 "DICT Data" says that "An operator may be preceded by up to a
+	// maximum of 48 operands". Similarly, 5177.Type2.pdf Appendix B "Type 2
+	// Charstring Implementation Limits" says that "Argument stack 48".
+	psStackSize = 48
+)
+
+func bigEndian(b []byte) uint32 {
+	switch len(b) {
+	case 1:
+		return uint32(b[0])
+	case 2:
+		return uint32(b[0])<<8 | uint32(b[1])
+	case 3:
+		return uint32(b[0])<<16 | uint32(b[1])<<8 | uint32(b[2])
+	case 4:
+		return uint32(b[0])<<24 | uint32(b[1])<<16 | uint32(b[2])<<8 | uint32(b[3])
+	}
+	panic("unreachable")
+}
+
+// cffParser parses the CFF table from an SFNT font.
+type cffParser struct {
+	src    *source
+	base   int
+	offset int
+	end    int
+	buf    []byte
+	err    error
+	locBuf [2]uint32
+
+	instructions []byte
+
+	stack struct {
+		a   [psStackSize]int32
+		top int32
+	}
+
+	saved struct {
+		charStrings int32
+	}
+}
+
+func (p *cffParser) parse() (locations []uint32, err error) {
+	// Parse header.
+	{
+		if !p.read(4) {
+			return nil, p.err
+		}
+		if p.buf[0] != 1 || p.buf[1] != 0 || p.buf[2] != 4 {
+			return nil, errUnsupportedCFFVersion
+		}
+	}
+
+	// Parse Name INDEX.
+	{
+		count, offSize, ok := p.parseIndexHeader()
+		if !ok {
+			return nil, p.err
+		}
+		// https://www.microsoft.com/typography/OTSPEC/cff.htm says that "The
+		// Name INDEX in the CFF must contain only one entry".
+		if count != 1 {
+			return nil, errInvalidCFFTable
+		}
+		if !p.parseIndexLocations(p.locBuf[:2], count, offSize) {
+			return nil, p.err
+		}
+		p.offset = int(p.locBuf[1])
+	}
+
+	// Parse Top DICT INDEX.
+	{
+		count, offSize, ok := p.parseIndexHeader()
+		if !ok {
+			return nil, p.err
+		}
+		// 5176.CFF.pdf section 8 "Top DICT INDEX" says that the count here
+		// should match the count of the Name INDEX, which is 1.
+		if count != 1 {
+			return nil, errInvalidCFFTable
+		}
+		if !p.parseIndexLocations(p.locBuf[:2], count, offSize) {
+			return nil, p.err
+		}
+		if !p.read(int(p.locBuf[1] - p.locBuf[0])) {
+			return nil, p.err
+		}
+
+		for p.instructions = p.buf; len(p.instructions) > 0; {
+			p.step()
+			if p.err != nil {
+				return nil, p.err
+			}
+		}
+	}
+
+	// Parse the CharStrings INDEX, whose location was found in the Top DICT.
+	if p.saved.charStrings <= 0 || int32(p.end-p.base) < p.saved.charStrings {
+		return nil, errInvalidCFFTable
+	}
+	p.offset = p.base + int(p.saved.charStrings)
+	count, offSize, ok := p.parseIndexHeader()
+	if !ok {
+		return nil, p.err
+	}
+	if count == 0 {
+		return nil, errInvalidCFFTable
+	}
+	locations = make([]uint32, count+1)
+	if !p.parseIndexLocations(locations, count, offSize) {
+		return nil, p.err
+	}
+	return locations, nil
+}
+
+// read sets p.buf to view the n bytes from p.offset to p.offset+n. It also
+// advances p.offset by n.
+//
+// As per the source.view method, the caller should not modify the contents of
+// p.buf after read returns, other than by calling read again.
+//
+// The caller should also avoid modifying the pointer / length / capacity of
+// the p.buf slice, not just avoid modifying the slice's contents, in order to
+// maximize the opportunity to re-use p.buf's allocated memory when viewing the
+// underlying source data for subsequent read calls.
+func (p *cffParser) read(n int) (ok bool) {
+	if p.end-p.offset < n {
+		p.err = errInvalidCFFTable
+		return false
+	}
+	p.buf, p.err = p.src.view(p.buf, p.offset, n)
+	p.offset += n
+	return p.err == nil
+}
+
+func (p *cffParser) parseIndexHeader() (count, offSize int32, ok bool) {
+	if !p.read(2) {
+		return 0, 0, false
+	}
+	count = int32(u16(p.buf[:2]))
+	// 5176.CFF.pdf section 5 "INDEX Data" says that "An empty INDEX is
+	// represented by a count field with a 0 value and no additional fields.
+	// Thus, the total size of an empty INDEX is 2 bytes".
+	if count == 0 {
+		return count, 0, true
+	}
+	if !p.read(1) {
+		return 0, 0, false
+	}
+	offSize = int32(p.buf[0])
+	if offSize < 1 || 4 < offSize {
+		p.err = errInvalidCFFTable
+		return 0, 0, false
+	}
+	return count, offSize, true
+}
+
+// parseIndexLocations reads the offset array of an INDEX structure: count+1
+// offsets of offSize bytes each, starting at p.offset. For each offset it
+// stores the corresponding location, expressed in the same coordinates as
+// p.offset, in dst, whose length must be count+1. When count is zero, nothing
+// is read and dst is left untouched.
+//
+// It returns false, with p.err set, if the offsets cannot be read, if any
+// offset is zero, if the first offset is not 1, if the offsets are not
+// strictly increasing, or if an offset points past p.end.
+func (p *cffParser) parseIndexLocations(dst []uint32, count, offSize int32) (ok bool) {
+	if count == 0 {
+		return true
+	}
+	if len(dst) != int(count+1) {
+		panic("unreachable")
+	}
+	width := int(offSize)
+	if !p.read(len(dst) * width) {
+		return false
+	}
+
+	// The read above left p.offset just past the offset array, so the origin
+	// that locations are measured from, and the number of bytes available
+	// after it, are the same for every element.
+	origin, limit := uint32(p.offset), uint32(p.end-p.offset)
+	prev := uint32(0)
+	for i := 0; i < len(dst); i++ {
+		raw := bigEndian(p.buf[i*width : (i+1)*width])
+
+		// Locations are off by 1 byte. 5176.CFF.pdf section 5 "INDEX Data"
+		// says that "Offsets in the offset array are relative to the byte that
+		// precedes the object data... This ensures that every object has a
+		// corresponding offset which is always nonzero".
+		if raw == 0 {
+			p.err = errInvalidCFFTable
+			return false
+		}
+		loc := raw - 1
+
+		switch {
+		case i == 0 && loc != 0:
+			// In the same paragraph, "Therefore the first element of the
+			// offset array is always 1" before correcting for the off-by-1.
+			p.err = errInvalidCFFTable
+			return false
+		case i != 0 && loc <= prev:
+			// Locations must be increasing.
+			p.err = errInvalidCFFTable
+			return false
+		case limit < loc:
+			// Locations must be in bounds.
+			p.err = errInvalidCFFTable
+			return false
+		}
+
+		dst[i] = origin + loc
+		prev = loc
+	}
+	return true
+}
+
+// step executes a single operation from the front of p.instructions: either
+// it pushes a numeric operand onto the stack, or it executes a 1-byte
+// operator found in cff1ByteOperators. Any failure is reported via p.err.
+//
+// NOTE(review): if p.instructions is empty, parseNumber returns prNone and the
+// p.instructions[0] index below panics; callers are presumably expected to
+// check for remaining instructions first.
+func (p *cffParser) step() {
+	number, res := p.parseNumber()
+	switch {
+	case res == prNone:
+		// Not a number. Treat the next byte as an operator, below.
+	case res == prBad || p.stack.top == psStackSize:
+		// Either the number was malformed or there is no room to push it.
+		p.err = errInvalidCFFTable
+		return
+	default:
+		p.stack.a[p.stack.top] = number
+		p.stack.top++
+		return
+	}
+
+	opcode := p.instructions[0]
+	p.instructions = p.instructions[1:]
+	if int(opcode) >= len(cff1ByteOperators) || cff1ByteOperators[opcode].name == "" {
+		p.err = fmt.Errorf("sfnt: unrecognized CFF 1-byte operator %d", opcode)
+		return
+	}
+
+	op := cff1ByteOperators[opcode]
+	if p.stack.top < op.numPop {
+		p.err = errInvalidCFFTable
+		return
+	}
+	// Run the operator before popping, so that it can still see its operands.
+	if op.run != nil {
+		op.run(p)
+	}
+	if op.numPop >= 0 {
+		p.stack.top -= op.numPop
+	} else {
+		// A negative numPop means a variable number of operands: take them
+		// all.
+		p.stack.top = 0
+	}
+}
+
+// parseResult is the outcome of trying to parse a number from the front of
+// the instruction stream.
+type parseResult int32
+
+const (
+	// prBad (-1) means that a number's encoding started but was cut short.
+	prBad parseResult = iota - 1
+	// prNone (0) means that the next instruction, if any, is not a number.
+	prNone
+	// prGood (+1) means that a number was parsed and consumed.
+	prGood
+)
+
+// parseNumber tries to decode an integer operand from the front of
+// p.instructions, as per 5176.CFF.pdf section 4 "DICT Data".
+//
+// It returns prGood, along with the number, after consuming the number's
+// bytes. It returns prBad, consuming nothing, if the first byte starts a
+// number but too few bytes follow. It returns prNone, consuming nothing, if
+// there are no instructions or the first byte does not start an integer.
+func (p *cffParser) parseNumber() (number int32, res parseResult) {
+	ins := p.instructions
+	if len(ins) == 0 {
+		return 0, prNone
+	}
+
+	// The first byte alone determines how many bytes the encoding occupies.
+	b0 := ins[0]
+	size := 0
+	switch {
+	case b0 == 28:
+		size = 3
+	case b0 == 29:
+		size = 5
+	case b0 < 32 || b0 == 255:
+		// Not the start of an integer operand.
+		return 0, prNone
+	case b0 < 247:
+		size = 1
+	default:
+		size = 2
+	}
+	if len(ins) < size {
+		return 0, prBad
+	}
+	p.instructions = ins[size:]
+
+	switch size {
+	case 1:
+		return int32(b0) - 139, prGood
+	case 2:
+		if b0 < 251 {
+			return +int32(b0-247)*256 + int32(ins[1]) + 108, prGood
+		}
+		return -int32(b0-251)*256 - int32(ins[1]) - 108, prGood
+	case 3:
+		// The 16-bit payload is signed.
+		return int32(int16(u16(ins[1:]))), prGood
+	default:
+		return int32(u32(ins[1:])), prGood
+	}
+}
+
+// cffOperator describes a DICT operator: how many operands it consumes and
+// what, if anything, to do when it is executed.
+type cffOperator struct {
+	// numPop is the number of stack values to pop. -1 means "array" and -2
+	// means "delta" as per 5176.CFF.pdf Table 6 "Operand Types". For any
+	// negative value, the step method empties the whole stack.
+	numPop int32
+	// name is the operator name. An empty name (i.e. the zero value for the
+	// struct overall) means an unrecognized 1-byte operator.
+	name string
+	// run is the function that implements the operator. Nil means that we
+	// ignore the operator, other than popping its arguments off the stack.
+	// The step method calls run before popping, so run can read the operands
+	// from the top of the stack.
+	run func(*cffParser)
+}
+
+// cff1ByteOperators is indexed by a 1-byte operator's value. It encodes the
+// subset of 5176.CFF.pdf Table 9 "Top DICT Operator Entries" used by this
+// implementation. Indexes without an entry hold the zero cffOperator, whose
+// empty name marks the operator as unrecognized.
+var cff1ByteOperators = [...]cffOperator{
+	0:  {numPop: 1, name: "version"},
+	1:  {numPop: 1, name: "Notice"},
+	2:  {numPop: 1, name: "FullName"},
+	3:  {numPop: 1, name: "FamilyName"},
+	4:  {numPop: 1, name: "Weight"},
+	5:  {numPop: -1, name: "FontBBox"},
+	13: {numPop: 1, name: "UniqueID"},
+	14: {numPop: -1, name: "XUID"},
+	15: {numPop: 1, name: "charset"},
+	16: {numPop: 1, name: "Encoding"},
+	17: {
+		numPop: 1,
+		name:   "CharStrings",
+		// Remember the operand on top of the stack for later use.
+		run: func(p *cffParser) {
+			p.saved.charStrings = p.stack.a[p.stack.top-1]
+		},
+	},
+	18: {numPop: 2, name: "Private"},
+}
+
+// TODO: 2-byte operators.
diff --git a/font/sfnt/sfnt.go b/font/sfnt/sfnt.go
index e0b977f..205031d 100644
--- a/font/sfnt/sfnt.go
+++ b/font/sfnt/sfnt.go
@@ -10,6 +10,9 @@
 // https://www.microsoft.com/en-us/Typography/OpenTypeSpecification.aspx

Reply all

Reply to author

Forward