Commit 8aeb8647 authored by Robert Griesemer's avatar Robert Griesemer

- handle UTF-8 text in tabwriter

R=r
DELTA=84  (27 added, 3 deleted, 54 changed)
OCL=20539
CL=20584
parent c1868bc8
......@@ -8,12 +8,12 @@ import (
"os";
"io";
"array";
"utf8";
)
// ----------------------------------------------------------------------------
// ByteArray
// TODO should use a ByteArray library eventually
type ByteArray struct {
a *[]byte;
......@@ -62,11 +62,13 @@ func (b *ByteArray) Append(s *[]byte) {
// ----------------------------------------------------------------------------
// Writer is a filter implementing the io.Write interface. It assumes
// that the incoming bytes represent ASCII encoded text consisting of
// that the incoming bytes represent UTF-8 encoded text consisting of
// lines of tab-terminated "cells". Cells in adjacent lines constitute
// a column. Writer rewrites the incoming text such that all cells in
// a column have the same width; thus it effectively aligns cells. It
// does this by adding padding where necessary.
// does this by adding padding where necessary. All characters (ASCII
// or not) are assumed to be of the same width - this may not be true
// for arbitrary UTF-8 characters visualized on the screen.
//
// Note that any text at the end of a line that is not tab-terminated
// is not a cell and does not enforce alignment of cells in adjacent
......@@ -84,8 +86,6 @@ func (b *ByteArray) Append(s *[]byte) {
// (for correct-looking results, cellwidth must correspond
// to the tabwidth in the editor used to look at the result)
// TODO Should support UTF-8 (requires more complicated width bookkeeping)
export type Writer struct {
// TODO should not export any of the fields
......@@ -97,15 +97,18 @@ export type Writer struct {
align_left bool;
// current state
buf ByteArray; // the collected text w/o tabs and newlines
width int; // width of last incomplete cell
lines array.Array; // list of lines; each line is a list of cell widths
widths array.IntArray; // list of column widths - re-used during formatting
buf ByteArray; // collected text w/o tabs and newlines
size int; // size of last incomplete cell in bytes
width int; // width of last incomplete cell in runes
lines_size array.Array; // list of lines; each line is a list of cell sizes in bytes
lines_width array.Array; // list of lines; each line is a list of cell widths in runes
widths array.IntArray; // list of column widths in runes - re-used during formatting
}
func (b *Writer) AddLine() {
b.lines.Push(array.NewIntArray(0));
b.lines_size.Push(array.NewIntArray(0));
b.lines_width.Push(array.NewIntArray(0));
}
......@@ -125,7 +128,8 @@ func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, ali
b.align_left = align_left || padchar == '\t'; // tab enforces left-alignment
b.buf.Init(1024);
b.lines.Init(0);
b.lines_size.Init(0);
b.lines_width.Init(0);
b.widths.Init(0);
b.AddLine(); // the very first line
......@@ -133,21 +137,23 @@ func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, ali
}
func (b *Writer) Line(i int) *array.IntArray {
return b.lines.At(i).(*array.IntArray);
func (b *Writer) Line(i int) (*array.IntArray, *array.IntArray) {
return
b.lines_size.At(i).(*array.IntArray),
b.lines_width.At(i).(*array.IntArray);
}
// debugging support
func (b *Writer) Dump() {
pos := 0;
for i := 0; i < b.lines.Len(); i++ {
line := b.Line(i);
for i := 0; i < b.lines_size.Len(); i++ {
line_size, line_width := b.Line(i);
print("(", i, ") ");
for j := 0; j < line.Len(); j++ {
w := line.At(j);
print("[", string(b.buf.Slice(pos, pos + w)), "]");
pos += w;
for j := 0; j < line_size.Len(); j++ {
s := line_size.At(j);
print("[", string(b.buf.Slice(pos, pos + s)), "]");
pos += s;
}
print("\n");
}
......@@ -198,16 +204,16 @@ exit:
func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error) {
pos = pos0;
for i := line0; i < line1; i++ {
line := b.Line(i);
for j := 0; j < line.Len(); j++ {
w := line.At(j);
line_size, line_width := b.Line(i);
for j := 0; j < line_size.Len(); j++ {
s, w := line_size.At(j), line_width.At(j);
if b.align_left {
err = b.Write0(b.buf.a[pos : pos + w]);
err = b.Write0(b.buf.a[pos : pos + s]);
if err != nil {
goto exit;
}
pos += w;
pos += s;
if j < b.widths.Len() {
err = b.WritePadding(w, b.widths.At(j));
if err != nil {
......@@ -223,20 +229,20 @@ func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error)
goto exit;
}
}
err = b.Write0(b.buf.a[pos : pos + w]);
err = b.Write0(b.buf.a[pos : pos + s]);
if err != nil {
goto exit;
}
pos += w;
pos += s;
}
}
if i+1 == b.lines.Len() {
if i+1 == b.lines_size.Len() {
// last buffered line - we don't have a newline, so just write
// any outstanding buffered data
err = b.Write0(b.buf.a[pos : pos + b.width]);
pos += b.width;
b.width = 0;
err = b.Write0(b.buf.a[pos : pos + b.size]);
pos += b.size;
b.size, b.width = 0, 0;
} else {
// not the last line - write newline
err = b.Write0(Newline);
......@@ -256,9 +262,9 @@ func (b *Writer) Format(pos0 int, line0, line1 int) (pos int, err *os.Error) {
column := b.widths.Len();
last := line0;
for this := line0; this < line1; this++ {
line := b.Line(this);
line_size, line_width := b.Line(this);
if column < line.Len() - 1 {
if column < line_size.Len() - 1 {
// cell exists in this column
// (note that the last cell per line is ignored)
......@@ -272,10 +278,10 @@ func (b *Writer) Format(pos0 int, line0, line1 int) (pos int, err *os.Error) {
// column block begin
width := b.cellwidth; // minimal width
for ; this < line1; this++ {
line = b.Line(this);
if column < line.Len() - 1 {
line_size, line_width = b.Line(this);
if column < line_size.Len() - 1 {
// cell exists in this column => update width
w := line.At(column) + b.padding;
w := line_width.At(column) + b.padding;
if w > width {
width = w;
}
......@@ -302,18 +308,35 @@ exit:
}
func UnicodeLen(buf *[]byte) int {
l := 0;
for i := 0; i < len(buf); {
if buf[i] < utf8.RuneSelf {
i++;
} else {
rune, size := utf8.DecodeRune(buf[i : len(buf)]);
i += size;
}
l++;
}
return l;
}
func (b *Writer) Append(buf *[]byte) {
b.buf.Append(buf);
b.width += len(buf);
b.size += len(buf);
b.width += UnicodeLen(buf);
}
/* export */ func (b *Writer) Flush() *os.Error {
dummy, err := b.Format(0, 0, b.lines.Len());
dummy, err := b.Format(0, 0, b.lines_size.Len());
// reset (even in the presence of errors)
b.buf.Clear();
b.width = 0;
b.lines.Init(0);
b.size, b.width = 0, 0;
b.lines_size.Init(0);
b.lines_width.Init(0);
b.AddLine();
return err;
}
......@@ -329,13 +352,14 @@ func (b *Writer) Append(buf *[]byte) {
i0 = i + 1; // exclude ch from (next) cell
// terminate cell
last := b.Line(b.lines.Len() - 1);
last.Push(b.width);
b.width = 0;
last_size, last_width := b.Line(b.lines_size.Len() - 1);
last_size.Push(b.size);
last_width.Push(b.width);
b.size, b.width = 0, 0;
if ch == '\n' {
b.AddLine();
if last.Len() == 1 {
if last_size.Len() == 1 {
// The previous line has only one cell which does not have
// an impact on the formatting of the following lines (the
// last cell per line is ignored by Format), thus we can
......
......@@ -189,24 +189,24 @@ export func Test(t *testing.T) {
Check(
t, 8, 1, ' ', true,
"a\tb\tc\n"
"aa\tbbb\tcccc\tddddd\n"
"\tb\tc\n"
"aa\t\u672c\u672c\u672c\tcccc\tddddd\n"
"aaa\tbbbb\n",
"a b c\n"
"aa bbb cccc ddddd\n"
" b c\n"
"aa 本本本 cccc ddddd\n"
"aaa bbbb\n"
);
Check(
t, 8, 1, ' ', false,
"a\tb\tc\t\n"
"aa\tbbb\tcccc\tddddd\t\n"
"aaa\tbbbb\t\n",
"a\tè\tc\t\n"
"aa\tèèè\tcccc\tddddd\t\n"
"aaa\tèèèè\t\n",
" a b c\n"
" aa bbb cccc ddddd\n"
" aaa bbbb\n"
" a è c\n"
" aa èèè cccc ddddd\n"
" aaa èèèè\n"
);
Check(
......@@ -233,7 +233,7 @@ export func Test(t *testing.T) {
Check(
t, 4, 1, '-', true,
"4444\t333\t22\t1\t333\n"
"4444\t日本語\t22\t1\t333\n"
"999999999\t22\n"
"7\t22\n"
"\t\t\t88888888\n"
......@@ -241,7 +241,7 @@ export func Test(t *testing.T) {
"666666\t666666\t666666\t4444\n"
"1\t1\t999999999\t0000000000\n",
"4444------333-22--1---333\n"
"4444------日本語-22--1---333\n"
"999999999-22\n"
"7---------22\n"
"------------------88888888\n"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment