Commit 1f4d54ea authored by Rob Pike's avatar Rob Pike

regexp: eliminate vector in favor of append.

R=rsc
CC=golang-dev
https://golang.org/cl/2795041
parent fafb116c
...@@ -60,7 +60,6 @@ package regexp ...@@ -60,7 +60,6 @@ package regexp
import ( import (
"bytes" "bytes"
"container/vector"
"io" "io"
"os" "os"
"strings" "strings"
...@@ -117,7 +116,7 @@ type Regexp struct { ...@@ -117,7 +116,7 @@ type Regexp struct {
expr string // the original expression expr string // the original expression
prefix string // initial plain text string prefix string // initial plain text string
prefixBytes []byte // initial plain text bytes prefixBytes []byte // initial plain text bytes
inst *vector.Vector inst []instr
start instr // first instruction of machine start instr // first instruction of machine
prefixStart instr // where to start if there is a prefix prefixStart instr // where to start if there is a prefix
nbra int // number of brackets in expression, for subexpressions nbra int // number of brackets in expression, for subexpressions
...@@ -190,8 +189,8 @@ func newChar(char int) *_Char { ...@@ -190,8 +189,8 @@ func newChar(char int) *_Char {
type _CharClass struct { type _CharClass struct {
common common
negate bool // is character class negated? ([^a-z]) negate bool // is character class negated? ([^a-z])
// vector of int, stored pairwise: [a-z] is (a,z); x is (x,x): // slice of int, stored pairwise: [a-z] is (a,z); x is (x,x):
ranges *vector.IntVector ranges []int
cmin, cmax int cmin, cmax int
} }
...@@ -202,9 +201,9 @@ func (cclass *_CharClass) print() { ...@@ -202,9 +201,9 @@ func (cclass *_CharClass) print() {
if cclass.negate { if cclass.negate {
print(" (negated)") print(" (negated)")
} }
for i := 0; i < cclass.ranges.Len(); i += 2 { for i := 0; i < len(cclass.ranges); i += 2 {
l := cclass.ranges.At(i) l := cclass.ranges[i]
r := cclass.ranges.At(i + 1) r := cclass.ranges[i+1]
if l == r { if l == r {
print(" [", string(l), "]") print(" [", string(l), "]")
} else { } else {
...@@ -215,8 +214,7 @@ func (cclass *_CharClass) print() { ...@@ -215,8 +214,7 @@ func (cclass *_CharClass) print() {
func (cclass *_CharClass) addRange(a, b int) { func (cclass *_CharClass) addRange(a, b int) {
// range is a through b inclusive // range is a through b inclusive
cclass.ranges.Push(a) cclass.ranges = append(cclass.ranges, a, b)
cclass.ranges.Push(b)
if a < cclass.cmin { if a < cclass.cmin {
cclass.cmin = a cclass.cmin = a
} }
...@@ -229,7 +227,7 @@ func (cclass *_CharClass) matches(c int) bool { ...@@ -229,7 +227,7 @@ func (cclass *_CharClass) matches(c int) bool {
if c < cclass.cmin || c > cclass.cmax { if c < cclass.cmin || c > cclass.cmax {
return cclass.negate return cclass.negate
} }
ranges := []int(*cclass.ranges) ranges := cclass.ranges
for i := 0; i < len(ranges); i = i + 2 { for i := 0; i < len(ranges); i = i + 2 {
if ranges[i] <= c && c <= ranges[i+1] { if ranges[i] <= c && c <= ranges[i+1] {
return !cclass.negate return !cclass.negate
...@@ -240,7 +238,7 @@ func (cclass *_CharClass) matches(c int) bool { ...@@ -240,7 +238,7 @@ func (cclass *_CharClass) matches(c int) bool {
func newCharClass() *_CharClass { func newCharClass() *_CharClass {
c := new(_CharClass) c := new(_CharClass)
c.ranges = new(vector.IntVector) c.ranges = make([]int, 0, 4)
c.cmin = 0x10FFFF + 1 // MaxRune + 1 c.cmin = 0x10FFFF + 1 // MaxRune + 1
c.cmax = -1 c.cmax = -1
return c return c
...@@ -298,8 +296,8 @@ func (nop *_Nop) kind() int { return _NOP } ...@@ -298,8 +296,8 @@ func (nop *_Nop) kind() int { return _NOP }
func (nop *_Nop) print() { print("nop") } func (nop *_Nop) print() { print("nop") }
func (re *Regexp) add(i instr) instr { func (re *Regexp) add(i instr) instr {
i.setIndex(re.inst.Len()) i.setIndex(len(re.inst))
re.inst.Push(i) re.inst = append(re.inst, i)
return i return i
} }
...@@ -380,15 +378,15 @@ func (p *parser) charClass() instr { ...@@ -380,15 +378,15 @@ func (p *parser) charClass() instr {
p.error(ErrBadRange) p.error(ErrBadRange)
} }
// Is it [^\n]? // Is it [^\n]?
if cc.negate && cc.ranges.Len() == 2 && if cc.negate && len(cc.ranges) == 2 &&
cc.ranges.At(0) == '\n' && cc.ranges.At(1) == '\n' { cc.ranges[0] == '\n' && cc.ranges[1] == '\n' {
nl := new(_NotNl) nl := new(_NotNl)
p.re.add(nl) p.re.add(nl)
return nl return nl
} }
// Special common case: "[a]" -> "a" // Special common case: "[a]" -> "a"
if !cc.negate && cc.ranges.Len() == 2 && cc.ranges.At(0) == cc.ranges.At(1) { if !cc.negate && len(cc.ranges) == 2 && cc.ranges[0] == cc.ranges[1] {
c := newChar(cc.ranges.At(0)) c := newChar(cc.ranges[0])
p.re.add(c) p.re.add(c)
return c return c
} }
...@@ -606,8 +604,7 @@ func unNop(i instr) instr { ...@@ -606,8 +604,7 @@ func unNop(i instr) instr {
} }
func (re *Regexp) eliminateNops() { func (re *Regexp) eliminateNops() {
for i := 0; i < re.inst.Len(); i++ { for _, inst := range re.inst {
inst := re.inst.At(i).(instr)
if inst.kind() == _END { if inst.kind() == _END {
continue continue
} }
...@@ -621,8 +618,7 @@ func (re *Regexp) eliminateNops() { ...@@ -621,8 +618,7 @@ func (re *Regexp) eliminateNops() {
func (re *Regexp) dump() { func (re *Regexp) dump() {
print("prefix <", re.prefix, ">\n") print("prefix <", re.prefix, ">\n")
for i := 0; i < re.inst.Len(); i++ { for _, inst := range re.inst {
inst := re.inst.At(i).(instr)
print(inst.index(), ": ") print(inst.index(), ": ")
inst.print() inst.print()
if inst.kind() != _END { if inst.kind() != _END {
...@@ -664,17 +660,17 @@ func (re *Regexp) setPrefix() { ...@@ -664,17 +660,17 @@ func (re *Regexp) setPrefix() {
var b []byte var b []byte
var utf = make([]byte, utf8.UTFMax) var utf = make([]byte, utf8.UTFMax)
// First instruction is start; skip that. // First instruction is start; skip that.
i := re.inst.At(0).(instr).next().index() i := re.inst[0].next().index()
Loop: Loop:
for i < re.inst.Len() { for i < len(re.inst) {
inst := re.inst.At(i).(instr) inst := re.inst[i]
// stop if this is not a char // stop if this is not a char
if inst.kind() != _CHAR { if inst.kind() != _CHAR {
break break
} }
// stop if this char can be followed by a match for an empty string, // stop if this char can be followed by a match for an empty string,
// which includes closures, ^, and $. // which includes closures, ^, and $.
switch re.inst.At(inst.next().index()).(instr).kind() { switch re.inst[inst.next().index()].kind() {
case _BOT, _EOT, _ALT: case _BOT, _EOT, _ALT:
break Loop break Loop
} }
...@@ -683,7 +679,7 @@ Loop: ...@@ -683,7 +679,7 @@ Loop:
i = inst.next().index() i = inst.next().index()
} }
// point prefixStart instruction to first non-CHAR after prefix // point prefixStart instruction to first non-CHAR after prefix
re.prefixStart = re.inst.At(i).(instr) re.prefixStart = re.inst[i]
re.prefixBytes = b re.prefixBytes = b
re.prefix = string(b) re.prefix = string(b)
} }
...@@ -700,7 +696,7 @@ func Compile(str string) (regexp *Regexp, error os.Error) { ...@@ -700,7 +696,7 @@ func Compile(str string) (regexp *Regexp, error os.Error) {
} }
}() }()
regexp.expr = str regexp.expr = str
regexp.inst = new(vector.Vector) regexp.inst = make([]instr, 0, 10)
regexp.doParse() regexp.doParse()
return return
} }
...@@ -830,8 +826,8 @@ func (a *matchArena) addState(s []state, inst instr, prefixed bool, match *match ...@@ -830,8 +826,8 @@ func (a *matchArena) addState(s []state, inst instr, prefixed bool, match *match
// If bytes == nil, scan str. // If bytes == nil, scan str.
func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int { func (re *Regexp) doExecute(str string, bytestr []byte, pos int) []int {
var s [2][]state var s [2][]state
s[0] = make([]state, 10)[0:0] s[0] = make([]state, 0, 10)
s[1] = make([]state, 10)[0:0] s[1] = make([]state, 0, 10)
in, out := 0, 1 in, out := 0, 1
var final state var final state
found := false found := false
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment