Commit 21d3721e authored by Russ Cox

regexp: add SubexpNames

Fixes #2440.

R=r, dsymonds
CC=golang-dev
https://golang.org/cl/5559043
parent e3e93b0f
...@@ -289,30 +289,45 @@ func TestLiteralPrefix(t *testing.T) { ...@@ -289,30 +289,45 @@ func TestLiteralPrefix(t *testing.T) {
} }
} }
type numSubexpCase struct { type subexpCase struct {
input string input string
expected int num int
} names []string
}
var numSubexpCases = []numSubexpCase{
{``, 0}, var subexpCases = []subexpCase{
{`.*`, 0}, {``, 0, nil},
{`abba`, 0}, {`.*`, 0, nil},
{`ab(b)a`, 1}, {`abba`, 0, nil},
{`ab(.*)a`, 1}, {`ab(b)a`, 1, []string{"", ""}},
{`(.*)ab(.*)a`, 2}, {`ab(.*)a`, 1, []string{"", ""}},
{`(.*)(ab)(.*)a`, 3}, {`(.*)ab(.*)a`, 2, []string{"", "", ""}},
{`(.*)((a)b)(.*)a`, 4}, {`(.*)(ab)(.*)a`, 3, []string{"", "", "", ""}},
{`(.*)(\(ab)(.*)a`, 3}, {`(.*)((a)b)(.*)a`, 4, []string{"", "", "", "", ""}},
{`(.*)(\(a\)b)(.*)a`, 3}, {`(.*)(\(ab)(.*)a`, 3, []string{"", "", "", ""}},
} {`(.*)(\(a\)b)(.*)a`, 3, []string{"", "", "", ""}},
{`(?P<foo>.*)(?P<bar>(a)b)(?P<foo>.*)a`, 4, []string{"", "foo", "bar", "", "foo"}},
func TestNumSubexp(t *testing.T) { }
for _, c := range numSubexpCases {
func TestSubexp(t *testing.T) {
for _, c := range subexpCases {
re := MustCompile(c.input) re := MustCompile(c.input)
n := re.NumSubexp() n := re.NumSubexp()
if n != c.expected { if n != c.num {
t.Errorf("NumSubexp for %q returned %d, expected %d", c.input, n, c.expected) t.Errorf("%q: NumSubexp = %d, want %d", c.input, n, c.num)
continue
}
names := re.SubexpNames()
if len(names) != 1+n {
t.Errorf("%q: len(SubexpNames) = %d, want %d", c.input, len(names), n)
continue
}
if c.names != nil {
for i := 0; i < 1+n; i++ {
if names[i] != c.names[i] {
t.Errorf("%q: SubexpNames[%d] = %q, want %q", c.input, i, names[i], c.names[i])
}
}
} }
} }
} }
......
...@@ -85,6 +85,7 @@ type Regexp struct { ...@@ -85,6 +85,7 @@ type Regexp struct {
prefixRune rune // first rune in prefix prefixRune rune // first rune in prefix
cond syntax.EmptyOp // empty-width conditions required at start of match cond syntax.EmptyOp // empty-width conditions required at start of match
numSubexp int numSubexp int
subexpNames []string
longest bool longest bool
// cache of machines for running regexp // cache of machines for running regexp
...@@ -140,17 +141,20 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { ...@@ -140,17 +141,20 @@ func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
return nil, err return nil, err
} }
maxCap := re.MaxCap() maxCap := re.MaxCap()
capNames := re.CapNames()
re = re.Simplify() re = re.Simplify()
prog, err := syntax.Compile(re) prog, err := syntax.Compile(re)
if err != nil { if err != nil {
return nil, err return nil, err
} }
regexp := &Regexp{ regexp := &Regexp{
expr: expr, expr: expr,
prog: prog, prog: prog,
numSubexp: maxCap, numSubexp: maxCap,
cond: prog.StartCond(), subexpNames: capNames,
longest: longest, cond: prog.StartCond(),
longest: longest,
} }
regexp.prefix, regexp.prefixComplete = prog.Prefix() regexp.prefix, regexp.prefixComplete = prog.Prefix()
if regexp.prefix != "" { if regexp.prefix != "" {
...@@ -223,6 +227,15 @@ func (re *Regexp) NumSubexp() int { ...@@ -223,6 +227,15 @@ func (re *Regexp) NumSubexp() int {
return re.numSubexp return re.numSubexp
} }
// SubexpNames returns the names of the parenthesized subexpressions
// in this Regexp. The name for the first sub-expression is names[1],
// so that if m is a match slice, the name for m[i] is SubexpNames()[i].
// Since the Regexp as a whole cannot be named, names[0] is always
// the empty string. A subexpression that was not given a name is
// likewise reported as the empty string.
//
// The result is the slice stored on the Regexp itself rather than a
// copy, so the slice should not be modified.
func (re *Regexp) SubexpNames() []string {
// Return the stored slice directly; no copy is made.
return re.subexpNames
}
const endOfText rune = -1 const endOfText rune = -1
// input abstracts different representations of the input text. It provides // input abstracts different representations of the input text. It provides
......
...@@ -303,3 +303,19 @@ func (re *Regexp) MaxCap() int { ...@@ -303,3 +303,19 @@ func (re *Regexp) MaxCap() int {
} }
return m return m
} }
// CapNames collects the names of the capturing groups found in re.
//
// The returned slice has re.MaxCap()+1 elements and is indexed by
// capture number: the name recorded on a capture node with index i is
// stored at position i. Positions that no capture node writes to keep
// the empty string.
func (re *Regexp) CapNames() []string {
	// One slot per capture index up to and including MaxCap.
	result := make([]string, 1+re.MaxCap())
	// The recursive walk fills the slots in place.
	re.capNames(result)
	return result
}
// capNames records the capture-group names found in re and in all of
// its sub-expressions into names, indexed by capture number.
//
// names must be long enough to be indexed by every Cap value in the
// tree; CapNames guarantees this by sizing it to MaxCap()+1.
func (re *Regexp) capNames(names []string) {
	// A capture node contributes its own name at its capture index.
	if re.Op == OpCapture {
		names[re.Cap] = re.Name
	}
	// Descend into every child, in order, so nested groups are recorded too.
	for i := range re.Sub {
		re.Sub[i].capNames(names)
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment