Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
go
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
go
Commits
74a60ed0
Commit
74a60ed0
authored
Jan 15, 2009
by
Rob Pike
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
casify lib/regexp
R=rsc DELTA=259 (0 added, 0 deleted, 259 changed) OCL=22897 CL=22900
parent
2527bba9
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
235 additions
and
235 deletions
+235
-235
src/lib/regexp/all_test.go
src/lib/regexp/all_test.go
+48
-48
src/lib/regexp/regexp.go
src/lib/regexp/regexp.go
+187
-187
No files found.
src/lib/regexp/all_test.go
View file @
74a60ed0
...
...
@@ -30,59 +30,59 @@ var good_re = []string{
}
// TODO: nice to do this with a map
type
S
tringError
struct
{
type
s
tringError
struct
{
re
string
;
err
*
os
.
Error
;
}
var
bad_re
=
[]
S
tringError
{
S
tringError
{
`*`
,
regexp
.
ErrBareClosure
},
S
tringError
{
`(abc`
,
regexp
.
ErrUnmatchedLpar
},
S
tringError
{
`abc)`
,
regexp
.
ErrUnmatchedRpar
},
S
tringError
{
`x[a-z`
,
regexp
.
ErrUnmatchedLbkt
},
S
tringError
{
`abc]`
,
regexp
.
ErrUnmatchedRbkt
},
S
tringError
{
`[z-a]`
,
regexp
.
ErrBadRange
},
S
tringError
{
`abc\`
,
regexp
.
ErrExtraneousBackslash
},
S
tringError
{
`a**`
,
regexp
.
ErrBadClosure
},
S
tringError
{
`a*+`
,
regexp
.
ErrBadClosure
},
S
tringError
{
`a??`
,
regexp
.
ErrBadClosure
},
S
tringError
{
`*`
,
regexp
.
ErrBareClosure
},
S
tringError
{
`\x`
,
regexp
.
ErrBadBackslash
},
var
bad_re
=
[]
s
tringError
{
s
tringError
{
`*`
,
regexp
.
ErrBareClosure
},
s
tringError
{
`(abc`
,
regexp
.
ErrUnmatchedLpar
},
s
tringError
{
`abc)`
,
regexp
.
ErrUnmatchedRpar
},
s
tringError
{
`x[a-z`
,
regexp
.
ErrUnmatchedLbkt
},
s
tringError
{
`abc]`
,
regexp
.
ErrUnmatchedRbkt
},
s
tringError
{
`[z-a]`
,
regexp
.
ErrBadRange
},
s
tringError
{
`abc\`
,
regexp
.
ErrExtraneousBackslash
},
s
tringError
{
`a**`
,
regexp
.
ErrBadClosure
},
s
tringError
{
`a*+`
,
regexp
.
ErrBadClosure
},
s
tringError
{
`a??`
,
regexp
.
ErrBadClosure
},
s
tringError
{
`*`
,
regexp
.
ErrBareClosure
},
s
tringError
{
`\x`
,
regexp
.
ErrBadBackslash
},
}
type
V
ec
[]
int
;
type
v
ec
[]
int
;
type
T
ester
struct
{
type
t
ester
struct
{
re
string
;
text
string
;
match
V
ec
;
}
var
matches
=
[]
T
ester
{
Tester
{
``
,
""
,
V
ec
{
0
,
0
}
},
Tester
{
`a`
,
"a"
,
V
ec
{
0
,
1
}
},
Tester
{
`x`
,
"y"
,
V
ec
{}
},
Tester
{
`b`
,
"abc"
,
V
ec
{
1
,
2
}
},
Tester
{
`.`
,
"a"
,
V
ec
{
0
,
1
}
},
Tester
{
`.*`
,
"abcdef"
,
V
ec
{
0
,
6
}
},
Tester
{
`^abcd$`
,
"abcd"
,
V
ec
{
0
,
4
}
},
Tester
{
`^bcd'`
,
"abcdef"
,
V
ec
{}
},
Tester
{
`^abcd$`
,
"abcde"
,
V
ec
{}
},
Tester
{
`a+`
,
"baaab"
,
V
ec
{
1
,
4
}
},
Tester
{
`a*`
,
"baaab"
,
V
ec
{
0
,
0
}
},
Tester
{
`[a-z]+`
,
"abcd"
,
V
ec
{
0
,
4
}
},
Tester
{
`[^a-z]+`
,
"ab1234cd"
,
V
ec
{
2
,
6
}
},
Tester
{
`[a\-\]z]+`
,
"az]-bcz"
,
V
ec
{
0
,
4
}
},
Tester
{
`[日本語]+`
,
"日本語日本語"
,
V
ec
{
0
,
18
}
},
Tester
{
`()`
,
""
,
V
ec
{
0
,
0
,
0
,
0
}
},
Tester
{
`(a)`
,
"a"
,
V
ec
{
0
,
1
,
0
,
1
}
},
Tester
{
`(.)(.)`
,
"日a"
,
V
ec
{
0
,
4
,
0
,
3
,
3
,
4
}
},
Tester
{
`(.*)`
,
""
,
V
ec
{
0
,
0
,
0
,
0
}
},
Tester
{
`(.*)`
,
"abcd"
,
V
ec
{
0
,
4
,
0
,
4
}
},
Tester
{
`(..)(..)`
,
"abcd"
,
V
ec
{
0
,
4
,
0
,
2
,
2
,
4
}
},
Tester
{
`(([^xyz]*)(d))`
,
"abcd"
,
V
ec
{
0
,
4
,
0
,
4
,
0
,
3
,
3
,
4
}
},
Tester
{
`((a|b|c)*(d))`
,
"abcd"
,
V
ec
{
0
,
4
,
0
,
4
,
2
,
3
,
3
,
4
}
},
Tester
{
`(((a|b|c)*)(d))`
,
"abcd"
,
V
ec
{
0
,
4
,
0
,
4
,
0
,
3
,
2
,
3
,
3
,
4
}
},
Tester
{
`a*(|(b))c*`
,
"aacc"
,
V
ec
{
0
,
4
,
2
,
2
,
-
1
,
-
1
}
},
match
v
ec
;
}
var
matches
=
[]
t
ester
{
tester
{
``
,
""
,
v
ec
{
0
,
0
}
},
tester
{
`a`
,
"a"
,
v
ec
{
0
,
1
}
},
tester
{
`x`
,
"y"
,
v
ec
{}
},
tester
{
`b`
,
"abc"
,
v
ec
{
1
,
2
}
},
tester
{
`.`
,
"a"
,
v
ec
{
0
,
1
}
},
tester
{
`.*`
,
"abcdef"
,
v
ec
{
0
,
6
}
},
tester
{
`^abcd$`
,
"abcd"
,
v
ec
{
0
,
4
}
},
tester
{
`^bcd'`
,
"abcdef"
,
v
ec
{}
},
tester
{
`^abcd$`
,
"abcde"
,
v
ec
{}
},
tester
{
`a+`
,
"baaab"
,
v
ec
{
1
,
4
}
},
tester
{
`a*`
,
"baaab"
,
v
ec
{
0
,
0
}
},
tester
{
`[a-z]+`
,
"abcd"
,
v
ec
{
0
,
4
}
},
tester
{
`[^a-z]+`
,
"ab1234cd"
,
v
ec
{
2
,
6
}
},
tester
{
`[a\-\]z]+`
,
"az]-bcz"
,
v
ec
{
0
,
4
}
},
tester
{
`[日本語]+`
,
"日本語日本語"
,
v
ec
{
0
,
18
}
},
tester
{
`()`
,
""
,
v
ec
{
0
,
0
,
0
,
0
}
},
tester
{
`(a)`
,
"a"
,
v
ec
{
0
,
1
,
0
,
1
}
},
tester
{
`(.)(.)`
,
"日a"
,
v
ec
{
0
,
4
,
0
,
3
,
3
,
4
}
},
tester
{
`(.*)`
,
""
,
v
ec
{
0
,
0
,
0
,
0
}
},
tester
{
`(.*)`
,
"abcd"
,
v
ec
{
0
,
4
,
0
,
4
}
},
tester
{
`(..)(..)`
,
"abcd"
,
v
ec
{
0
,
4
,
0
,
2
,
2
,
4
}
},
tester
{
`(([^xyz]*)(d))`
,
"abcd"
,
v
ec
{
0
,
4
,
0
,
4
,
0
,
3
,
3
,
4
}
},
tester
{
`((a|b|c)*(d))`
,
"abcd"
,
v
ec
{
0
,
4
,
0
,
4
,
2
,
3
,
3
,
4
}
},
tester
{
`(((a|b|c)*)(d))`
,
"abcd"
,
v
ec
{
0
,
4
,
0
,
4
,
0
,
3
,
2
,
3
,
3
,
4
}
},
tester
{
`a*(|(b))c*`
,
"aacc"
,
v
ec
{
0
,
4
,
2
,
2
,
-
1
,
-
1
}
},
}
func
CompileTest
(
t
*
testing
.
T
,
expr
string
,
error
*
os
.
Error
)
regexp
.
Regexp
{
...
...
@@ -93,7 +93,7 @@ func CompileTest(t *testing.T, expr string, error *os.Error) regexp.Regexp {
return
re
}
func
Print
V
ec
(
t
*
testing
.
T
,
m
[]
int
)
{
func
Print
v
ec
(
t
*
testing
.
T
,
m
[]
int
)
{
l
:=
len
(
m
);
if
l
==
0
{
t
.
Log
(
"
\t
<no match>"
);
...
...
@@ -149,9 +149,9 @@ func ExecuteTest(t *testing.T, expr string, str string, match []int) {
m
:=
re
.
Execute
(
str
);
if
!
Equal
(
m
,
match
)
{
t
.
Error
(
"Execute failure on `"
,
expr
,
"` matching `"
,
str
,
"`:"
);
Print
V
ec
(
t
,
m
);
Print
v
ec
(
t
,
m
);
t
.
Log
(
"should be:"
);
Print
V
ec
(
t
,
match
);
Print
v
ec
(
t
,
match
);
}
}
...
...
src/lib/regexp/regexp.go
View file @
74a60ed0
...
...
@@ -11,7 +11,7 @@ import (
"array"
;
)
export
var
debug
=
false
;
var
debug
=
false
;
export
var
ErrInternal
=
os
.
NewError
(
"internal error"
);
...
...
@@ -26,110 +26,110 @@ export var ErrBareClosure = os.NewError("closure applies to nothing");
export
var
ErrBadBackslash
=
os
.
NewError
(
"illegal backslash escape"
);
// An instruction executed by the NFA
type
Inst
interface
{
Type
()
int
;
// the type of this instruction:
CHAR,
ANY, etc.
Next
()
Inst
;
// the instruction to execute after this one
SetNext
(
i
Inst
);
type
instr
interface
{
Type
()
int
;
// the type of this instruction:
cCHAR, c
ANY, etc.
Next
()
instr
;
// the instruction to execute after this one
SetNext
(
i
instr
);
Index
()
int
;
SetIndex
(
i
int
);
Print
();
}
// Fields and methods common to all instructions
type
Common
struct
{
next
Inst
;
type
i
Common
struct
{
next
instr
;
index
int
;
}
func
(
c
*
Common
)
Next
()
Inst
{
return
c
.
next
}
func
(
c
*
Common
)
SetNext
(
i
Inst
)
{
c
.
next
=
i
}
func
(
c
*
Common
)
Index
()
int
{
return
c
.
index
}
func
(
c
*
Common
)
SetIndex
(
i
int
)
{
c
.
index
=
i
}
func
(
c
*
iCommon
)
Next
()
instr
{
return
c
.
next
}
func
(
c
*
iCommon
)
SetNext
(
i
instr
)
{
c
.
next
=
i
}
func
(
c
*
i
Common
)
Index
()
int
{
return
c
.
index
}
func
(
c
*
i
Common
)
SetIndex
(
i
int
)
{
c
.
index
=
i
}
type
RE
struct
{
type
regExp
struct
{
expr
string
;
// the original expression
ch
chan
<-
*
RE
;
// reply channel when we're done
ch
chan
<-
*
regExp
;
// reply channel when we're done
error
*
os
.
Error
;
// compile- or run-time error; nil if OK
inst
*
array
.
Array
;
start
Inst
;
start
instr
;
nbra
int
;
// number of brackets in expression, for subexpressions
}
const
(
START
// beginning of program
c
START
// beginning of program
=
iota
;
END
;
// end of program: success
BOT
;
// '^' beginning of text
EOT
;
// '$' end of text
CHAR
;
// 'a' regular character
CHARCLASS
;
// [a-z] character class
ANY
;
// '.' any character
BRA
;
// '(' parenthesized expression
EBRA
;
// ')'; end of '(' parenthesized expression
ALT
;
// '|' alternation
NOP
;
// do nothing; makes it easy to link without patching
c
END
;
// end of program: success
c
BOT
;
// '^' beginning of text
c
EOT
;
// '$' end of text
c
CHAR
;
// 'a' regular character
c
CHARCLASS
;
// [a-z] character class
c
ANY
;
// '.' any character
c
BRA
;
// '(' parenthesized expression
c
EBRA
;
// ')'; end of '(' parenthesized expression
c
ALT
;
// '|' alternation
c
NOP
;
// do nothing; makes it easy to link without patching
)
// --- START start of program
type
Start
struct
{
Common
type
i
Start
struct
{
i
Common
}
func
(
start
*
Start
)
Type
()
int
{
return
START
}
func
(
start
*
Start
)
Print
()
{
print
(
"start"
)
}
func
(
start
*
iStart
)
Type
()
int
{
return
c
START
}
func
(
start
*
i
Start
)
Print
()
{
print
(
"start"
)
}
// --- END end of program
type
End
struct
{
Common
type
i
End
struct
{
i
Common
}
func
(
end
*
End
)
Type
()
int
{
return
END
}
func
(
end
*
End
)
Print
()
{
print
(
"end"
)
}
func
(
end
*
iEnd
)
Type
()
int
{
return
c
END
}
func
(
end
*
i
End
)
Print
()
{
print
(
"end"
)
}
// --- BOT beginning of text
type
Bot
struct
{
Common
type
i
Bot
struct
{
i
Common
}
func
(
bot
*
Bot
)
Type
()
int
{
return
BOT
}
func
(
bot
*
Bot
)
Print
()
{
print
(
"bot"
)
}
func
(
bot
*
iBot
)
Type
()
int
{
return
c
BOT
}
func
(
bot
*
i
Bot
)
Print
()
{
print
(
"bot"
)
}
// --- EOT end of text
type
Eot
struct
{
Common
type
i
Eot
struct
{
i
Common
}
func
(
eot
*
Eot
)
Type
()
int
{
return
EOT
}
func
(
eot
*
Eot
)
Print
()
{
print
(
"eot"
)
}
func
(
eot
*
iEot
)
Type
()
int
{
return
c
EOT
}
func
(
eot
*
i
Eot
)
Print
()
{
print
(
"eot"
)
}
// --- CHAR a regular character
type
Char
struct
{
Common
;
type
i
Char
struct
{
i
Common
;
char
int
;
}
func
(
char
*
Char
)
Type
()
int
{
return
CHAR
}
func
(
char
*
Char
)
Print
()
{
print
(
"char "
,
string
(
char
.
char
))
}
func
(
char
*
iChar
)
Type
()
int
{
return
c
CHAR
}
func
(
char
*
i
Char
)
Print
()
{
print
(
"char "
,
string
(
char
.
char
))
}
func
NewChar
(
char
int
)
*
Char
{
c
:=
new
(
Char
);
func
newChar
(
char
int
)
*
i
Char
{
c
:=
new
(
i
Char
);
c
.
char
=
char
;
return
c
;
}
// --- CHARCLASS [a-z]
type
CharClass
struct
{
Common
;
type
i
CharClass
struct
{
i
Common
;
char
int
;
negate
bool
;
// is character class negated? ([^a-z])
// array of int, stored pairwise: [a-z] is (a,z); x is (x,x):
ranges
*
array
.
IntArray
;
}
func
(
cclass
*
CharClass
)
Type
()
int
{
return
CHARCLASS
}
func
(
cclass
*
iCharClass
)
Type
()
int
{
return
c
CHARCLASS
}
func
(
cclass
*
CharClass
)
Print
()
{
func
(
cclass
*
i
CharClass
)
Print
()
{
print
(
"charclass"
);
if
cclass
.
negate
{
print
(
" (negated)"
);
...
...
@@ -145,13 +145,13 @@ func (cclass *CharClass) Print() {
}
}
func
(
cclass
*
CharClass
)
AddRange
(
a
,
b
int
)
{
func
(
cclass
*
i
CharClass
)
AddRange
(
a
,
b
int
)
{
// range is a through b inclusive
cclass
.
ranges
.
Push
(
a
);
cclass
.
ranges
.
Push
(
b
);
}
func
(
cclass
*
CharClass
)
Matches
(
c
int
)
bool
{
func
(
cclass
*
i
CharClass
)
Matches
(
c
int
)
bool
{
for
i
:=
0
;
i
<
cclass
.
ranges
.
Len
();
i
=
i
+
2
{
min
:=
cclass
.
ranges
.
At
(
i
);
max
:=
cclass
.
ranges
.
At
(
i
+
1
);
...
...
@@ -162,84 +162,84 @@ func (cclass *CharClass) Matches(c int) bool {
return
cclass
.
negate
}
func
NewCharClass
()
*
CharClass
{
c
:=
new
(
CharClass
);
func
newCharClass
()
*
i
CharClass
{
c
:=
new
(
i
CharClass
);
c
.
ranges
=
array
.
NewIntArray
(
0
);
return
c
;
}
// --- ANY any character
type
Any
struct
{
Common
type
i
Any
struct
{
i
Common
}
func
(
any
*
Any
)
Type
()
int
{
return
ANY
}
func
(
any
*
Any
)
Print
()
{
print
(
"any"
)
}
func
(
any
*
iAny
)
Type
()
int
{
return
c
ANY
}
func
(
any
*
i
Any
)
Print
()
{
print
(
"any"
)
}
// --- BRA parenthesized expression
type
Bra
struct
{
Common
;
type
i
Bra
struct
{
i
Common
;
n
int
;
// subexpression number
}
func
(
bra
*
Bra
)
Type
()
int
{
return
BRA
}
func
(
bra
*
Bra
)
Print
()
{
print
(
"bra"
,
bra
.
n
);
}
func
(
bra
*
iBra
)
Type
()
int
{
return
c
BRA
}
func
(
bra
*
i
Bra
)
Print
()
{
print
(
"bra"
,
bra
.
n
);
}
// --- EBRA end of parenthesized expression
type
Ebra
struct
{
Common
;
type
i
Ebra
struct
{
i
Common
;
n
int
;
// subexpression number
}
func
(
ebra
*
Ebra
)
Type
()
int
{
return
EBRA
}
func
(
ebra
*
Ebra
)
Print
()
{
print
(
"ebra "
,
ebra
.
n
);
}
func
(
ebra
*
iEbra
)
Type
()
int
{
return
c
EBRA
}
func
(
ebra
*
i
Ebra
)
Print
()
{
print
(
"ebra "
,
ebra
.
n
);
}
// --- ALT alternation
type
Alt
struct
{
Common
;
left
Inst
;
// other branch
type
i
Alt
struct
{
i
Common
;
left
instr
;
// other branch
}
func
(
alt
*
Alt
)
Type
()
int
{
return
ALT
}
func
(
alt
*
Alt
)
Print
()
{
print
(
"alt("
,
alt
.
left
.
Index
(),
")"
);
}
func
(
alt
*
iAlt
)
Type
()
int
{
return
c
ALT
}
func
(
alt
*
i
Alt
)
Print
()
{
print
(
"alt("
,
alt
.
left
.
Index
(),
")"
);
}
// --- NOP no operation
type
Nop
struct
{
Common
type
i
Nop
struct
{
i
Common
}
func
(
nop
*
Nop
)
Type
()
int
{
return
NOP
}
func
(
nop
*
Nop
)
Print
()
{
print
(
"nop"
)
}
func
(
nop
*
iNop
)
Type
()
int
{
return
c
NOP
}
func
(
nop
*
i
Nop
)
Print
()
{
print
(
"nop"
)
}
// report error and exit compiling/executing goroutine
func
(
re
*
RE
)
Error
(
err
*
os
.
Error
)
{
func
(
re
*
regExp
)
Error
(
err
*
os
.
Error
)
{
re
.
error
=
err
;
re
.
ch
<-
re
;
sys
.
goexit
();
}
func
(
re
*
RE
)
Add
(
i
Inst
)
Inst
{
func
(
re
*
regExp
)
Add
(
i
instr
)
instr
{
i
.
SetIndex
(
re
.
inst
.
Len
());
re
.
inst
.
Push
(
i
);
return
i
;
}
type
P
arser
struct
{
re
*
RE
;
type
p
arser
struct
{
re
*
regExp
;
nlpar
int
;
// number of unclosed lpars
pos
int
;
ch
int
;
}
const
EOF
=
-
1
const
endOfFile
=
-
1
func
(
p
*
P
arser
)
c
()
int
{
func
(
p
*
p
arser
)
c
()
int
{
return
p
.
ch
;
}
func
(
p
*
P
arser
)
nextc
()
int
{
func
(
p
*
p
arser
)
nextc
()
int
{
if
p
.
pos
>=
len
(
p
.
re
.
expr
)
{
p
.
ch
=
EOF
p
.
ch
=
endOfFile
}
else
{
c
,
w
:=
sys
.
stringtorune
(
p
.
re
.
expr
,
p
.
pos
);
p
.
ch
=
c
;
...
...
@@ -248,11 +248,11 @@ func (p *Parser) nextc() int {
return
p
.
ch
;
}
func
NewParser
(
re
*
RE
)
*
P
arser
{
p
arser
:=
new
(
P
arser
);
p
arser
.
re
=
re
;
p
arser
.
nextc
();
// load p.ch
return
p
arser
;
func
newParser
(
re
*
regExp
)
*
p
arser
{
p
:=
new
(
p
arser
);
p
.
re
=
re
;
p
.
nextc
();
// load p.ch
return
p
;
}
/*
...
...
@@ -274,9 +274,9 @@ Grammar:
*/
func
(
p
*
Parser
)
Regexp
()
(
start
,
end
Inst
)
func
(
p
*
parser
)
Regexp
()
(
start
,
end
instr
)
var
NULL
Inst
var
iNULL
instr
func
special
(
c
int
)
bool
{
s
:=
`\.+*?()|[]`
;
...
...
@@ -298,8 +298,8 @@ func specialcclass(c int) bool {
return
false
}
func
(
p
*
Parser
)
CharClass
()
Inst
{
cc
:=
N
ewCharClass
();
func
(
p
*
parser
)
CharClass
()
instr
{
cc
:=
n
ewCharClass
();
p
.
re
.
Add
(
cc
);
if
p
.
c
()
==
'^'
{
cc
.
negate
=
true
;
...
...
@@ -308,7 +308,7 @@ func (p *Parser) CharClass() Inst {
left
:=
-
1
;
for
{
switch
c
:=
p
.
c
();
c
{
case
']'
,
EOF
:
case
']'
,
endOfFile
:
if
left
>=
0
{
p
.
re
.
Error
(
ErrBadRange
);
}
...
...
@@ -318,7 +318,7 @@ func (p *Parser) CharClass() Inst {
case
'\\'
:
c
=
p
.
nextc
();
switch
{
case
c
==
EOF
:
case
c
==
endOfFile
:
p
.
re
.
Error
(
ErrExtraneousBackslash
);
case
c
==
'n'
:
c
=
'\n'
;
...
...
@@ -346,33 +346,33 @@ func (p *Parser) CharClass() Inst {
}
}
}
return
NULL
return
i
NULL
}
func
(
p
*
Parser
)
Term
()
(
start
,
end
Inst
)
{
func
(
p
*
parser
)
Term
()
(
start
,
end
instr
)
{
switch
c
:=
p
.
c
();
c
{
case
'|'
,
EOF
:
return
NULL
,
NULL
;
case
'|'
,
endOfFile
:
return
iNULL
,
i
NULL
;
case
'*'
,
'+'
:
p
.
re
.
Error
(
ErrBareClosure
);
case
')'
:
if
p
.
nlpar
==
0
{
p
.
re
.
Error
(
ErrUnmatchedRpar
);
}
return
NULL
,
NULL
;
return
iNULL
,
i
NULL
;
case
']'
:
p
.
re
.
Error
(
ErrUnmatchedRbkt
);
case
'^'
:
p
.
nextc
();
start
=
p
.
re
.
Add
(
new
(
Bot
));
start
=
p
.
re
.
Add
(
new
(
i
Bot
));
return
start
,
start
;
case
'$'
:
p
.
nextc
();
start
=
p
.
re
.
Add
(
new
(
Eot
));
start
=
p
.
re
.
Add
(
new
(
i
Eot
));
return
start
,
start
;
case
'.'
:
p
.
nextc
();
start
=
p
.
re
.
Add
(
new
(
Any
));
start
=
p
.
re
.
Add
(
new
(
i
Any
));
return
start
,
start
;
case
'['
:
p
.
nextc
();
...
...
@@ -393,14 +393,14 @@ func (p *Parser) Term() (start, end Inst) {
}
p
.
nlpar
--
;
p
.
nextc
();
bra
:=
new
(
Bra
);
bra
:=
new
(
i
Bra
);
p
.
re
.
Add
(
bra
);
ebra
:=
new
(
Ebra
);
ebra
:=
new
(
i
Ebra
);
p
.
re
.
Add
(
ebra
);
bra
.
n
=
nbra
;
ebra
.
n
=
nbra
;
if
start
==
NULL
{
if
end
==
NULL
{
p
.
re
.
Error
(
ErrInternal
)
}
if
start
==
i
NULL
{
if
end
==
i
NULL
{
p
.
re
.
Error
(
ErrInternal
)
}
start
=
ebra
}
else
{
end
.
SetNext
(
ebra
);
...
...
@@ -410,7 +410,7 @@ func (p *Parser) Term() (start, end Inst) {
case
'\\'
:
c
=
p
.
nextc
();
switch
{
case
c
==
EOF
:
case
c
==
endOfFile
:
p
.
re
.
Error
(
ErrExtraneousBackslash
);
case
c
==
'n'
:
c
=
'\n'
;
...
...
@@ -422,22 +422,22 @@ func (p *Parser) Term() (start, end Inst) {
fallthrough
;
default
:
p
.
nextc
();
start
=
N
ewChar
(
c
);
start
=
n
ewChar
(
c
);
p
.
re
.
Add
(
start
);
return
start
,
start
}
panic
(
"unreachable"
);
}
func
(
p
*
Parser
)
Closure
()
(
start
,
end
Inst
)
{
func
(
p
*
parser
)
Closure
()
(
start
,
end
instr
)
{
start
,
end
=
p
.
Term
();
if
start
==
NULL
{
if
start
==
i
NULL
{
return
}
switch
p
.
c
()
{
case
'*'
:
// (start,end)*:
alt
:=
new
(
Alt
);
alt
:=
new
(
i
Alt
);
p
.
re
.
Add
(
alt
);
end
.
SetNext
(
alt
);
// after end, do alt
alt
.
left
=
start
;
// alternate brach: return to start
...
...
@@ -445,16 +445,16 @@ func (p *Parser) Closure() (start, end Inst) {
end
=
alt
;
case
'+'
:
// (start,end)+:
alt
:=
new
(
Alt
);
alt
:=
new
(
i
Alt
);
p
.
re
.
Add
(
alt
);
end
.
SetNext
(
alt
);
// after end, do alt
alt
.
left
=
start
;
// alternate brach: return to start
end
=
alt
;
// start is unchanged; end is alt
case
'?'
:
// (start,end)?:
alt
:=
new
(
Alt
);
alt
:=
new
(
i
Alt
);
p
.
re
.
Add
(
alt
);
nop
:=
new
(
Nop
);
nop
:=
new
(
i
Nop
);
p
.
re
.
Add
(
nop
);
alt
.
left
=
start
;
// alternate branch is start
alt
.
next
=
nop
;
// follow on to nop
...
...
@@ -471,18 +471,18 @@ func (p *Parser) Closure() (start, end Inst) {
return
}
func
(
p
*
Parser
)
Concatenation
()
(
start
,
end
Inst
)
{
start
,
end
=
NULL
,
NULL
;
func
(
p
*
parser
)
Concatenation
()
(
start
,
end
instr
)
{
start
,
end
=
iNULL
,
i
NULL
;
for
{
nstart
,
nend
:=
p
.
Closure
();
switch
{
case
nstart
==
NULL
:
// end of this concatenation
if
start
==
NULL
{
// this is the empty string
nop
:=
p
.
re
.
Add
(
new
(
Nop
));
case
nstart
==
i
NULL
:
// end of this concatenation
if
start
==
i
NULL
{
// this is the empty string
nop
:=
p
.
re
.
Add
(
new
(
i
Nop
));
return
nop
,
nop
;
}
return
;
case
start
==
NULL
:
// this is first element of concatenation
case
start
==
i
NULL
:
// this is first element of concatenation
start
,
end
=
nstart
,
nend
;
default
:
end
.
SetNext
(
nstart
);
...
...
@@ -492,7 +492,7 @@ func (p *Parser) Concatenation() (start, end Inst) {
panic
(
"unreachable"
);
}
func
(
p
*
Parser
)
Regexp
()
(
start
,
end
Inst
)
{
func
(
p
*
parser
)
Regexp
()
(
start
,
end
instr
)
{
start
,
end
=
p
.
Concatenation
();
for
{
switch
p
.
c
()
{
...
...
@@ -501,11 +501,11 @@ func (p *Parser) Regexp() (start, end Inst) {
case
'|'
:
p
.
nextc
();
nstart
,
nend
:=
p
.
Concatenation
();
alt
:=
new
(
Alt
);
alt
:=
new
(
i
Alt
);
p
.
re
.
Add
(
alt
);
alt
.
left
=
start
;
alt
.
next
=
nstart
;
nop
:=
new
(
Nop
);
nop
:=
new
(
i
Nop
);
p
.
re
.
Add
(
nop
);
end
.
SetNext
(
nop
);
nend
.
SetNext
(
nop
);
...
...
@@ -515,47 +515,47 @@ func (p *Parser) Regexp() (start, end Inst) {
panic
(
"unreachable"
);
}
func
UnNop
(
i
Inst
)
Inst
{
for
i
.
Type
()
==
NOP
{
func
UnNop
(
i
instr
)
instr
{
for
i
.
Type
()
==
c
NOP
{
i
=
i
.
Next
()
}
return
i
}
func
(
re
*
RE
)
EliminateNops
()
{
func
(
re
*
regExp
)
EliminateNops
()
{
for
i
:=
0
;
i
<
re
.
inst
.
Len
();
i
++
{
inst
:=
re
.
inst
.
At
(
i
)
.
(
Inst
);
if
inst
.
Type
()
==
END
{
inst
:=
re
.
inst
.
At
(
i
)
.
(
instr
);
if
inst
.
Type
()
==
c
END
{
continue
}
inst
.
SetNext
(
UnNop
(
inst
.
Next
()));
if
inst
.
Type
()
==
ALT
{
alt
:=
inst
.
(
*
Alt
);
if
inst
.
Type
()
==
c
ALT
{
alt
:=
inst
.
(
*
i
Alt
);
alt
.
left
=
UnNop
(
alt
.
left
);
}
}
}
func
(
re
*
RE
)
Dump
()
{
func
(
re
*
regExp
)
Dump
()
{
for
i
:=
0
;
i
<
re
.
inst
.
Len
();
i
++
{
inst
:=
re
.
inst
.
At
(
i
)
.
(
Inst
);
inst
:=
re
.
inst
.
At
(
i
)
.
(
instr
);
print
(
inst
.
Index
(),
": "
);
inst
.
Print
();
if
inst
.
Type
()
!=
END
{
if
inst
.
Type
()
!=
c
END
{
print
(
" -> "
,
inst
.
Next
()
.
Index
())
}
print
(
"
\n
"
);
}
}
func
(
re
*
RE
)
DoParse
()
{
p
arser
:=
N
ewParser
(
re
);
start
:=
new
(
Start
);
func
(
re
*
regExp
)
DoParse
()
{
p
:=
n
ewParser
(
re
);
start
:=
new
(
i
Start
);
re
.
Add
(
start
);
s
,
e
:=
p
arser
.
Regexp
();
s
,
e
:=
p
.
Regexp
();
start
.
next
=
s
;
re
.
start
=
start
;
e
.
SetNext
(
re
.
Add
(
new
(
End
)));
e
.
SetNext
(
re
.
Add
(
new
(
i
End
)));
if
debug
{
re
.
Dump
();
...
...
@@ -571,8 +571,8 @@ func (re *RE) DoParse() {
}
func
Compiler
(
str
string
,
ch
chan
*
RE
)
{
re
:=
new
(
RE
);
func
Compiler
(
str
string
,
ch
chan
*
regExp
)
{
re
:=
new
(
regExp
);
re
.
expr
=
str
;
re
.
inst
=
array
.
New
(
0
);
re
.
ch
=
ch
;
...
...
@@ -589,20 +589,20 @@ export type Regexp interface {
// Compile in separate goroutine; wait for result
export
func
Compile
(
str
string
)
(
regexp
Regexp
,
error
*
os
.
Error
)
{
ch
:=
make
(
chan
*
RE
);
ch
:=
make
(
chan
*
regExp
);
go
Compiler
(
str
,
ch
);
re
:=
<-
ch
;
return
re
,
re
.
error
}
type
S
tate
struct
{
inst
Inst
;
// next instruction to execute
type
s
tate
struct
{
inst
instr
;
// next instruction to execute
match
[]
int
;
// pairs of bracketing submatches. 0th is start,end
}
// Append new state to to-do list. Leftmost-longest wins so avoid
// adding a state that's already active.
func
AddState
(
s
[]
State
,
inst
Inst
,
match
[]
int
)
[]
S
tate
{
func
addState
(
s
[]
state
,
inst
instr
,
match
[]
int
)
[]
s
tate
{
index
:=
inst
.
Index
();
l
:=
len
(
s
);
pos
:=
match
[
0
];
...
...
@@ -615,7 +615,7 @@ func AddState(s []State, inst Inst, match []int) []State {
}
}
if
l
==
cap
(
s
)
{
s1
:=
make
([]
S
tate
,
2
*
l
)[
0
:
l
];
s1
:=
make
([]
s
tate
,
2
*
l
)[
0
:
l
];
for
i
:=
0
;
i
<
l
;
i
++
{
s1
[
i
]
=
s
[
i
];
}
...
...
@@ -627,12 +627,12 @@ func AddState(s []State, inst Inst, match []int) []State {
return
s
;
}
func
(
re
*
RE
)
DoExecute
(
str
string
,
pos
int
)
[]
int
{
var
s
[
2
][]
State
;
// TODO: use a vector when S
tate values (not ptrs) can be vector elements
s
[
0
]
=
make
([]
S
tate
,
10
)[
0
:
0
];
s
[
1
]
=
make
([]
S
tate
,
10
)[
0
:
0
];
func
(
re
*
regExp
)
DoExecute
(
str
string
,
pos
int
)
[]
int
{
var
s
[
2
][]
state
;
// TODO: use a vector when s
tate values (not ptrs) can be vector elements
s
[
0
]
=
make
([]
s
tate
,
10
)[
0
:
0
];
s
[
1
]
=
make
([]
s
tate
,
10
)[
0
:
0
];
in
,
out
:=
0
,
1
;
var
final
S
tate
;
var
final
s
tate
;
found
:=
false
;
for
pos
<=
len
(
str
)
{
if
!
found
{
...
...
@@ -642,7 +642,7 @@ func (re *RE) DoExecute(str string, pos int) []int {
match
[
i
]
=
-
1
;
// no match seen; catches cases like "a(b)?c" on "ac"
}
match
[
0
]
=
pos
;
s
[
out
]
=
A
ddState
(
s
[
out
],
re
.
start
.
Next
(),
match
);
s
[
out
]
=
a
ddState
(
s
[
out
],
re
.
start
.
Next
(),
match
);
}
in
,
out
=
out
,
in
;
// old out state is new in state
s
[
out
]
=
s
[
out
][
0
:
0
];
// clear out state
...
...
@@ -651,60 +651,60 @@ func (re *RE) DoExecute(str string, pos int) []int {
break
;
}
charwidth
:=
1
;
c
:=
EOF
;
c
:=
endOfFile
;
if
pos
<
len
(
str
)
{
c
,
charwidth
=
sys
.
stringtorune
(
str
,
pos
);
}
for
i
:=
0
;
i
<
len
(
s
[
in
]);
i
++
{
st
ate
:=
s
[
in
][
i
];
st
:=
s
[
in
][
i
];
switch
s
[
in
][
i
]
.
inst
.
Type
()
{
case
BOT
:
case
c
BOT
:
if
pos
==
0
{
s
[
in
]
=
AddState
(
s
[
in
],
state
.
inst
.
Next
(),
state
.
match
)
s
[
in
]
=
addState
(
s
[
in
],
st
.
inst
.
Next
(),
st
.
match
)
}
case
EOT
:
case
c
EOT
:
if
pos
==
len
(
str
)
{
s
[
in
]
=
AddState
(
s
[
in
],
state
.
inst
.
Next
(),
state
.
match
)
}
case
CHAR
:
if
c
==
st
ate
.
inst
.
(
*
Char
)
.
char
{
s
[
out
]
=
AddState
(
s
[
out
],
state
.
inst
.
Next
(),
state
.
match
)
}
case
CHARCLASS
:
if
st
ate
.
inst
.
(
*
CharClass
)
.
Matches
(
c
)
{
s
[
out
]
=
AddState
(
s
[
out
],
state
.
inst
.
Next
(),
state
.
match
)
}
case
ANY
:
if
c
!=
EOF
{
s
[
out
]
=
AddState
(
s
[
out
],
state
.
inst
.
Next
(),
state
.
match
)
}
case
BRA
:
n
:=
st
ate
.
inst
.
(
*
Bra
)
.
n
;
st
ate
.
match
[
2
*
n
]
=
pos
;
s
[
in
]
=
AddState
(
s
[
in
],
state
.
inst
.
Next
(),
state
.
match
);
case
EBRA
:
n
:=
st
ate
.
inst
.
(
*
Ebra
)
.
n
;
st
ate
.
match
[
2
*
n
+
1
]
=
pos
;
s
[
in
]
=
AddState
(
s
[
in
],
state
.
inst
.
Next
(),
state
.
match
);
case
ALT
:
s
[
in
]
=
AddState
(
s
[
in
],
state
.
inst
.
(
*
Alt
)
.
left
,
state
.
match
);
s
[
in
]
=
addState
(
s
[
in
],
st
.
inst
.
Next
(),
st
.
match
)
}
case
c
CHAR
:
if
c
==
st
.
inst
.
(
*
i
Char
)
.
char
{
s
[
out
]
=
addState
(
s
[
out
],
st
.
inst
.
Next
(),
st
.
match
)
}
case
c
CHARCLASS
:
if
st
.
inst
.
(
*
i
CharClass
)
.
Matches
(
c
)
{
s
[
out
]
=
addState
(
s
[
out
],
st
.
inst
.
Next
(),
st
.
match
)
}
case
c
ANY
:
if
c
!=
endOfFile
{
s
[
out
]
=
addState
(
s
[
out
],
st
.
inst
.
Next
(),
st
.
match
)
}
case
c
BRA
:
n
:=
st
.
inst
.
(
*
i
Bra
)
.
n
;
st
.
match
[
2
*
n
]
=
pos
;
s
[
in
]
=
addState
(
s
[
in
],
st
.
inst
.
Next
(),
st
.
match
);
case
c
EBRA
:
n
:=
st
.
inst
.
(
*
i
Ebra
)
.
n
;
st
.
match
[
2
*
n
+
1
]
=
pos
;
s
[
in
]
=
addState
(
s
[
in
],
st
.
inst
.
Next
(),
st
.
match
);
case
c
ALT
:
s
[
in
]
=
addState
(
s
[
in
],
st
.
inst
.
(
*
iAlt
)
.
left
,
st
.
match
);
// give other branch a copy of this match vector
s1
:=
make
([]
int
,
2
*
(
re
.
nbra
+
1
));
for
i
:=
0
;
i
<
len
(
s1
);
i
++
{
s1
[
i
]
=
st
ate
.
match
[
i
]
s1
[
i
]
=
st
.
match
[
i
]
}
s
[
in
]
=
AddState
(
s
[
in
],
state
.
inst
.
Next
(),
s1
);
case
END
:
s
[
in
]
=
addState
(
s
[
in
],
st
.
inst
.
Next
(),
s1
);
case
c
END
:
// choose leftmost longest
if
!
found
||
// first
st
ate
.
match
[
0
]
<
final
.
match
[
0
]
||
// leftmost
(
st
ate
.
match
[
0
]
==
final
.
match
[
0
]
&&
pos
>
final
.
match
[
1
])
{
// longest
final
=
st
ate
;
st
.
match
[
0
]
<
final
.
match
[
0
]
||
// leftmost
(
st
.
match
[
0
]
==
final
.
match
[
0
]
&&
pos
>
final
.
match
[
1
])
{
// longest
final
=
st
;
final
.
match
[
1
]
=
pos
;
}
found
=
true
;
default
:
st
ate
.
inst
.
Print
();
st
.
inst
.
Print
();
panic
(
"unknown instruction in execute"
);
}
}
...
...
@@ -714,17 +714,17 @@ func (re *RE) DoExecute(str string, pos int) []int {
}
func
(
re
*
RE
)
Execute
(
s
string
)
[]
int
{
func
(
re
*
regExp
)
Execute
(
s
string
)
[]
int
{
return
re
.
DoExecute
(
s
,
0
)
}
func
(
re
*
RE
)
Match
(
s
string
)
bool
{
func
(
re
*
regExp
)
Match
(
s
string
)
bool
{
return
len
(
re
.
DoExecute
(
s
,
0
))
>
0
}
func
(
re
*
RE
)
MatchStrings
(
s
string
)
[]
string
{
func
(
re
*
regExp
)
MatchStrings
(
s
string
)
[]
string
{
r
:=
re
.
DoExecute
(
s
,
0
);
if
r
==
nil
{
return
nil
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment