Commit d2a6098e authored by Nigel Tao's avatar Nigel Tao

exp/html/atom: faster, hash-based lookup.

exp/html/atom benchmark:
benchmark          old ns/op    new ns/op    delta
BenchmarkLookup       199226        80770  -59.46%

exp/html benchmark:
benchmark                      old ns/op    new ns/op    delta
BenchmarkParser                  4864890      4510834   -7.28%
BenchmarkHighLevelTokenizer      2209192      1969684  -10.84%
benchmark                       old MB/s     new MB/s  speedup
BenchmarkParser                    16.07        17.33    1.08x
BenchmarkHighLevelTokenizer        35.38        39.68    1.12x

R=r
CC=golang-dev
https://golang.org/cl/6261054
parent baf91c31
...@@ -6,33 +6,40 @@ ...@@ -6,33 +6,40 @@
// frequently occurring HTML strings: lower-case tag names and attribute keys // frequently occurring HTML strings: lower-case tag names and attribute keys
// such as "p" and "id". // such as "p" and "id".
// //
// Sharing an atom's string representation between all elements with the same // Sharing an atom's name between all elements with the same tag can result in
// tag can result in fewer string allocations when tokenizing and parsing HTML. // fewer string allocations when tokenizing and parsing HTML. Integer
// Integer comparisons are also generally faster than string comparisons. // comparisons are also generally faster than string comparisons.
// //
// An atom's particular code (such as atom.Div == 63) is not guaranteed to // The value of an atom's particular code is not guaranteed to stay the same
// stay the same between versions of this package. Neither is any ordering // between versions of this package. Neither is any ordering guaranteed:
// guaranteed: whether atom.H1 < atom.H2 may also change. The codes are not // whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
// guaranteed to be dense. The only guarantees are that e.g. looking up "div" // be dense. The only guarantees are that e.g. looking up "div" will yield
// will yield atom.Div, calling atom.Div.String will return "div", and // atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
// atom.Div != 0.
package atom package atom
// The hash function must be the same as the one used in gen.go
func hash(s []byte) (h uint32) {
for i := 0; i < len(s); i++ {
h = h<<5 ^ h>>27 ^ uint32(s[i])
}
return h
}
// Atom is an integer code for a string. The zero value maps to "". // Atom is an integer code for a string. The zero value maps to "".
type Atom int type Atom int
// String returns the atom's string representation. // String returns the atom's name.
func (a Atom) String() string { func (a Atom) String() string {
if a <= 0 || a > max { if 0 <= a && a < Atom(len(table)) {
return "" return table[a]
} }
return table[a] return ""
} }
// Lookup returns the atom whose name is s. It returns zero if there is no // Lookup returns the atom whose name is s. It returns zero if there is no
// such atom. // such atom.
func Lookup(s []byte) Atom { func Lookup(s []byte) Atom {
if len(s) == 0 { if len(s) == 0 || len(s) > maxLen {
return 0 return 0
} }
if len(s) == 1 { if len(s) == 1 {
...@@ -42,15 +49,25 @@ func Lookup(s []byte) Atom { ...@@ -42,15 +49,25 @@ func Lookup(s []byte) Atom {
} }
return oneByteAtoms[x-'a'] return oneByteAtoms[x-'a']
} }
// Binary search for the atom. Unlike sort.Search, this returns early on an exact match. hs := hash(s)
// TODO: this could be optimized further. For example, lo and hi could be initialized // Binary search for hs. Unlike sort.Search, this returns early on an exact match.
// from s[0]. Separately, all the "onxxx" atoms could be moved into their own table. // A loop invariant is that len(table[i]) == len(s) for all i in [lo, hi).
lo, hi := Atom(1), 1+max lo := Atom(loHi[len(s)])
hi := Atom(loHi[len(s)+1])
for lo < hi { for lo < hi {
mid := (lo + hi) / 2 mid := (lo + hi) / 2
if cmp := compare(s, table[mid]); cmp == 0 { if ht := hashes[mid]; hs == ht {
// The gen.go program ensures that each atom's name has a distinct hash.
// However, arbitrary strings may collide with the atom's name. We have
// to check that string(s) == table[mid].
t := table[mid]
for i, si := range s {
if si != t[i] {
return 0
}
}
return mid return mid
} else if cmp > 0 { } else if hs > ht {
lo = mid + 1 lo = mid + 1
} else { } else {
hi = mid hi = mid
...@@ -67,22 +84,3 @@ func String(s []byte) string { ...@@ -67,22 +84,3 @@ func String(s []byte) string {
} }
return string(s) return string(s)
} }
// compare is like bytes.Compare, except that it takes one []byte argument and
// one string argument, and returns negative/0/positive instead of -1/0/+1.
func compare(s []byte, t string) int {
n := len(s)
if n > len(t) {
n = len(t)
}
for i, si := range s[:n] {
ti := t[i]
switch {
case si > ti:
return +1
case si < ti:
return -1
}
}
return len(s) - len(t)
}
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
package atom package atom
import ( import (
"sort"
"testing" "testing"
) )
...@@ -42,6 +43,8 @@ func TestMisses(t *testing.T) { ...@@ -42,6 +43,8 @@ func TestMisses(t *testing.T) {
"h7", "h7",
"onClick", "onClick",
"λ", "λ",
// The following string has the same hash (0xa1d7fab7) as "onmouseover".
"\x00\x00\x00\x00\x00\x50\x18\xae\x38\xd0\xb7",
} }
for _, tc := range testCases { for _, tc := range testCases {
got := Lookup([]byte(tc)) got := Lookup([]byte(tc))
...@@ -50,3 +53,21 @@ func TestMisses(t *testing.T) { ...@@ -50,3 +53,21 @@ func TestMisses(t *testing.T) {
} }
} }
} }
func BenchmarkLookup(b *testing.B) {
sortedTable := make([]string, len(table))
copy(sortedTable, table[:])
sort.Strings(sortedTable)
x := make([][]byte, 1000)
for i := range x {
x[i] = []byte(sortedTable[i%len(sortedTable)])
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, s := range x {
Lookup(s)
}
}
}
...@@ -13,9 +13,30 @@ package main ...@@ -13,9 +13,30 @@ package main
import ( import (
"fmt" "fmt"
"os"
"sort" "sort"
) )
// The hash function must be the same as the one used in atom.go
func hash(s string) (h uint32) {
for i := 0; i < len(s); i++ {
h = h<<5 ^ h>>27 ^ uint32(s[i])
}
return h
}
// lhash returns a uint64 whose high 32 bits are len(s) and whose low 32 bits
// are hash(s).
func lhash(s string) uint64 {
return uint64(len(s))<<32 | uint64(hash(s))
}
type byLhash []string
func (b byLhash) Len() int { return len(b) }
func (b byLhash) Less(i, j int) bool { return lhash(b[i]) < lhash(b[j]) }
func (b byLhash) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
// identifier converts s to a Go exported identifier. // identifier converts s to a Go exported identifier.
// It converts "div" to "Div" and "accept-charset" to "AcceptCharset". // It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
func identifier(s string) string { func identifier(s string) string {
...@@ -36,43 +57,84 @@ func identifier(s string) string { ...@@ -36,43 +57,84 @@ func identifier(s string) string {
} }
func main() { func main() {
m := map[string]bool{ // Construct a list of atoms, sorted by their lhash.
m0 := map[string]bool{
"": true, "": true,
} }
for _, list := range [][]string{elements, attributes, eventHandlers, extra} { for _, list := range [][]string{elements, attributes, eventHandlers, extra} {
for _, s := range list { for _, s := range list {
m[s] = true m0[s] = true
} }
} }
atoms := make([]string, 0, len(m)) atoms := make([]string, 0, len(m0))
for s := range m { for s := range m0 {
atoms = append(atoms, s) atoms = append(atoms, s)
} }
sort.Strings(atoms) sort.Sort(byLhash(atoms))
// Calculate the magic constants to output as table.go.
byInt := []string{} byInt := []string{}
byStr := map[string]int{} byStr := map[string]int{}
ident := []string{} ident := []string{}
lhashes := []uint64{}
maxLen := 0
for i, s := range atoms { for i, s := range atoms {
byInt = append(byInt, s) byInt = append(byInt, s)
byStr[s] = i byStr[s] = i
ident = append(ident, identifier(s)) ident = append(ident, identifier(s))
lhashes = append(lhashes, lhash(s))
if maxLen < len(s) {
maxLen = len(s)
}
}
// Check for hash collisions.
m1 := map[uint64]int{}
for i, h := range lhashes {
h &= 1<<32 - 1
if j, ok := m1[h]; ok {
fmt.Fprintf(os.Stderr, "hash collision at 0x%08x: %q, %q\n", h, byInt[i], byInt[j])
os.Exit(1)
}
m1[h] = i
} }
// Generate the Go code.
fmt.Printf("package atom\n\nconst (\n") fmt.Printf("package atom\n\nconst (\n")
for i, _ := range byInt { {
if i == 0 { // Print the Atoms in alphabetical order.
continue lines := []string{}
for i, _ := range byInt {
if i == 0 {
continue
}
lines = append(lines, fmt.Sprintf("\t%s Atom = %d", ident[i], i))
} }
fmt.Printf("\t%s Atom = %d\n", ident[i], i) sort.Strings(lines)
for _, line := range lines {
fmt.Println(line)
}
fmt.Printf(")\n\n")
} }
fmt.Printf(")\n\n") fmt.Printf("const maxLen = %d\n\n", maxLen)
fmt.Printf("const max Atom = %d\n\n", len(byInt)-1) fmt.Printf("var table = [...]string{\n")
fmt.Printf("var table = []string{\n")
for _, s := range byInt { for _, s := range byInt {
fmt.Printf("\t%q,\n", s) fmt.Printf("\t%q,\n", s)
} }
fmt.Printf("}\n\n") fmt.Printf("}\n\n")
fmt.Printf("var hashes = [...]uint32{\n")
for _, s := range byInt {
fmt.Printf("\t0x%08x,\n", hash(s))
}
fmt.Printf("}\n\n")
fmt.Printf("var loHi = [maxLen + 2]uint16{\n")
for n := 0; n <= maxLen; n++ {
fmt.Printf("\t%d,\n", sort.Search(len(byInt), func(i int) bool {
return int(lhashes[i]>>32) >= n
}))
}
fmt.Printf("\t%d,\n", len(byInt))
fmt.Printf("}\n\n")
fmt.Printf("var oneByteAtoms = [26]Atom{\n") fmt.Printf("var oneByteAtoms = [26]Atom{\n")
for i := 'a'; i <= 'z'; i++ { for i := 'a'; i <= 'z'; i++ {
val := "0" val := "0"
......
...@@ -2,601 +2,921 @@ package atom ...@@ -2,601 +2,921 @@ package atom
const ( const (
A Atom = 1 A Atom = 1
Abbr Atom = 2 Abbr Atom = 58
Accept Atom = 3 Accept Atom = 126
AcceptCharset Atom = 4 AcceptCharset Atom = 288
Accesskey Atom = 5 Accesskey Atom = 230
Action Atom = 6 Action Atom = 127
Address Atom = 7 Address Atom = 182
Align Atom = 8 Align Atom = 91
Alt Atom = 9 Alt Atom = 32
Annotation Atom = 10 Annotation Atom = 251
Applet Atom = 11 Applet Atom = 128
Area Atom = 12 Area Atom = 59
Article Atom = 13 Article Atom = 184
Aside Atom = 14 Aside Atom = 92
Async Atom = 15 Async Atom = 93
Audio Atom = 16 Audio Atom = 94
Autocomplete Atom = 17 Autocomplete Atom = 278
Autofocus Atom = 18 Autofocus Atom = 243
Autoplay Atom = 19 Autoplay Atom = 221
B Atom = 20 B Atom = 2
Base Atom = 21 Base Atom = 56
Bdi Atom = 22 Bdi Atom = 30
Bdo Atom = 23 Bdo Atom = 31
Blockquote Atom = 24 Blockquote Atom = 248
Body Atom = 25 Body Atom = 57
Border Atom = 26 Border Atom = 124
Br Atom = 27 Br Atom = 8
Button Atom = 28 Button Atom = 125
Canvas Atom = 29 Canvas Atom = 121
Caption Atom = 30 Caption Atom = 160
Center Atom = 31 Center Atom = 122
Challenge Atom = 32 Challenge Atom = 242
Charset Atom = 33 Charset Atom = 163
Checked Atom = 34 Checked Atom = 164
Cite Atom = 35 Cite Atom = 53
Class Atom = 36 Class Atom = 90
Code Atom = 37 Code Atom = 54
Col Atom = 38 Col Atom = 29
Colgroup Atom = 39 Colgroup Atom = 193
Color Atom = 40 Color Atom = 89
Cols Atom = 41 Cols Atom = 55
Colspan Atom = 42 Colspan Atom = 167
Command Atom = 43 Command Atom = 166
Content Atom = 44 Content Atom = 165
Contenteditable Atom = 45 Contenteditable Atom = 292
Contextmenu Atom = 46 Contextmenu Atom = 277
Controls Atom = 47 Controls Atom = 192
Coords Atom = 48 Coords Atom = 123
Crossorigin Atom = 49 Crossorigin Atom = 266
Data Atom = 50 Data Atom = 62
Datalist Atom = 51 Datalist Atom = 219
Datetime Atom = 52 Datetime Atom = 220
Dd Atom = 53 Dd Atom = 10
Default Atom = 54 Default Atom = 188
Defer Atom = 55 Defer Atom = 97
Del Atom = 56 Del Atom = 35
Details Atom = 57 Details Atom = 189
Dfn Atom = 58 Dfn Atom = 34
Dialog Atom = 59 Dialog Atom = 131
Dir Atom = 60 Dir Atom = 36
Dirname Atom = 61 Dirname Atom = 190
Disabled Atom = 62 Disabled Atom = 216
Div Atom = 63 Div Atom = 37
Dl Atom = 64 Dl Atom = 11
Download Atom = 65 Download Atom = 195
Draggable Atom = 66 Draggable Atom = 235
Dropzone Atom = 67 Dropzone Atom = 202
Dt Atom = 68 Dt Atom = 12
Em Atom = 69 Em Atom = 9
Embed Atom = 70 Embed Atom = 96
Enctype Atom = 71 Enctype Atom = 183
Fieldset Atom = 72 Fieldset Atom = 212
Figcaption Atom = 73 Figcaption Atom = 247
Figure Atom = 74 Figure Atom = 129
Font Atom = 75 Font Atom = 60
Footer Atom = 76 Footer Atom = 130
For Atom = 77 For Atom = 33
Form Atom = 78 Form Atom = 61
Formaction Atom = 79 Formaction Atom = 256
Formenctype Atom = 80 Formenctype Atom = 273
Formmethod Atom = 81 Formmethod Atom = 261
Formnovalidate Atom = 82 Formnovalidate Atom = 289
Formtarget Atom = 83 Formtarget Atom = 263
Frame Atom = 84 Frame Atom = 95
Frameset Atom = 85 Frameset Atom = 198
H1 Atom = 86 H1 Atom = 13
H2 Atom = 87 H2 Atom = 14
H3 Atom = 88 H3 Atom = 15
H4 Atom = 89 H4 Atom = 16
H5 Atom = 90 H5 Atom = 17
H6 Atom = 91 H6 Atom = 18
Head Atom = 92 Head Atom = 65
Header Atom = 93 Header Atom = 136
Headers Atom = 94 Headers Atom = 187
Height Atom = 95 Height Atom = 137
Hgroup Atom = 96 Hgroup Atom = 135
Hidden Atom = 97 Hidden Atom = 138
High Atom = 98 High Atom = 66
Hr Atom = 99 Hr Atom = 20
Href Atom = 100 Href Atom = 67
Hreflang Atom = 101 Hreflang Atom = 199
Html Atom = 102 Html Atom = 68
HttpEquiv Atom = 103 HttpEquiv Atom = 254
I Atom = 104 I Atom = 3
Icon Atom = 105 Icon Atom = 64
Id Atom = 106 Id Atom = 19
Iframe Atom = 107 Iframe Atom = 133
Img Atom = 108 Img Atom = 40
Inert Atom = 109 Inert Atom = 98
Input Atom = 110 Input Atom = 99
Ins Atom = 111 Ins Atom = 39
Ismap Atom = 112 Ismap Atom = 100
Itemid Atom = 113 Itemid Atom = 134
Itemprop Atom = 114 Itemprop Atom = 223
Itemref Atom = 115 Itemref Atom = 185
Itemscope Atom = 116 Itemscope Atom = 240
Itemtype Atom = 117 Itemtype Atom = 224
Kbd Atom = 118 Kbd Atom = 38
Keygen Atom = 119 Keygen Atom = 132
Keytype Atom = 120 Keytype Atom = 162
Kind Atom = 121 Kind Atom = 63
Label Atom = 122 Label Atom = 104
Lang Atom = 123 Lang Atom = 75
Legend Atom = 124 Legend Atom = 149
Li Atom = 125 Li Atom = 22
Link Atom = 126 Link Atom = 76
List Atom = 127 List Atom = 77
Loop Atom = 128 Loop Atom = 78
Low Atom = 129 Low Atom = 45
Manifest Atom = 130 Manifest Atom = 213
Map Atom = 131 Map Atom = 42
Mark Atom = 132 Mark Atom = 72
Max Atom = 133 Max Atom = 43
Maxlength Atom = 134 Maxlength Atom = 245
Media Atom = 135 Media Atom = 101
Mediagroup Atom = 136 Mediagroup Atom = 257
Menu Atom = 137 Menu Atom = 73
Meta Atom = 138 Meta Atom = 74
Meter Atom = 139 Meter Atom = 102
Method Atom = 140 Method Atom = 148
Min Atom = 141 Min Atom = 44
Multiple Atom = 142 Multiple Atom = 215
Muted Atom = 143 Muted Atom = 103
Name Atom = 144 Name Atom = 70
Nav Atom = 145 Nav Atom = 41
Nobr Atom = 146 Nobr Atom = 71
Noscript Atom = 147 Noscript Atom = 194
Novalidate Atom = 148 Novalidate Atom = 262
Object Atom = 149 Object Atom = 139
Ol Atom = 150 Ol Atom = 21
Onabort Atom = 151 Onabort Atom = 170
Onafterprint Atom = 152 Onafterprint Atom = 280
Onbeforeprint Atom = 153 Onbeforeprint Atom = 287
Onbeforeunload Atom = 154 Onbeforeunload Atom = 291
Onblur Atom = 155 Onblur Atom = 140
Oncancel Atom = 156 Oncancel Atom = 196
Oncanplay Atom = 157 Oncanplay Atom = 227
Oncanplaythrough Atom = 158 Oncanplaythrough Atom = 295
Onchange Atom = 159 Onchange Atom = 197
Onclick Atom = 160 Onclick Atom = 168
Onclose Atom = 161 Onclose Atom = 169
Oncontextmenu Atom = 162 Oncontextmenu Atom = 286
Oncuechange Atom = 163 Oncuechange Atom = 267
Ondblclick Atom = 164 Ondblclick Atom = 255
Ondrag Atom = 165 Ondrag Atom = 141
Ondragend Atom = 166 Ondragend Atom = 246
Ondragenter Atom = 167 Ondragenter Atom = 270
Ondragleave Atom = 168 Ondragleave Atom = 269
Ondragover Atom = 169 Ondragover Atom = 252
Ondragstart Atom = 170 Ondragstart Atom = 268
Ondrop Atom = 171 Ondrop Atom = 142
Ondurationchange Atom = 172 Ondurationchange Atom = 293
Onemptied Atom = 173 Onemptied Atom = 241
Onended Atom = 174 Onended Atom = 172
Onerror Atom = 175 Onerror Atom = 173
Onfocus Atom = 176 Onfocus Atom = 171
Onhashchange Atom = 177 Onhashchange Atom = 283
Oninput Atom = 178 Oninput Atom = 175
Oninvalid Atom = 179 Oninvalid Atom = 239
Onkeydown Atom = 180 Onkeydown Atom = 231
Onkeypress Atom = 181 Onkeypress Atom = 264
Onkeyup Atom = 182 Onkeyup Atom = 174
Onload Atom = 183 Onload Atom = 143
Onloadeddata Atom = 184 Onloadeddata Atom = 284
Onloadedmetadata Atom = 185 Onloadedmetadata Atom = 294
Onloadstart Atom = 186 Onloadstart Atom = 271
Onmessage Atom = 187 Onmessage Atom = 236
Onmousedown Atom = 188 Onmousedown Atom = 274
Onmousemove Atom = 189 Onmousemove Atom = 275
Onmouseout Atom = 190 Onmouseout Atom = 250
Onmouseover Atom = 191 Onmouseover Atom = 276
Onmouseup Atom = 192 Onmouseup Atom = 237
Onmousewheel Atom = 193 Onmousewheel Atom = 279
Onoffline Atom = 194 Onoffline Atom = 228
Ononline Atom = 195 Ononline Atom = 201
Onpagehide Atom = 196 Onpagehide Atom = 259
Onpageshow Atom = 197 Onpageshow Atom = 258
Onpause Atom = 198 Onpause Atom = 177
Onplay Atom = 199 Onplay Atom = 145
Onplaying Atom = 200 Onplaying Atom = 244
Onpopstate Atom = 201 Onpopstate Atom = 249
Onprogress Atom = 202 Onprogress Atom = 253
Onratechange Atom = 203 Onratechange Atom = 282
Onreset Atom = 204 Onreset Atom = 176
Onresize Atom = 205 Onresize Atom = 207
Onscroll Atom = 206 Onscroll Atom = 203
Onseeked Atom = 207 Onseeked Atom = 204
Onseeking Atom = 208 Onseeking Atom = 229
Onselect Atom = 209 Onselect Atom = 205
Onshow Atom = 210 Onshow Atom = 144
Onstalled Atom = 211 Onstalled Atom = 233
Onstorage Atom = 212 Onstorage Atom = 234
Onsubmit Atom = 213 Onsubmit Atom = 206
Onsuspend Atom = 214 Onsuspend Atom = 232
Ontimeupdate Atom = 215 Ontimeupdate Atom = 281
Onunload Atom = 216 Onunload Atom = 208
Onvolumechange Atom = 217 Onvolumechange Atom = 290
Onwaiting Atom = 218 Onwaiting Atom = 226
Open Atom = 219 Open Atom = 69
Optgroup Atom = 220 Optgroup Atom = 225
Optimum Atom = 221 Optimum Atom = 179
Option Atom = 222 Option Atom = 146
Output Atom = 223 Output Atom = 147
P Atom = 224 P Atom = 4
Param Atom = 225 Param Atom = 111
Pattern Atom = 226 Pattern Atom = 186
Ping Atom = 227 Ping Atom = 85
Placeholder Atom = 228 Placeholder Atom = 272
Poster Atom = 229 Poster Atom = 156
Pre Atom = 230 Pre Atom = 50
Preload Atom = 231 Preload Atom = 191
Progress Atom = 232 Progress Atom = 200
Q Atom = 233 Q Atom = 5
Radiogroup Atom = 234 Radiogroup Atom = 260
Readonly Atom = 235 Readonly Atom = 210
Rel Atom = 236 Rel Atom = 49
Required Atom = 237 Required Atom = 217
Reversed Atom = 238 Reversed Atom = 218
Rows Atom = 239 Rows Atom = 83
Rowspan Atom = 240 Rowspan Atom = 181
Rp Atom = 241 Rp Atom = 23
Rt Atom = 242 Rt Atom = 24
Ruby Atom = 243 Ruby Atom = 84
S Atom = 244 S Atom = 6
Samp Atom = 245 Samp Atom = 79
Sandbox Atom = 246 Sandbox Atom = 159
Scope Atom = 247 Scope Atom = 105
Scoped Atom = 248 Scoped Atom = 150
Script Atom = 249 Script Atom = 151
Seamless Atom = 250 Seamless Atom = 211
Section Atom = 251 Section Atom = 161
Select Atom = 252 Select Atom = 152
Selected Atom = 253 Selected Atom = 214
Shape Atom = 254 Shape Atom = 107
Size Atom = 255 Size Atom = 80
Sizes Atom = 256 Sizes Atom = 106
Small Atom = 257 Small Atom = 108
Source Atom = 258 Source Atom = 153
Span Atom = 259 Span Atom = 81
Spellcheck Atom = 260 Spellcheck Atom = 265
Src Atom = 261 Src Atom = 46
Srcdoc Atom = 262 Srcdoc Atom = 154
Srclang Atom = 263 Srclang Atom = 178
Start Atom = 264 Start Atom = 109
Step Atom = 265 Step Atom = 82
Strong Atom = 266 Strong Atom = 155
Style Atom = 267 Style Atom = 110
Sub Atom = 268 Sub Atom = 47
Summary Atom = 269 Summary Atom = 180
Sup Atom = 270 Sup Atom = 48
Tabindex Atom = 271 Tabindex Atom = 209
Table Atom = 272 Table Atom = 116
Target Atom = 273 Target Atom = 158
Tbody Atom = 274 Tbody Atom = 115
Td Atom = 275 Td Atom = 26
Textarea Atom = 276 Textarea Atom = 222
Tfoot Atom = 277 Tfoot Atom = 117
Th Atom = 278 Th Atom = 27
Thead Atom = 279 Thead Atom = 119
Time Atom = 280 Time Atom = 87
Title Atom = 281 Title Atom = 118
Tr Atom = 282 Tr Atom = 28
Track Atom = 283 Track Atom = 120
Translate Atom = 284 Translate Atom = 238
Type Atom = 285 Type Atom = 88
Typemustmatch Atom = 286 Typemustmatch Atom = 285
U Atom = 287 U Atom = 7
Ul Atom = 288 Ul Atom = 25
Usemap Atom = 289 Usemap Atom = 157
Value Atom = 290 Value Atom = 113
Var Atom = 291 Var Atom = 52
Video Atom = 292 Video Atom = 114
Wbr Atom = 293 Wbr Atom = 51
Width Atom = 294 Width Atom = 112
Wrap Atom = 295 Wrap Atom = 86
) )
const max Atom = 295 const maxLen = 16
var table = []string{ var table = [...]string{
"", "",
"a", "a",
"abbr",
"accept",
"accept-charset",
"accesskey",
"action",
"address",
"align",
"alt",
"annotation",
"applet",
"area",
"article",
"aside",
"async",
"audio",
"autocomplete",
"autofocus",
"autoplay",
"b", "b",
"base", "i",
"bdi", "p",
"bdo", "q",
"blockquote", "s",
"body", "u",
"border",
"br", "br",
"button", "em",
"canvas",
"caption",
"center",
"challenge",
"charset",
"checked",
"cite",
"class",
"code",
"col",
"colgroup",
"color",
"cols",
"colspan",
"command",
"content",
"contenteditable",
"contextmenu",
"controls",
"coords",
"crossorigin",
"data",
"datalist",
"datetime",
"dd", "dd",
"default",
"defer",
"del",
"details",
"dfn",
"dialog",
"dir",
"dirname",
"disabled",
"div",
"dl", "dl",
"download",
"draggable",
"dropzone",
"dt", "dt",
"em",
"embed",
"enctype",
"fieldset",
"figcaption",
"figure",
"font",
"footer",
"for",
"form",
"formaction",
"formenctype",
"formmethod",
"formnovalidate",
"formtarget",
"frame",
"frameset",
"h1", "h1",
"h2", "h2",
"h3", "h3",
"h4", "h4",
"h5", "h5",
"h6", "h6",
"id",
"hr",
"ol",
"li",
"rp",
"rt",
"ul",
"td",
"th",
"tr",
"col",
"bdi",
"bdo",
"alt",
"for",
"dfn",
"del",
"dir",
"div",
"kbd",
"ins",
"img",
"nav",
"map",
"max",
"min",
"low",
"src",
"sub",
"sup",
"rel",
"pre",
"wbr",
"var",
"cite",
"code",
"cols",
"base",
"body",
"abbr",
"area",
"font",
"form",
"data",
"kind",
"icon",
"head", "head",
"header",
"headers",
"height",
"hgroup",
"hidden",
"high", "high",
"hr",
"href", "href",
"hreflang",
"html", "html",
"http-equiv", "open",
"i", "name",
"icon", "nobr",
"id", "mark",
"iframe", "menu",
"img", "meta",
"inert",
"input",
"ins",
"ismap",
"itemid",
"itemprop",
"itemref",
"itemscope",
"itemtype",
"kbd",
"keygen",
"keytype",
"kind",
"label",
"lang", "lang",
"legend",
"li",
"link", "link",
"list", "list",
"loop", "loop",
"low", "samp",
"manifest", "size",
"map", "span",
"mark", "step",
"max", "rows",
"maxlength", "ruby",
"ping",
"wrap",
"time",
"type",
"color",
"class",
"align",
"aside",
"async",
"audio",
"frame",
"embed",
"defer",
"inert",
"input",
"ismap",
"media", "media",
"mediagroup",
"menu",
"meta",
"meter", "meter",
"method",
"min",
"multiple",
"muted", "muted",
"name", "label",
"nav", "scope",
"nobr", "sizes",
"noscript", "shape",
"novalidate", "small",
"start",
"style",
"param",
"width",
"value",
"video",
"tbody",
"table",
"tfoot",
"title",
"thead",
"track",
"canvas",
"center",
"coords",
"border",
"button",
"accept",
"action",
"applet",
"figure",
"footer",
"dialog",
"keygen",
"iframe",
"itemid",
"hgroup",
"header",
"height",
"hidden",
"object", "object",
"ol",
"onabort",
"onafterprint",
"onbeforeprint",
"onbeforeunload",
"onblur", "onblur",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onclick",
"onclose",
"oncontextmenu",
"oncuechange",
"ondblclick",
"ondrag", "ondrag",
"ondragend",
"ondragenter",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop", "ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"onhashchange",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
"onload", "onload",
"onloadeddata",
"onloadedmetadata",
"onloadstart",
"onmessage",
"onmousedown",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onmousewheel",
"onoffline",
"ononline",
"onpagehide",
"onpageshow",
"onpause",
"onplay",
"onplaying",
"onpopstate",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onscroll",
"onseeked",
"onseeking",
"onselect",
"onshow", "onshow",
"onstalled", "onplay",
"onstorage",
"onsubmit",
"onsuspend",
"ontimeupdate",
"onunload",
"onvolumechange",
"onwaiting",
"open",
"optgroup",
"optimum",
"option", "option",
"output", "output",
"p", "method",
"param", "legend",
"pattern",
"ping",
"placeholder",
"poster",
"pre",
"preload",
"progress",
"q",
"radiogroup",
"readonly",
"rel",
"required",
"reversed",
"rows",
"rowspan",
"rp",
"rt",
"ruby",
"s",
"samp",
"sandbox",
"scope",
"scoped", "scoped",
"script", "script",
"seamless",
"section",
"select", "select",
"selected",
"shape",
"size",
"sizes",
"small",
"source", "source",
"span",
"spellcheck",
"src",
"srcdoc", "srcdoc",
"srclang",
"start",
"step",
"strong", "strong",
"style", "poster",
"sub", "usemap",
"target",
"sandbox",
"caption",
"section",
"keytype",
"charset",
"checked",
"content",
"command",
"colspan",
"onclick",
"onclose",
"onabort",
"onfocus",
"onended",
"onerror",
"onkeyup",
"oninput",
"onreset",
"onpause",
"srclang",
"optimum",
"summary", "summary",
"sup", "rowspan",
"address",
"enctype",
"article",
"itemref",
"pattern",
"headers",
"default",
"details",
"dirname",
"preload",
"controls",
"colgroup",
"noscript",
"download",
"oncancel",
"onchange",
"frameset",
"hreflang",
"progress",
"ononline",
"dropzone",
"onscroll",
"onseeked",
"onselect",
"onsubmit",
"onresize",
"onunload",
"tabindex", "tabindex",
"table", "readonly",
"target", "seamless",
"tbody", "fieldset",
"td", "manifest",
"selected",
"multiple",
"disabled",
"required",
"reversed",
"datalist",
"datetime",
"autoplay",
"textarea", "textarea",
"tfoot", "itemprop",
"th", "itemtype",
"thead", "optgroup",
"time", "onwaiting",
"title", "oncanplay",
"tr", "onoffline",
"track", "onseeking",
"accesskey",
"onkeydown",
"onsuspend",
"onstalled",
"onstorage",
"draggable",
"onmessage",
"onmouseup",
"translate", "translate",
"type", "oninvalid",
"itemscope",
"onemptied",
"challenge",
"autofocus",
"onplaying",
"maxlength",
"ondragend",
"figcaption",
"blockquote",
"onpopstate",
"onmouseout",
"annotation",
"ondragover",
"onprogress",
"http-equiv",
"ondblclick",
"formaction",
"mediagroup",
"onpageshow",
"onpagehide",
"radiogroup",
"formmethod",
"novalidate",
"formtarget",
"onkeypress",
"spellcheck",
"crossorigin",
"oncuechange",
"ondragstart",
"ondragleave",
"ondragenter",
"onloadstart",
"placeholder",
"formenctype",
"onmousedown",
"onmousemove",
"onmouseover",
"contextmenu",
"autocomplete",
"onmousewheel",
"onafterprint",
"ontimeupdate",
"onratechange",
"onhashchange",
"onloadeddata",
"typemustmatch", "typemustmatch",
"u", "oncontextmenu",
"ul", "onbeforeprint",
"usemap", "accept-charset",
"value", "formnovalidate",
"var", "onvolumechange",
"video", "onbeforeunload",
"wbr", "contenteditable",
"width", "ondurationchange",
"wrap", "onloadedmetadata",
"oncanplaythrough",
}
var hashes = [...]uint32{
0x00000000,
0x00000061,
0x00000062,
0x00000069,
0x00000070,
0x00000071,
0x00000073,
0x00000075,
0x00000c32,
0x00000ccd,
0x00000ce4,
0x00000cec,
0x00000cf4,
0x00000d31,
0x00000d32,
0x00000d33,
0x00000d34,
0x00000d35,
0x00000d36,
0x00000d44,
0x00000d72,
0x00000d8c,
0x00000de9,
0x00000e30,
0x00000e34,
0x00000ecc,
0x00000ee4,
0x00000ee8,
0x00000ef2,
0x0001818c,
0x000184e9,
0x000184ef,
0x000189f4,
0x00019592,
0x00019cae,
0x00019ccc,
0x00019d52,
0x00019d56,
0x0001a024,
0x0001a9b3,
0x0001a9c7,
0x0001b456,
0x0001b850,
0x0001b858,
0x0001b94e,
0x0001bd97,
0x0001c223,
0x0001c2c2,
0x0001c2d0,
0x0001c4cc,
0x0001ce25,
0x0001d032,
0x0001d452,
0x00302ae5,
0x003030e5,
0x003031f3,
0x00308a05,
0x0030b0f9,
0x00310432,
0x003144c1,
0x0032b1b4,
0x0032b22d,
0x00338ae1,
0x003429a4,
0x0035018e,
0x00359844,
0x0035a888,
0x0035c4c6,
0x0035ddcc,
0x00364cce,
0x003689c5,
0x0036b032,
0x00370a2b,
0x003719b5,
0x00371ae1,
0x003789a7,
0x0037a9ab,
0x0037aa14,
0x0037b190,
0x003809d0,
0x00382b25,
0x00384c4e,
0x00385cd0,
0x0038b293,
0x0038d839,
0x0039a9a7,
0x003a4450,
0x003ba9c5,
0x003bea65,
0x06063d92,
0x06078a13,
0x0627a88e,
0x062828e5,
0x062869a3,
0x062b1d4f,
0x065889c5,
0x066704c4,
0x067314d2,
0x06a69a34,
0x06a6ced4,
0x06a83850,
0x06e31d41,
0x06e35cd2,
0x06eb5cc4,
0x06f104cc,
0x07003265,
0x070564d3,
0x07058a65,
0x070709ec,
0x070b8a34,
0x070be9e5,
0x0731444d,
0x07451ee8,
0x07513ec5,
0x07551ccf,
0x0770b0f9,
0x077105e5,
0x0772b194,
0x07755de5,
0x07759844,
0x0778880b,
0xc026d453,
0xc066dcd2,
0xc0c644f3,
0xc2c89cd2,
0xc36bdd8e,
0xc4001a74,
0xc40ba98e,
0xc539bcd4,
0xcaa25a25,
0xcac65cd2,
0xcea13d87,
0xd06d10ce,
0xd45889c5,
0xd5733944,
0xd648b2d0,
0xd6611cd2,
0xd6651174,
0xd6a39cce,
0xd8149814,
0xd8d0bed2,
0xd8d3c447,
0xd8d3c590,
0xd8d7b044,
0xd8d82d97,
0xd8d9bc59,
0xd93ba98e,
0xd96bced4,
0xdc6bad84,
0xde6219a4,
0xe0064cc4,
0xe008aa74,
0xe0679814,
0xe0cb4405,
0xe1101d83,
0xe178b1a7,
0xe6c85cd2,
0xed033850,
0xee2890d4,
0x04d38584,
0x053ba996,
0x0c0ba992,
0x0dabea7f,
0x1628c0cc,
0x166020dc,
0x18db99ac,
0x18e709bc,
0x18f84c56,
0x1a07a810,
0x1a07b21e,
0x1a20b22f,
0x1a5602c8,
0x1a669cdf,
0x1a68c589,
0x1a836acb,
0x1aa6cecf,
0x1b1340cf,
0x1b315a1e,
0x220789bb,
0x27753ad6,
0x2ce70a25,
0x59484c52,
0x8e789a0b,
0x9a0bea7c,
0xa37501fd,
0xae6744dc,
0xc57b9a32,
0xcc239a29,
0xcc5159ed,
0xcd7129ea,
0xd51689dc,
0xe267b058,
0x1b78b2f0,
0x1e48b1d3,
0x2008a91f,
0x28d7b37f,
0x402683af,
0x40b137e6,
0x44e343f8,
0x4c578afb,
0x5848998f,
0x58d7aac6,
0x593cb299,
0x6008b28f,
0x606323a7,
0x60679b77,
0x6160ba37,
0x62682846,
0x6cd7b327,
0x82a69f60,
0x84763670,
0x84e79992,
0x8cf3c3fe,
0x9aa29964,
0x9e605f45,
0x9f754e90,
0xa020bffe,
0xa565474d,
0xaa68c34d,
0xae27a92c,
0xae6baafd,
0xaec9bf4c,
0xb7714778,
0xcce9c6c5,
0xccebe930,
0xee48b1b4,
0x04abc5ca,
0x04d9d031,
0x0a57c5ce,
0x0c6445cb,
0x0d0842d9,
0x0da3dee4,
0x2d09f5c8,
0x2e27d0a8,
0x2ec8e4e9,
0x8841626d,
0x8d0864ee,
0x996876bb,
0x9b07fd6d,
0x9b51512e,
0x9d0058dc,
0x9d3bc4ad,
0x9ef354dd,
0xd8566066,
0xde2d45cb,
0xde66fcfe,
0xe22275cd,
0x053703ae,
0x11271d85,
0x2706077e,
0x2d0ebfa7,
0x2e27e4e5,
0x444bd9ee,
0x5845178f,
0x5c642eea,
0x5e0a2533,
0x84070505,
0x844574ea,
0x8865a00f,
0x8868257d,
0x984690ea,
0x9c67010f,
0x9eae264d,
0xae243c5f,
0xb5351752,
0xde0b8b38,
0x18973dca,
0x81009434,
0x88ba2dbc,
0x8942ad2d,
0x89d77b5a,
0x8eba2554,
0x970a7ed3,
0x9b9e7b14,
0xa1d21ceb,
0xa1d69cc0,
0xa1d7fab7,
0xb6f6940c,
0x2c6d76e6,
0x3b705478,
0x950cec0d,
0x9b056094,
0xb687163c,
0xf6845607,
0xfa4666f0,
0x53ad92bb,
0x71f6940a,
0x8bbc6cd6,
0x1632b560,
0x561a2687,
0x5a00c22c,
0x7c4f1c15,
0x0ee8aacc,
0x2838bda9,
0x6f3c2ece,
0xf1d8d91d,
}
var loHi = [maxLen + 2]uint16{
0,
1,
8,
29,
53,
89,
121,
159,
192,
226,
247,
266,
278,
285,
288,
292,
293,
296,
} }
var oneByteAtoms = [26]Atom{ var oneByteAtoms = [26]Atom{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment