Commit 9525372e authored by Russ Cox

path/filepath: avoid allocation in Clean of cleaned path

Alternative to https://golang.org/cl/6330044.

Fixes #3681.

R=golang-dev, r, hanwen, iant
CC=golang-dev
https://golang.org/cl/6335056
parent 5a5e698c
...@@ -13,6 +13,43 @@ import ( ...@@ -13,6 +13,43 @@ import (
"strings" "strings"
) )
// A lazybuf is a lazily constructed path buffer.
// It supports append, reading previously appended bytes,
// and retrieving the final string. It does not allocate a buffer
// to hold the output until that output diverges from s.
type lazybuf struct {
	s   string // source string the output is compared against
	buf []byte // private copy of the output; nil while the output is still a prefix of s
	w   int    // number of bytes written so far, i.e. the index of the next write
}

// index returns the byte at position i of the output written so far.
// It reads from buf once a private buffer exists and from s otherwise.
func (b *lazybuf) index(i int) byte {
	if b.buf != nil {
		return b.buf[i]
	}
	return b.s[i]
}

// append writes c at position w and advances w.
// While no private buffer exists and c equals the byte already at that
// position in s, only w is advanced and nothing is allocated.
func (b *lazybuf) append(c byte) {
	if b.buf == nil {
		if b.w < len(b.s) && b.s[b.w] == c {
			b.w++
			return
		}
		// First divergence from s: switch to a private buffer that
		// starts out holding the bytes accepted so far.
		b.buf = make([]byte, len(b.s))
		copy(b.buf, b.s[:b.w])
	}
	if b.w < len(b.buf) {
		b.buf[b.w] = c
	} else {
		// The output has outgrown len(s) (for example s is empty);
		// grow the buffer instead of indexing past its end.
		b.buf = append(b.buf[:b.w], c)
	}
	b.w++
}

// string returns the first w bytes of the output. If no private buffer
// was ever needed, the result is a substring of s and no copy is made.
func (b *lazybuf) string() string {
	if b.buf == nil {
		return b.s[:b.w]
	}
	return string(b.buf[:b.w])
}
const ( const (
Separator = os.PathSeparator Separator = os.PathSeparator
ListSeparator = os.PathListSeparator ListSeparator = os.PathListSeparator
...@@ -57,11 +94,11 @@ func Clean(path string) string { ...@@ -57,11 +94,11 @@ func Clean(path string) string {
// dotdot is index in buf where .. must stop, either because // dotdot is index in buf where .. must stop, either because
// it is the leading slash or it is a leading ../../.. prefix. // it is the leading slash or it is a leading ../../.. prefix.
n := len(path) n := len(path)
buf := []byte(path) out := lazybuf{s: path}
r, w, dotdot := 0, 0, 0 r, dotdot := 0, 0
if rooted { if rooted {
buf[0] = Separator out.append(Separator)
r, w, dotdot = 1, 1, 1 r, dotdot = 1, 1
} }
for r < n { for r < n {
...@@ -76,46 +113,40 @@ func Clean(path string) string { ...@@ -76,46 +113,40 @@ func Clean(path string) string {
// .. element: remove to last separator // .. element: remove to last separator
r += 2 r += 2
switch { switch {
case w > dotdot: case out.w > dotdot:
// can backtrack // can backtrack
w-- out.w--
for w > dotdot && !os.IsPathSeparator(buf[w]) { for out.w > dotdot && !os.IsPathSeparator(out.index(out.w)) {
w-- out.w--
} }
case !rooted: case !rooted:
// cannot backtrack, but not rooted, so append .. element. // cannot backtrack, but not rooted, so append .. element.
if w > 0 { if out.w > 0 {
buf[w] = Separator out.append(Separator)
w++
} }
buf[w] = '.' out.append('.')
w++ out.append('.')
buf[w] = '.' dotdot = out.w
w++
dotdot = w
} }
default: default:
// real path element. // real path element.
// add slash if needed // add slash if needed
if rooted && w != 1 || !rooted && w != 0 { if rooted && out.w != 1 || !rooted && out.w != 0 {
buf[w] = Separator out.append(Separator)
w++
} }
// copy element // copy element
for ; r < n && !os.IsPathSeparator(path[r]); r++ { for ; r < n && !os.IsPathSeparator(path[r]); r++ {
buf[w] = path[r] out.append(path[r])
w++
} }
} }
} }
// Turn empty string into "." // Turn empty string into "."
if w == 0 { if out.w == 0 {
buf[w] = '.' out.append('.')
w++
} }
return FromSlash(vol + string(buf[0:w])) return FromSlash(vol + out.string())
} }
// ToSlash returns the result of replacing each separator character // ToSlash returns the result of replacing each separator character
......
...@@ -99,6 +99,24 @@ func TestClean(t *testing.T) { ...@@ -99,6 +99,24 @@ func TestClean(t *testing.T) {
if s := filepath.Clean(test.path); s != test.result { if s := filepath.Clean(test.path); s != test.result {
t.Errorf("Clean(%q) = %q, want %q", test.path, s, test.result) t.Errorf("Clean(%q) = %q, want %q", test.path, s, test.result)
} }
if s := filepath.Clean(test.result); s != test.result {
t.Errorf("Clean(%q) = %q, want %q", test.result, s, test.result)
}
}
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
allocs := -ms.Mallocs
const rounds = 100
for i := 0; i < rounds; i++ {
for _, test := range tests {
filepath.Clean(test.result)
}
}
runtime.ReadMemStats(&ms)
allocs += ms.Mallocs
if allocs >= rounds {
t.Errorf("Clean cleaned paths: %d allocations per test round, want zero", allocs/rounds)
} }
} }
......
...@@ -10,6 +10,43 @@ import ( ...@@ -10,6 +10,43 @@ import (
"strings" "strings"
) )
// A lazybuf is a lazily constructed path buffer.
// It supports append, reading previously appended bytes,
// and retrieving the final string. It does not allocate a buffer
// to hold the output until that output diverges from s.
type lazybuf struct {
	s   string // source string the output is compared against
	buf []byte // private copy of the output; nil while the output is still a prefix of s
	w   int    // number of bytes written so far, i.e. the index of the next write
}

// index returns the byte at position i of the output written so far.
// It reads from buf once a private buffer exists and from s otherwise.
func (b *lazybuf) index(i int) byte {
	if b.buf != nil {
		return b.buf[i]
	}
	return b.s[i]
}

// append writes c at position w and advances w.
// While no private buffer exists and c equals the byte already at that
// position in s, only w is advanced and nothing is allocated.
func (b *lazybuf) append(c byte) {
	if b.buf == nil {
		if b.w < len(b.s) && b.s[b.w] == c {
			b.w++
			return
		}
		// First divergence from s: switch to a private buffer that
		// starts out holding the bytes accepted so far.
		b.buf = make([]byte, len(b.s))
		copy(b.buf, b.s[:b.w])
	}
	if b.w < len(b.buf) {
		b.buf[b.w] = c
	} else {
		// The output has outgrown len(s) (for example s is empty);
		// grow the buffer instead of indexing past its end.
		b.buf = append(b.buf[:b.w], c)
	}
	b.w++
}

// string returns the first w bytes of the output. If no private buffer
// was ever needed, the result is a substring of s and no copy is made.
func (b *lazybuf) string() string {
	if b.buf == nil {
		return b.s[:b.w]
	}
	return string(b.buf[:b.w])
}
// Clean returns the shortest path name equivalent to path // Clean returns the shortest path name equivalent to path
// by purely lexical processing. It applies the following rules // by purely lexical processing. It applies the following rules
// iteratively until no further processing can be done: // iteratively until no further processing can be done:
...@@ -42,10 +79,11 @@ func Clean(path string) string { ...@@ -42,10 +79,11 @@ func Clean(path string) string {
// writing to buf; w is index of next byte to write. // writing to buf; w is index of next byte to write.
// dotdot is index in buf where .. must stop, either because // dotdot is index in buf where .. must stop, either because
// it is the leading slash or it is a leading ../../.. prefix. // it is the leading slash or it is a leading ../../.. prefix.
buf := []byte(path) out := lazybuf{s: path}
r, w, dotdot := 0, 0, 0 r, dotdot := 0, 0
if rooted { if rooted {
r, w, dotdot = 1, 1, 1 out.append('/')
r, dotdot = 1, 1
} }
for r < n { for r < n {
...@@ -60,46 +98,40 @@ func Clean(path string) string { ...@@ -60,46 +98,40 @@ func Clean(path string) string {
// .. element: remove to last / // .. element: remove to last /
r += 2 r += 2
switch { switch {
case w > dotdot: case out.w > dotdot:
// can backtrack // can backtrack
w-- out.w--
for w > dotdot && buf[w] != '/' { for out.w > dotdot && out.index(out.w) != '/' {
w-- out.w--
} }
case !rooted: case !rooted:
// cannot backtrack, but not rooted, so append .. element. // cannot backtrack, but not rooted, so append .. element.
if w > 0 { if out.w > 0 {
buf[w] = '/' out.append('/')
w++
} }
buf[w] = '.' out.append('.')
w++ out.append('.')
buf[w] = '.' dotdot = out.w
w++
dotdot = w
} }
default: default:
// real path element. // real path element.
// add slash if needed // add slash if needed
if rooted && w != 1 || !rooted && w != 0 { if rooted && out.w != 1 || !rooted && out.w != 0 {
buf[w] = '/' out.append('/')
w++
} }
// copy element // copy element
for ; r < n && path[r] != '/'; r++ { for ; r < n && path[r] != '/'; r++ {
buf[w] = path[r] out.append(path[r])
w++
} }
} }
} }
// Turn empty string into "." // Turn empty string into "."
if w == 0 { if out.w == 0 {
buf[w] = '.' return "."
w++
} }
return string(buf[0:w]) return out.string()
} }
// Split splits path immediately following the final slash. // Split splits path immediately following the final slash.
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
package path package path
import ( import (
"runtime"
"testing" "testing"
) )
...@@ -67,6 +68,24 @@ func TestClean(t *testing.T) { ...@@ -67,6 +68,24 @@ func TestClean(t *testing.T) {
if s := Clean(test.path); s != test.result { if s := Clean(test.path); s != test.result {
t.Errorf("Clean(%q) = %q, want %q", test.path, s, test.result) t.Errorf("Clean(%q) = %q, want %q", test.path, s, test.result)
} }
if s := Clean(test.result); s != test.result {
t.Errorf("Clean(%q) = %q, want %q", test.result, s, test.result)
}
}
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
allocs := -ms.Mallocs
const rounds = 100
for i := 0; i < rounds; i++ {
for _, test := range cleantests {
Clean(test.result)
}
}
runtime.ReadMemStats(&ms)
allocs += ms.Mallocs
if allocs >= rounds {
t.Errorf("Clean cleaned paths: %d allocations per test round, want zero", allocs/rounds)
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment