Commit 011e4ecc authored by Han-Wen Nienhuys's avatar Han-Wen Nienhuys

Use multiple loops rather than goroutine spawning. This allows

the request to be reused, thus reducing garbage generation. 

Speed-up of ~35%.  Go-FUSE is now only 10% slower than libfuse.
parent 99f887ce
......@@ -75,13 +75,12 @@ We use threaded stats over a read-only filesystem for benchmarking.
Automated code is under benchmark/ directory. A simple C version of
the same FS gives a FUSE baseline
Data points (median time per stat, Go-FUSE version May 2012), 1000
files, high level interface, all kernel caching turned off:
Data points (Go-FUSE version May 2012), 1000 files, high level
interface, all kernel caching turned off:
platform libfuse Go-FUSE difference (%)
Lenovo T60/Fedora17 (2cpu) 346us 572us 35% slower
Lenovo T400/Lucid (2cpu) 152us 256us 68% slower
Lenovo T60/Fedora17 (1cpu) 349us 379us 9% slower
CREDITS
......
......@@ -4,6 +4,7 @@ import (
"log"
"os"
"strings"
"sync/atomic"
"time"
"unsafe"
......@@ -35,6 +36,10 @@ type MountState struct {
opts *MountOptions
kernelSettings raw.InitIn
// Number of loops blocked on reading; used to control amount
// of concurrency.
readers int32
}
func (ms *MountState) KernelSettings() raw.InitIn {
......@@ -171,17 +176,23 @@ func (ms *MountState) recordStats(req *request) {
// Loop runs the request-handling loop until the filesystem is
// unmounted, then releases the /dev/fuse file handle so the mount
// state cannot be reused afterwards.
func (ms *MountState) Loop() {
	ms.loop()
	// NOTE(review): the Close error is discarded here, matching the
	// original behavior; there is no way to surface it from Loop().
	devFile := ms.mountFile
	devFile.Close()
	ms.mountFile = nil
}
const _MAX_READERS = 10
func (ms *MountState) loop() {
var dest []byte
var req *request
for {
if dest == nil {
dest = ms.buffers.AllocBuffer(uint32(ms.opts.MaxWrite + 4096))
}
if atomic.AddInt32(&ms.readers, 0) > _MAX_READERS {
break
}
atomic.AddInt32(&ms.readers, 1)
n, err := ms.mountFile.Read(dest)
readers := atomic.AddInt32(&ms.readers, -1)
if err != nil {
errNo := ToStatus(err)
......@@ -190,8 +201,8 @@ func (ms *MountState) loop() {
continue
}
// Unmount.
if errNo == ENODEV {
// Unmount.
break
}
......@@ -199,7 +210,13 @@ func (ms *MountState) loop() {
break
}
req := ms.newRequest()
if readers <= 0 {
go ms.loop()
}
if req == nil {
req = ms.newRequest()
}
if ms.latencies != nil {
req.startNs = time.Now().UnixNano()
}
......@@ -207,19 +224,15 @@ func (ms *MountState) loop() {
dest = nil
}
// When closely analyzing timings, the context switch
// generates some delay. While unfortunate, the
// alternative is to have a fixed goroutine pool,
// which will lock up the FS if the daemon has too
// many blocking calls.
go func(r *request) {
ms.handleRequest(r)
r.Discard()
}(req)
ms.handleRequest(req)
req.clear()
}
}
ms.buffers.FreeBuffer(dest)
}
func (ms *MountState) handleRequest(req *request) {
defer req.Discard()
defer ms.recordStats(req)
req.parse()
......
......@@ -55,6 +55,21 @@ type request struct {
handler *operationHandler
}
// clear resets every per-request field back to its zero value so the
// request object can be recycled for the next kernel message without
// allocating a fresh one.
func (r *request) clear() {
	// Drop references to the raw input buffers and the parsed views
	// into them.
	r.inHeader = nil
	r.inData = nil
	r.arg = nil
	r.filenames = nil
	r.inputBuf = nil
	r.bufferPoolInputBuf = nil

	// Reset output, status and timing bookkeeping.
	r.outData = nil
	r.flatData = nil
	r.handler = nil
	r.status = OK
	r.startNs = 0
	r.preWriteNs = 0
}
func (r *request) InputDebug() string {
val := " "
if r.handler.DecodeIn != nil {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment