Commit e0a0b09a authored by Han-Wen Nienhuys

fs: support passthrough mode

If a returned file implements the FilePassthroughFder interface, we
try to register the file in the kernel.

Implement this for loopbackFile, and test the behavior.

For benchmarking, use a single reader. With multiple readers, contents
are served out of kernel cache, and do not reflect FUSE performance. 

Benchmark (CPU i5-8350U pinned at 2 GHz):

$ go build -v && go test -run "abc" -bench '(Libfuse|FD)' --passthrough_hp ~/vc/libfuse/build/example/passthrough_hp -test.cpu=1
BenchmarkGoFuseFDRead 	   27444	     45997 ns/op	1424.80 MB/s	      87 B/op	       1 allocs/op
BenchmarkLibfuseHP    	   35377	     32198 ns/op	2035.43 MB/s	       0 B/op	       0 allocs/op

$ go build -v && sudo go test -run "abc" -bench '(Libfuse|FD)' --passthrough_hp ~/vc/libfuse/build/example/passthrough_hp -test.cpu=1
BenchmarkGoFuseFDRead 	   91788	     11902 ns/op	5506.23 MB/s	       3 B/op	       0 allocs/op
BenchmarkLibfuseHP    	  100556	     11831 ns/op	5539.38 MB/s	       0 B/op	       0 allocs/op

Change-Id: If8bde502a3450028f4d87ba61fa9c76ea3ea6c63
parent ff286a50
...@@ -17,7 +17,7 @@ GO_TEST="go test -timeout 5m -p 1 -count 1" ...@@ -17,7 +17,7 @@ GO_TEST="go test -timeout 5m -p 1 -count 1"
# Run all tests as current user # Run all tests as current user
$GO_TEST ./... $GO_TEST ./...
# Direct-mount tests need to run as root # Direct-mount tests need to run as root
sudo env PATH=$PATH $GO_TEST -run TestDirectMount ./fs ./fuse sudo env PATH=$PATH $GO_TEST -run 'Test(DirectMount|Passthrough)' ./fs ./fuse
make -C benchmark make -C benchmark
go test ./benchmark -test.bench '.*' -test.cpu 1,2 go test ./benchmark -test.bench '.*' -test.cpu 1,2
...@@ -87,7 +87,7 @@ func BenchmarkGoFuseFDRead(b *testing.B) { ...@@ -87,7 +87,7 @@ func BenchmarkGoFuseFDRead(b *testing.B) {
b.Fatal(err) b.Fatal(err)
} }
mnt := setupFS(root, b.N, b) mnt := setupFS(root, b.N, b)
benchmarkRead(mnt, b, 32, "") benchmarkRead(mnt, b, 1, "")
} }
var libfusePath = flag.String("passthrough_hp", "", "path to libfuse's passthrough_hp") var libfusePath = flag.String("passthrough_hp", "", "path to libfuse's passthrough_hp")
...@@ -130,5 +130,5 @@ func BenchmarkLibfuseHP(b *testing.B) { ...@@ -130,5 +130,5 @@ func BenchmarkLibfuseHP(b *testing.B) {
} }
} }
benchmarkRead(mnt, b, 32, "") benchmarkRead(mnt, b, 1, "")
} }
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
// nodes of the file system tree. // nodes of the file system tree.
// //
// type myNode struct { // type myNode struct {
// fs.Inode // fs.Inode
// } // }
// //
// // Node types must be InodeEmbedders // // Node types must be InodeEmbedders
...@@ -20,10 +20,10 @@ ...@@ -20,10 +20,10 @@
// var _ = (fs.NodeLookuper)((*myNode)(nil)) // var _ = (fs.NodeLookuper)((*myNode)(nil))
// //
// func (n *myNode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*Inode, syscall.Errno) { // func (n *myNode) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*Inode, syscall.Errno) {
// ops := myNode{} // ops := myNode{}
// out.Mode = 0755 // out.Mode = 0755
// out.Size = 42 // out.Size = 42
// return n.NewInode(ctx, &ops, fs.StableAttr{Mode: syscall.S_IFREG}), 0 // return n.NewInode(ctx, &ops, fs.StableAttr{Mode: syscall.S_IFREG}), 0
// } // }
// //
// The method names are inspired by the system call names, so we have // The method names are inspired by the system call names, so we have
...@@ -568,6 +568,17 @@ type NodeRenamer interface { ...@@ -568,6 +568,17 @@ type NodeRenamer interface {
type FileHandle interface { type FileHandle interface {
} }
// FilePassthroughFder is implemented by file handles that are backed
// by a physical file and want the kernel to serve their I/O directly.
//
// PassthroughFd reports the open file descriptor of the backing file
// together with true. The kernel then executes read/write operations
// directly on the backing file, bypassing the FUSE process. Returning
// false means passthrough will not be used for this file.
//
// PassthroughFd is called once when processing the Create or Open
// operation, so there is no concern about concurrent access to the
// descriptor.
type FilePassthroughFder interface {
	PassthroughFd() (fd int, ok bool)
}
// See NodeReleaser. // See NodeReleaser.
type FileReleaser interface { type FileReleaser interface {
Release(ctx context.Context) syscall.Errno Release(ctx context.Context) syscall.Errno
......
...@@ -63,6 +63,12 @@ type ServerCallbacks interface { ...@@ -63,6 +63,12 @@ type ServerCallbacks interface {
InodeNotifyStoreCache(node uint64, offset int64, data []byte) fuse.Status InodeNotifyStoreCache(node uint64, offset int64, data []byte) fuse.Status
} }
// serverBackingFdCallbacks is the part of the server API used to
// register and unregister backing files for passthrough.
//
// RegisterBackingFd takes a backing map describing the file descriptor
// and returns the ID assigned to it, or a non-zero errno on failure.
// UnregisterBackingFd drops a previously returned ID.
//
// TODO: fold serverBackingFdCallbacks into ServerCallbacks and bump API version
type serverBackingFdCallbacks interface {
	RegisterBackingFd(m *fuse.BackingMap) (id int32, errno syscall.Errno)
	UnregisterBackingFd(id int32) syscall.Errno
}
type rawBridge struct { type rawBridge struct {
options Options options Options
root *Inode root *Inode
...@@ -98,8 +104,13 @@ type rawBridge struct { ...@@ -98,8 +104,13 @@ type rawBridge struct {
// estimate for stableAttrs. // estimate for stableAttrs.
nodeCountHigh int nodeCountHigh int
files []*fileEntry files []*fileEntry
// indices of files that are not allocated.
freeFiles []uint32 freeFiles []uint32
// If set, don't try to register backing file for Create/Open calls.
disableBackingFiles bool
} }
// newInode creates a new inode pointing to ops. // newInode creates a new inode pointing to ops.
...@@ -480,10 +491,10 @@ func (b *rawBridge) Create(cancel <-chan struct{}, input *fuse.CreateIn, name st ...@@ -480,10 +491,10 @@ func (b *rawBridge) Create(cancel <-chan struct{}, input *fuse.CreateIn, name st
} }
child, fe := b.addNewChild(parent, name, child, f, input.Flags|syscall.O_CREAT|syscall.O_EXCL, &out.EntryOut) child, fe := b.addNewChild(parent, name, child, f, input.Flags|syscall.O_CREAT|syscall.O_EXCL, &out.EntryOut)
out.Fh = uint64(fe.fh) out.Fh = uint64(fe.fh)
out.OpenFlags = flags out.OpenFlags = flags
b.addBackingID(child, f, &out.OpenOut)
child.setEntryOut(&out.EntryOut) child.setEntryOut(&out.EntryOut)
b.setEntryOutTimeout(&out.EntryOut) b.setEntryOutTimeout(&out.EntryOut)
return fuse.OK return fuse.OK
...@@ -736,20 +747,82 @@ func (b *rawBridge) Open(cancel <-chan struct{}, input *fuse.OpenIn, out *fuse.O ...@@ -736,20 +747,82 @@ func (b *rawBridge) Open(cancel <-chan struct{}, input *fuse.OpenIn, out *fuse.O
if errno != 0 { if errno != 0 {
return errnoToStatus(errno) return errnoToStatus(errno)
} }
out.OpenFlags = flags
if f != nil { if f != nil {
b.mu.Lock() b.mu.Lock()
defer b.mu.Unlock() defer b.mu.Unlock()
fe := b.registerFile(n, f, input.Flags) fe := b.registerFile(n, f, input.Flags)
out.Fh = uint64(fe.fh) out.Fh = uint64(fe.fh)
b.addBackingID(n, f, out)
} }
out.OpenFlags = flags
return fuse.OK return fuse.OK
} }
return fuse.ENOTSUP return fuse.ENOTSUP
} }
// addBackingID tries to enable passthrough for the file f opened on
// node n, and records the result in out. The caller must hold
// bridge.mu.
//
// Nothing happens when backing files were disabled earlier, when the
// server lacks the backing-fd callbacks, or when f does not implement
// FilePassthroughFder. A node that has no backing ID yet gets one by
// registering the descriptor reported by f; every successful call
// takes one reference on the node's backing ID.
func (b *rawBridge) addBackingID(n *Inode, f FileHandle, out *fuse.OpenOut) {
	if b.disableBackingFiles {
		return
	}

	callbacks, supported := b.server.(serverBackingFdCallbacks)
	if !supported {
		// The server cannot register backing files; stop trying.
		b.disableBackingFiles = true
		return
	}

	passthrough, isPassthrough := f.(FilePassthroughFder)
	if !isPassthrough {
		return
	}

	if n.backingID == 0 {
		fd, usable := passthrough.PassthroughFd()
		if !usable {
			return
		}

		id, errno := callbacks.RegisterBackingFd(&fuse.BackingMap{Fd: int32(fd)})
		if errno != 0 {
			// This happens if we're not root or CAP_PASSTHROUGH is missing.
			b.disableBackingFiles = true
			return
		}
		n.backingID = id
	}

	// A zero ID means the node still has no backing file.
	if n.backingID == 0 {
		return
	}

	out.BackingID = n.backingID
	out.OpenFlags |= fuse.FOPEN_PASSTHROUGH
	out.OpenFlags &= ^uint32(fuse.FOPEN_KEEP_CACHE)
	n.backingIDRefcount++
}
// releaseBackingIDRef drops one reference on the backing ID of node n.
// The caller must hold bridge.mu.
//
// Nodes without a backing ID are left untouched. When the last
// reference goes away the ID is unregistered with the server (a
// failure is only logged) and the node's backing state is reset. A
// negative reference count panics.
func (b *rawBridge) releaseBackingIDRef(n *Inode) {
	if n.backingID == 0 {
		return
	}

	n.backingIDRefcount--
	switch {
	case n.backingIDRefcount == 0:
		callbacks := b.server.(serverBackingFdCallbacks)
		if errno := callbacks.UnregisterBackingFd(n.backingID); errno != 0 {
			b.logf("UnregisterBackingFd: %v", errno)
		}
		n.backingID = 0
		n.backingIDRefcount = 0
	case n.backingIDRefcount < 0:
		log.Panic("backingIDRefcount underflow")
	}
}
// registerFile hands out a file handle. Must have bridge.mu // registerFile hands out a file handle. Must have bridge.mu
func (b *rawBridge) registerFile(n *Inode, f FileHandle, flags uint32) *fileEntry { func (b *rawBridge) registerFile(n *Inode, f FileHandle, flags uint32) *fileEntry {
fe := &fileEntry{} fe := &fileEntry{}
...@@ -766,6 +839,7 @@ func (b *rawBridge) registerFile(n *Inode, f FileHandle, flags uint32) *fileEntr ...@@ -766,6 +839,7 @@ func (b *rawBridge) registerFile(n *Inode, f FileHandle, flags uint32) *fileEntr
fe.nodeIndex = len(n.openFiles) fe.nodeIndex = len(n.openFiles)
fe.file = f fe.file = f
n.openFiles = append(n.openFiles, fe.fh) n.openFiles = append(n.openFiles, fe.fh)
return fe return fe
} }
...@@ -838,11 +912,13 @@ func (b *rawBridge) Release(cancel <-chan struct{}, input *fuse.ReleaseIn) { ...@@ -838,11 +912,13 @@ func (b *rawBridge) Release(cancel <-chan struct{}, input *fuse.ReleaseIn) {
b.mu.Lock() b.mu.Lock()
defer b.mu.Unlock() defer b.mu.Unlock()
b.releaseBackingIDRef(n)
b.freeFiles = append(b.freeFiles, uint32(input.Fh)) b.freeFiles = append(b.freeFiles, uint32(input.Fh))
} }
func (b *rawBridge) ReleaseDir(input *fuse.ReleaseIn) { func (b *rawBridge) ReleaseDir(input *fuse.ReleaseIn) {
_, f := b.releaseFileEntry(input.NodeId, input.Fh) n, f := b.releaseFileEntry(input.NodeId, input.Fh)
f.wg.Wait() f.wg.Wait()
f.mu.Lock() f.mu.Lock()
...@@ -854,6 +930,7 @@ func (b *rawBridge) ReleaseDir(input *fuse.ReleaseIn) { ...@@ -854,6 +930,7 @@ func (b *rawBridge) ReleaseDir(input *fuse.ReleaseIn) {
b.mu.Lock() b.mu.Lock()
defer b.mu.Unlock() defer b.mu.Unlock()
b.releaseBackingIDRef(n)
b.freeFiles = append(b.freeFiles, uint32(input.Fh)) b.freeFiles = append(b.freeFiles, uint32(input.Fh))
} }
......
...@@ -40,6 +40,14 @@ var _ = (FileFlusher)((*loopbackFile)(nil)) ...@@ -40,6 +40,14 @@ var _ = (FileFlusher)((*loopbackFile)(nil))
var _ = (FileFsyncer)((*loopbackFile)(nil)) var _ = (FileFsyncer)((*loopbackFile)(nil))
var _ = (FileSetattrer)((*loopbackFile)(nil)) var _ = (FileSetattrer)((*loopbackFile)(nil))
var _ = (FileAllocater)((*loopbackFile)(nil)) var _ = (FileAllocater)((*loopbackFile)(nil))
var _ = (FilePassthroughFder)((*loopbackFile)(nil))
// PassthroughFd implements FilePassthroughFder. It always returns the
// file's descriptor together with true.
func (f *loopbackFile) PassthroughFd() (int, bool) {
	// This Fd is not accessed concurrently, but lock anyway for uniformity.
	f.mu.Lock()
	fd := f.fd
	f.mu.Unlock()

	return fd, true
}
func (f *loopbackFile) Read(ctx context.Context, buf []byte, off int64) (res fuse.ReadResult, errno syscall.Errno) { func (f *loopbackFile) Read(ctx context.Context, buf []byte, off int64) (res fuse.ReadResult, errno syscall.Errno) {
f.mu.Lock() f.mu.Lock()
......
...@@ -69,6 +69,11 @@ type Inode struct { ...@@ -69,6 +69,11 @@ type Inode struct {
// protected by bridge.mu // protected by bridge.mu
openFiles []uint32 openFiles []uint32
// backing files, protected by bridge.mu
backingIDRefcount int
backingID int32
backingFd int
// mu protects the following mutable fields. When locking // mu protects the following mutable fields. When locking
// multiple Inodes, locks must be acquired using // multiple Inodes, locks must be acquired using
// lockNodes/unlockNodes // lockNodes/unlockNodes
......
// Copyright 2024 the Go-FUSE Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package fs
import (
"context"
"io"
"os"
"sync"
"syscall"
"testing"
"github.com/hanwen/go-fuse/v2/fuse"
"github.com/hanwen/go-fuse/v2/internal/testutil"
)
// rwRegisteringNode is a LoopbackNode that counts the Read and Write
// calls it receives.
type rwRegisteringNode struct {
	LoopbackNode

	// mu is held by Read and Write while they update the counters.
	mu sync.Mutex
	// reads and writes count the calls to Read and Write respectively.
	reads, writes int
}
// Read counts the call and then forwards it to the file handle, which
// must implement FileReader.
func (n *rwRegisteringNode) Read(ctx context.Context, f FileHandle, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.reads++
	reader := f.(FileReader)
	return reader.Read(ctx, dest, off)
}
// Write counts the call and then forwards it to the file handle, which
// must implement FileWriter.
func (n *rwRegisteringNode) Write(ctx context.Context, f FileHandle, data []byte, off int64) (written uint32, errno syscall.Errno) {
	n.mu.Lock()
	defer n.mu.Unlock()

	n.writes++
	writer := f.(FileWriter)
	return writer.Write(ctx, data, off)
}
// TestPassthrough mounts a loopback file system in which every node
// created through NewNode is one shared rwRegisteringNode, writes and
// reads a file through the mount, and then checks that none of that
// I/O was counted by the node's Read or Write methods.
//
// The test is skipped unless it runs with effective UID 0.
func TestPassthrough(t *testing.T) {
	if os.Geteuid() != 0 {
		t.Skip("passthrough requires CAP_SYS_ADMIN")
	}
	mnt := t.TempDir()
	n := &rwRegisteringNode{}

	rootData := &LoopbackRoot{
		Path: t.TempDir(),
		NewNode: func(rootData *LoopbackRoot, parent *Inode, name string, st *syscall.Stat_t) InodeEmbedder {
			return n
		},
	}
	n.RootData = rootData
	root := &LoopbackNode{
		RootData: rootData,
	}
	opts := &Options{}
	opts.Debug = testutil.VerboseTest()
	server, err := Mount(mnt, root, opts)
	if err != nil {
		t.Fatal(err)
	}
	// Clean up the mount if the test bails out early.
	defer server.Unmount()

	fn := mnt + "/file"
	want := "hello there"
	if err := os.WriteFile(fn, []byte(want), 0666); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	f, err := os.Open(fn)
	if err != nil {
		t.Fatalf("Open: %v", err)
	}
	defer f.Close()
	got, err := io.ReadAll(f)
	if err != nil {
		t.Fatalf("ReadAll: %v", err)
	}
	if want != string(got) {
		t.Errorf("got %q want %q", got, want)
	}

	// Overwrite the file while f is still open, then read it back
	// through a fresh handle.
	want2 := "xxxx"
	if err := os.WriteFile(fn, []byte(want2), 0666); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	got2, err := os.ReadFile(fn)
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if string(got2) != want2 {
		t.Errorf("got %q want %q", got2, want2)
	}

	// Close and unmount explicitly before inspecting the counters.
	f.Close()
	server.Unmount()

	if n.reads > 0 {
		t.Errorf("got readcount %d want 0", n.reads)
	}
	if n.writes > 0 {
		t.Errorf("got writecount %d want 0", n.writes)
	}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment