Commit 624393db authored by Kirill Smelkov

Hook in git2go (cgo bindings to libgit2)

Currently for every file -> blob and blob -> file conversion we invoke a
git subprocess (cat-file or hash-object). We also invoke a git subprocess
for every tag read/write, and likewise for commits. Spawning one
subprocess per object has very high overhead.

The ways to avoid such overhead could be:

1) for every kind of operation spawn a git service process, e.g.
   `git cat-file --batch` for reading files, and only do a request/reply
   exchange per object with it.

2) use some go library to work with git repository ourselves.

"1" can work but:

    - at present there is no counterpart of `cat-file --batch` for
      e.g. `hash-object` - i.e. we cannot write objects without quirks
      or patching git.

    - even if we add support for hashing via request/reply, as all
      requests are processed sequentially on git side by e.g. `git
      cat-file --batch`, we won't be able to leverage parallelism.

    - request/reply also has latency attached.

For "2" we have roughly the following choices:

    - use cgo bindings to libgit2   (git2go)

    - use some pure-go git library

The pure-go approach has the advantage that, by design, it avoids the
problems related to the tricky CGo rules for passing pointers between C
and Go. The fact that these rules were sorted out by the go team itself
only during the 1.6 cycle

    https://github.com/golang/go/issues/12416

tells a lot. The net is full of examples where those rules were hard to
get right, and git2go in particular has a history of e.g. heap corruption
(the bug was on the golang side itself and was fixed only for 1.5)

    https://github.com/libgit2/git2go/issues/223
    https://groups.google.com/forum/#!topic/golang-nuts/Vi1HD-54BTA/discussion

However there is no good (to my knowledge) pure-go git library, and the
family of forks around github.com/speedata/gogit either:

    - works 3x slower compared to git2go

      ( or the same 3x in serial mode compared to e.g. `git cat-file --batch`,
        as in serial mode the git subservice and git2go have roughly similar performance )

    - or does not work at all (e.g. barfing out on REF_DELTA pack
      entries, etc)

So because of the 3x slowdown, the pure-go way is currently a non-starter.

Since one person from golang team cared to update git2go to properly
follow the CGo rules

    https://github.com/libgit2/git2go/pull/282

we can be relatively confident about git2go bindings quality and try to
use it.

This commit only hooks git2go into the build and the subcommands, and
adds to/from Oid conversion to Sha1. We'll be switching code over to
git2go incrementally in upcoming patches.

NOTE: for now we need git2go from its `next` branch for

    https://github.com/libgit2/git2go/commit/cf7553e7

The plan is to eventually switch to

    gopkg.in/libgit2/git2go.v25

once it is out.
parent fdaa4a19
...@@ -71,6 +71,8 @@ import ( ...@@ -71,6 +71,8 @@ import (
"strings" "strings"
"syscall" "syscall"
"time" "time"
git "github.com/libgit2/git2go"
) )
// verbose output // verbose output
...@@ -354,7 +356,7 @@ type PullSpec struct { ...@@ -354,7 +356,7 @@ type PullSpec struct {
dir, prefix string dir, prefix string
} }
func cmd_pull(argv []string) { func cmd_pull(gb *git.Repository, argv []string) {
flags := flag.FlagSet{Usage: cmd_pull_usage} flags := flag.FlagSet{Usage: cmd_pull_usage}
flags.Init("", flag.ExitOnError) flags.Init("", flag.ExitOnError)
flags.Parse(argv) flags.Parse(argv)
...@@ -377,7 +379,7 @@ func cmd_pull(argv []string) { ...@@ -377,7 +379,7 @@ func cmd_pull(argv []string) {
pullspecv = append(pullspecv, PullSpec{dir, prefix}) pullspecv = append(pullspecv, PullSpec{dir, prefix})
} }
cmd_pull_(pullspecv) cmd_pull_(gb, pullspecv)
} }
// info about ref pointing to sha1 // info about ref pointing to sha1
...@@ -386,7 +388,7 @@ type Ref struct { ...@@ -386,7 +388,7 @@ type Ref struct {
sha1 Sha1 sha1 Sha1
} }
func cmd_pull_(pullspecv []PullSpec) { func cmd_pull_(gb *git.Repository, pullspecv []PullSpec) {
// while pulling, we'll keep refs from all pulled repositories under temp // while pulling, we'll keep refs from all pulled repositories under temp
// unique work refs namespace. // unique work refs namespace.
backup_time := time.Now().Format("20060102-1504") // %Y%m%d-%H%M backup_time := time.Now().Format("20060102-1504") // %Y%m%d-%H%M
...@@ -601,7 +603,7 @@ type RestoreSpec struct { ...@@ -601,7 +603,7 @@ type RestoreSpec struct {
prefix, dir string prefix, dir string
} }
func cmd_restore(argv []string) { func cmd_restore(gb *git.Repository, argv []string) {
flags := flag.FlagSet{Usage: cmd_restore_usage} flags := flag.FlagSet{Usage: cmd_restore_usage}
flags.Init("", flag.ExitOnError) flags.Init("", flag.ExitOnError)
flags.Parse(argv) flags.Parse(argv)
...@@ -626,7 +628,7 @@ func cmd_restore(argv []string) { ...@@ -626,7 +628,7 @@ func cmd_restore(argv []string) {
restorespecv = append(restorespecv, RestoreSpec{prefix, dir}) restorespecv = append(restorespecv, RestoreSpec{prefix, dir})
} }
cmd_restore_(HEAD, restorespecv) cmd_restore_(gb, HEAD, restorespecv)
} }
// kirr/wendelin.core.git/heads/master -> kirr/wendelin.core.git, heads/master // kirr/wendelin.core.git/heads/master -> kirr/wendelin.core.git, heads/master
...@@ -714,7 +716,7 @@ func (br ByRepoPath) Search(prefix string) int { ...@@ -714,7 +716,7 @@ func (br ByRepoPath) Search(prefix string) int {
}) })
} }
func cmd_restore_(HEAD_ string, restorespecv []RestoreSpec) { func cmd_restore_(gb *git.Repository, HEAD_ string, restorespecv []RestoreSpec) {
HEAD := xgitSha1("rev-parse", "--verify", HEAD_) HEAD := xgitSha1("rev-parse", "--verify", HEAD_)
// read backup refs index // read backup refs index
...@@ -878,7 +880,7 @@ func cmd_restore_(HEAD_ string, restorespecv []RestoreSpec) { ...@@ -878,7 +880,7 @@ func cmd_restore_(HEAD_ string, restorespecv []RestoreSpec) {
} }
} }
var commands = map[string]func([]string){ var commands = map[string]func(*git.Repository, []string){
"pull": cmd_pull, "pull": cmd_pull,
"restore": cmd_restore, "restore": cmd_restore,
} }
...@@ -933,5 +935,9 @@ func main() { ...@@ -933,5 +935,9 @@ func main() {
os.Exit(1) os.Exit(1)
}) })
cmd(argv[1:]) // backup repository
gb, err := git.OpenRepository(".")
raiseif(err)
cmd(gb, argv[1:])
} }
...@@ -21,6 +21,8 @@ import ( ...@@ -21,6 +21,8 @@ import (
"strings" "strings"
"syscall" "syscall"
"testing" "testing"
git "github.com/libgit2/git2go"
) )
func xgetcwd(t *testing.T) string { func xgetcwd(t *testing.T) string {
...@@ -82,10 +84,14 @@ func TestPullRestore(t *testing.T) { ...@@ -82,10 +84,14 @@ func TestPullRestore(t *testing.T) {
// init backup repository // init backup repository
xgit("init", "--bare", "backup.git") xgit("init", "--bare", "backup.git")
xchdir(t, "backup.git") xchdir(t, "backup.git")
gb, err := git.OpenRepository(".")
if err != nil {
t.Fatal(err)
}
// pull from testdata // pull from testdata
my1 := mydir + "/testdata/1" my1 := mydir + "/testdata/1"
cmd_pull([]string{my1+":b1"}) cmd_pull(gb, []string{my1+":b1"})
// prune all non-reachable objects (e.g. tags just pulled - they were encoded as commits) // prune all non-reachable objects (e.g. tags just pulled - they were encoded as commits)
xgit("prune") xgit("prune")
...@@ -107,9 +113,16 @@ func TestPullRestore(t *testing.T) { ...@@ -107,9 +113,16 @@ func TestPullRestore(t *testing.T) {
} }
} }
// reopen backup repository - to avoid having stale cache with present
// objects we deleted above with `git prune`
gb, err = git.OpenRepository(".")
if err != nil {
t.Fatal(err)
}
// restore backup // restore backup
work1 := workdir + "/1" work1 := workdir + "/1"
cmd_restore([]string{"HEAD", "b1:"+work1}) cmd_restore(gb, []string{"HEAD", "b1:"+work1})
// verify files restored to the same as original // verify files restored to the same as original
gerr, diff, _ := ggit("diff", "--no-index", "--raw", "--exit-code", my1, work1) gerr, diff, _ := ggit("diff", "--no-index", "--raw", "--exit-code", my1, work1)
...@@ -181,7 +194,7 @@ func TestPullRestore(t *testing.T) { ...@@ -181,7 +194,7 @@ func TestPullRestore(t *testing.T) {
defer errcatch(func(e *Error) { defer errcatch(func(e *Error) {
// it ok - pull should raise // it ok - pull should raise
}) })
cmd_pull([]string{my2+":b2"}) cmd_pull(gb, []string{my2+":b2"})
t.Fatal("fetching from corrupt.git did not complain") t.Fatal("fetching from corrupt.git did not complain")
}() }()
} }
......
...@@ -17,6 +17,8 @@ import ( ...@@ -17,6 +17,8 @@ import (
"bytes" "bytes"
"encoding/hex" "encoding/hex"
"fmt" "fmt"
git "github.com/libgit2/git2go"
) )
const SHA1_RAWSIZE = 20 const SHA1_RAWSIZE = 20
...@@ -81,3 +83,12 @@ type BySha1 []Sha1 ...@@ -81,3 +83,12 @@ type BySha1 []Sha1
func (p BySha1) Len() int { return len(p) } func (p BySha1) Len() int { return len(p) }
func (p BySha1) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p BySha1) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (p BySha1) Less(i, j int) bool { return bytes.Compare(p[i].sha1[:], p[j].sha1[:]) < 0 } func (p BySha1) Less(i, j int) bool { return bytes.Compare(p[i].sha1[:], p[j].sha1[:]) < 0 }
// interoperability with git2go
// AsOid returns the receiver viewed as a git2go Oid.
//
// No bytes are copied: the result is a converted pointer to the receiver's
// own sha1 field, so it aliases the receiver's storage.
func (sha1 *Sha1) AsOid() *git.Oid {
	raw := &sha1.sha1
	return (*git.Oid)(raw)
}
// Sha1FromOid builds a Sha1 holding a copy of the bytes of *oid.
//
// oid is dereferenced unconditionally, so it must be non-nil.
func Sha1FromOid(oid *git.Oid) Sha1 {
	var result Sha1
	result.sha1 = *oid
	return result
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment