Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
neo
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Kirill Smelkov
neo
Commits
529c4666
Commit
529c4666
authored
Aug 28, 2017
by
Kirill Smelkov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
.
parent
6e05fb59
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
79 additions
and
49 deletions
+79
-49
go/neo/nodetab.go
go/neo/nodetab.go
+34
-28
go/neo/server/master.go
go/neo/server/master.go
+14
-12
go/neo/server/server.go
go/neo/server/server.go
+2
-2
go/neo/server/storage.go
go/neo/server/storage.go
+8
-7
go/neo/server/util.go
go/neo/server/util.go
+3
-0
go/neo/util.go
go/neo/util.go
+18
-0
No files found.
go/neo/nodetab.go
View file @
529c4666
...
...
@@ -173,9 +173,32 @@ func (p *Peer) Connect(ctx context.Context) (*NodeLink, error) {
}
}
link
,
err
:=
p
.
dial
(
ctx
)
dialT
=
time
.
Now
()
return
link
,
err
}()
p
.
linkMu
.
Lock
()
p
.
link
=
link
p
.
dialT
=
dialT
p
.
dialing
=
nil
p
.
linkMu
.
Unlock
()
dialing
.
link
=
link
dialing
.
err
=
err
close
(
dialing
.
ready
)
}()
<-
dialing
.
ready
return
dialing
.
link
,
dialing
.
err
}
// XXX dial does low-level work to dial peer
// XXX p.* reading without lock - ok?
func
(
p
*
Peer
)
dial
(
ctx
context
.
Context
)
(
*
NodeLink
,
error
)
{
var
me
*
NodeCommon
// XXX temp stub
conn0
,
accept
,
err
:=
me
.
Dial
(
ctx
,
p
.
Type
,
p
.
Addr
.
String
())
dialT
=
time
.
Now
()
if
err
!=
nil
{
return
nil
,
err
}
...
...
@@ -197,27 +220,12 @@ func (p *Peer) Connect(ctx context.Context) (*NodeLink, error) {
}
if
err
!=
nil
{
//log.Iferr
(ctx, link.Close())
//log.Errorif
(ctx, link.Close())
lclose
(
ctx
,
link
)
link
=
nil
}
return
link
,
err
}()
p
.
linkMu
.
Lock
()
p
.
link
=
link
p
.
dialT
=
dialT
p
.
dialing
=
nil
p
.
linkMu
.
Unlock
()
dialing
.
link
=
link
dialing
.
err
=
err
close
(
dialing
.
ready
)
}()
<-
dialing
.
ready
return
dialing
.
link
,
dialing
.
err
}
...
...
@@ -226,8 +234,6 @@ func (p *Peer) Connect(ctx context.Context) (*NodeLink, error) {
//trace:event traceNodeChanged(nt *NodeTable, n *Node)
// Node represents a node entry in NodeTable
...
...
go/neo/server/master.go
View file @
529c4666
...
...
@@ -33,6 +33,7 @@ import (
"lab.nexedi.com/kirr/neo/go/neo"
"lab.nexedi.com/kirr/neo/go/zodb"
"lab.nexedi.com/kirr/neo/go/xcommon/log"
"lab.nexedi.com/kirr/neo/go/xcommon/task"
"lab.nexedi.com/kirr/neo/go/xcommon/xcontext"
"lab.nexedi.com/kirr/neo/go/xcommon/xnet"
...
...
@@ -159,7 +160,7 @@ func (m *Master) Run(ctx context.Context) (err error) {
return
err
// XXX err ctx
}
defer
r
unningf
(
&
ctx
,
"master(%v)"
,
l
.
Addr
())(
&
err
)
defer
task
.
R
unningf
(
&
ctx
,
"master(%v)"
,
l
.
Addr
())(
&
err
)
m
.
node
.
MasterAddr
=
l
.
Addr
()
.
String
()
naddr
,
err
:=
neo
.
Addr
(
l
.
Addr
())
...
...
@@ -221,7 +222,7 @@ func (m *Master) Run(ctx context.Context) (err error) {
// runMain is the process which implements main master cluster management logic: node tracking, cluster
// state updates, scheduling data movement between storage nodes etc
func
(
m
*
Master
)
runMain
(
ctx
context
.
Context
)
(
err
error
)
{
defer
r
unning
(
&
ctx
,
"main"
)(
&
err
)
defer
task
.
R
unning
(
&
ctx
,
"main"
)(
&
err
)
// NOTE Run's goroutine is the only mutator of nodeTab, partTab and other cluster state
...
...
@@ -286,7 +287,7 @@ type storRecovery struct {
// - nil: recovery was ok and a command came for cluster to start
// - !nil: recovery was cancelled
func
(
m
*
Master
)
recovery
(
ctx
context
.
Context
)
(
err
error
)
{
defer
r
unning
(
&
ctx
,
"recovery"
)(
&
err
)
defer
task
.
R
unning
(
&
ctx
,
"recovery"
)(
&
err
)
m
.
setClusterState
(
neo
.
ClusterRecovering
)
ctx
,
rcancel
:=
context
.
WithCancel
(
ctx
)
...
...
@@ -497,7 +498,7 @@ func storCtlRecovery(ctx context.Context, stor *neo.Node, res chan storRecovery)
// on error provide feedback to storRecovery chan
res
<-
storRecovery
{
stor
:
stor
,
err
:
err
}
}()
defer
r
unningf
(
&
ctx
,
"%s: stor recovery"
,
stor
.
Link
.
RemoteAddr
())(
&
err
)
defer
task
.
R
unningf
(
&
ctx
,
"%s: stor recovery"
,
stor
.
Link
.
RemoteAddr
())(
&
err
)
conn
:=
stor
.
Conn
// conn, err := stor.Link.NewConn()
...
...
@@ -553,7 +554,7 @@ var errClusterDegraded = stderrors.New("cluster became non-operatonal")
//
// prerequisite for start: .partTab is operational wrt .nodeTab
func
(
m
*
Master
)
verify
(
ctx
context
.
Context
)
(
err
error
)
{
defer
r
unning
(
&
ctx
,
"verify"
)(
&
err
)
defer
task
.
R
unning
(
&
ctx
,
"verify"
)(
&
err
)
m
.
setClusterState
(
neo
.
ClusterVerifying
)
ctx
,
vcancel
:=
context
.
WithCancel
(
ctx
)
...
...
@@ -714,7 +715,7 @@ func storCtlVerify(ctx context.Context, stor *neo.Node, pt *neo.PartitionTable,
res
<-
storVerify
{
stor
:
stor
,
err
:
err
}
}
}()
defer
r
unningf
(
&
ctx
,
"%s: stor verify"
,
stor
.
Link
)(
&
err
)
defer
task
.
R
unningf
(
&
ctx
,
"%s: stor verify"
,
stor
.
Link
)(
&
err
)
conn
:=
stor
.
Conn
...
...
@@ -772,7 +773,7 @@ type serviceDone struct {
//
// prerequisite for start: .partTab is operational wrt .nodeTab and verification passed
func
(
m
*
Master
)
service
(
ctx
context
.
Context
)
(
err
error
)
{
defer
r
unning
(
&
ctx
,
"service"
)(
&
err
)
defer
task
.
R
unning
(
&
ctx
,
"service"
)(
&
err
)
m
.
setClusterState
(
neo
.
ClusterRunning
)
ctx
,
cancel
:=
context
.
WithCancel
(
ctx
)
...
...
@@ -827,6 +828,7 @@ loop:
case
d
:=
<-
serviced
:
// TODO if S goes away -> check partTab still operational -> if not - recovery
_
=
d
// XXX who sends here?
case
n
:=
<-
m
.
nodeLeave
:
...
...
@@ -866,7 +868,7 @@ func storCtlService(ctx context.Context, stor *neo.Node, done chan serviceDone)
}
func
storCtlService1
(
ctx
context
.
Context
,
stor
*
neo
.
Node
)
(
err
error
)
{
defer
r
unningf
(
&
ctx
,
"%s: stor service"
,
stor
.
Link
.
RemoteAddr
())(
&
err
)
defer
task
.
R
unningf
(
&
ctx
,
"%s: stor service"
,
stor
.
Link
.
RemoteAddr
())(
&
err
)
conn
:=
stor
.
Conn
...
...
@@ -906,7 +908,7 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *neo.Node, resp
// - NodeType valid
// - IdTimestamp ?
uuid
:=
n
.
idReq
.
Node
UUID
uuid
:=
n
.
idReq
.
UUID
nodeType
:=
n
.
idReq
.
NodeType
err
:=
func
()
*
neo
.
Error
{
...
...
@@ -939,7 +941,7 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *neo.Node, resp
return
nil
}()
subj
:=
fmt
.
Sprintf
(
"identify: %s (%s)"
,
n
.
conn
.
Link
()
.
RemoteAddr
(),
n
.
idReq
.
Node
UUID
)
subj
:=
fmt
.
Sprintf
(
"identify: %s (%s)"
,
n
.
conn
.
Link
()
.
RemoteAddr
(),
n
.
idReq
.
UUID
)
if
err
!=
nil
{
log
.
Infof
(
ctx
,
"%s: rejecting: %s"
,
subj
,
err
)
return
nil
,
err
...
...
@@ -949,10 +951,10 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *neo.Node, resp
accept
:=
&
neo
.
AcceptIdentification
{
NodeType
:
neo
.
MASTER
,
My
NodeUUID
:
m
.
node
.
MyInfo
.
UUID
,
My
UUID
:
m
.
node
.
MyInfo
.
UUID
,
NumPartitions
:
1
,
// FIXME hardcoded
NumReplicas
:
1
,
// FIXME hardcoded
Your
Node
UUID
:
uuid
,
YourUUID
:
uuid
,
}
// update nodeTab
...
...
go/neo/server/server.go
View file @
529c4666
...
...
@@ -118,10 +118,10 @@ func IdentifyPeer(link *neo.NodeLink, myNodeType neo.NodeType) (nodeInfo neo.Req
err
=
conn
.
Send
(
&
neo
.
AcceptIdentification
{
NodeType
:
myNodeType
,
My
NodeUUID
:
0
,
// XXX
My
UUID
:
0
,
// XXX
NumPartitions
:
1
,
// XXX
NumReplicas
:
1
,
// XXX
Your
NodeUUID
:
req
.
Node
UUID
,
Your
UUID
:
req
.
UUID
,
})
if
err
!=
nil
{
...
...
go/neo/server/storage.go
View file @
529c4666
...
...
@@ -29,6 +29,7 @@ import (
"lab.nexedi.com/kirr/neo/go/neo"
"lab.nexedi.com/kirr/neo/go/zodb"
"lab.nexedi.com/kirr/neo/go/xcommon/log"
"lab.nexedi.com/kirr/neo/go/xcommon/task"
"lab.nexedi.com/kirr/neo/go/xcommon/xnet"
"lab.nexedi.com/kirr/neo/go/xcommon/xcontext"
...
...
@@ -94,7 +95,7 @@ func (stor *Storage) Run(ctx context.Context) error {
return
err
// XXX err ctx
}
defer
r
unningf
(
&
ctx
,
"storage(%v)"
,
l
.
Addr
())(
&
err
)
defer
task
.
R
unningf
(
&
ctx
,
"storage(%v)"
,
l
.
Addr
())(
&
err
)
// start serving incoming connections
wg
:=
sync
.
WaitGroup
{}
...
...
@@ -143,7 +144,7 @@ func (stor *Storage) Run(ctx context.Context) error {
//
// it always returns an error - either due to cancel or command from master to shutdown
func
(
stor
*
Storage
)
talkMaster
(
ctx
context
.
Context
)
(
err
error
)
{
defer
r
unningf
(
&
ctx
,
"talk master(%v)"
,
stor
.
node
.
MasterAddr
)(
&
err
)
defer
task
.
R
unningf
(
&
ctx
,
"talk master(%v)"
,
stor
.
node
.
MasterAddr
)(
&
err
)
for
{
err
:=
stor
.
talkMaster1
(
ctx
)
...
...
@@ -194,9 +195,9 @@ func (stor *Storage) talkMaster1(ctx context.Context) (err error) {
}
// XXX -> node.Dial ?
if
accept
.
Your
Node
UUID
!=
stor
.
node
.
MyInfo
.
UUID
{
log
.
Infof
(
ctx
,
"master told us to have uuid=%v"
,
accept
.
Your
Node
UUID
)
stor
.
node
.
MyInfo
.
UUID
=
accept
.
Your
Node
UUID
if
accept
.
YourUUID
!=
stor
.
node
.
MyInfo
.
UUID
{
log
.
Infof
(
ctx
,
"master told us to have uuid=%v"
,
accept
.
YourUUID
)
stor
.
node
.
MyInfo
.
UUID
=
accept
.
YourUUID
}
...
...
@@ -298,7 +299,7 @@ func (stor *Storage) talkMaster1(ctx context.Context) (err error) {
// - nil: initialization was ok and a command came from master to start operation
// - !nil: initialization was cancelled or failed somehow
func
(
stor
*
Storage
)
m1initialize
(
ctx
context
.
Context
,
Mconn
*
neo
.
Conn
)
(
err
error
)
{
defer
r
unningf
(
&
ctx
,
"init %v"
,
Mconn
)(
&
err
)
defer
task
.
R
unningf
(
&
ctx
,
"init %v"
,
Mconn
)(
&
err
)
for
{
msg
,
err
:=
Mconn
.
Recv
()
...
...
@@ -371,7 +372,7 @@ func (stor *Storage) m1initialize(ctx context.Context, Mconn *neo.Conn) (err err
// either due to master commanding us to stop, or context cancel or some other
// error.
func
(
stor
*
Storage
)
m1serve
(
ctx
context
.
Context
,
Mconn
*
neo
.
Conn
)
(
err
error
)
{
defer
r
unningf
(
&
ctx
,
"serve %v"
,
Mconn
)(
&
err
)
defer
task
.
R
unningf
(
&
ctx
,
"serve %v"
,
Mconn
)(
&
err
)
// refresh stor.opCtx and cancel it when we finish so that client
// handlers know they need to stop operating as master told us to do so.
...
...
go/neo/server/util.go
View file @
529c4666
...
...
@@ -23,11 +23,14 @@ package server
import
(
"context"
"io"
"lab.nexedi.com/kirr/neo/go/xcommon/log"
)
// lclose closes c and logs closing error if there was any.
// the error is otherwise ignored
// XXX dup in neo
func
lclose
(
ctx
context
.
Context
,
c
io
.
Closer
)
{
err
:=
c
.
Close
()
if
err
!=
nil
{
...
...
go/neo/util.go
0 → 100644
View file @
529c4666
package
neo
import
(
"context"
"io"
"lab.nexedi.com/kirr/neo/go/xcommon/log"
)
// lclose closes c and logs closing error if there was any.
// the error is otherwise ignored
// XXX dup in server
func
lclose
(
ctx
context
.
Context
,
c
io
.
Closer
)
{
err
:=
c
.
Close
()
if
err
!=
nil
{
log
.
Error
(
ctx
,
err
)
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment