Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
neo
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
2
Merge Requests
2
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Jobs
Commits
Open sidebar
Kirill Smelkov
neo
Commits
1d3db8a3
Commit
1d3db8a3
authored
Jan 28, 2021
by
Kirill Smelkov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
.
parent
5a6ce3e2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
96 additions
and
106 deletions
+96
-106
go/neo/client.go
go/neo/client.go
+21
-81
go/neo/mastered.go
go/neo/mastered.go
+75
-25
No files found.
go/neo/client.go
View file @
1d3db8a3
...
@@ -35,6 +35,7 @@ import (
...
@@ -35,6 +35,7 @@ import (
"lab.nexedi.com/kirr/go123/mem"
"lab.nexedi.com/kirr/go123/mem"
"lab.nexedi.com/kirr/go123/xerr"
"lab.nexedi.com/kirr/go123/xerr"
"lab.nexedi.com/kirr/go123/xnet"
"lab.nexedi.com/kirr/go123/xnet"
"lab.nexedi.com/kirr/go123/xsync"
// "lab.nexedi.com/kirr/neo/go/internal/log"
// "lab.nexedi.com/kirr/neo/go/internal/log"
"lab.nexedi.com/kirr/neo/go/internal/task"
"lab.nexedi.com/kirr/neo/go/internal/task"
...
@@ -54,7 +55,8 @@ type Client struct {
...
@@ -54,7 +55,8 @@ type Client struct {
// node *xneo.NodeApp
// node *xneo.NodeApp
node
*
_MasteredNode
node
*
_MasteredNode
talkMasterCancel
func
()
runWG
*
xsync
.
WorkGroup
runCancel
func
()
// link to master - established and maintained by talkMaster.
// link to master - established and maintained by talkMaster.
// users retrieve it via masterLink().
// users retrieve it via masterLink().
...
@@ -116,21 +118,26 @@ func (cli *Client) Run(ctx context.Context) (err error) {
...
@@ -116,21 +118,26 @@ func (cli *Client) Run(ctx context.Context) (err error) {
// run process which performs master talk
// run process which performs master talk
ctx
,
cancel
:=
context
.
WithCancel
(
ctx
)
ctx
,
cancel
:=
context
.
WithCancel
(
ctx
)
cli
.
talkMaster
Cancel
=
cancel
cli
.
run
Cancel
=
cancel
// cli.node.OnShutdown = cancel // XXX ok?
// cli.node.OnShutdown = cancel // XXX ok?
// return cli.talkMaster(ctx)
// return cli.talkMaster(ctx)
return
cli
.
node
.
talkMaster
(
ctx
)
cli
.
runWG
.
Go
(
cli
.
node
.
talkMaster
)
cli
.
runWG
.
Go
(
cli
.
recvMaster
)
return
cli
.
runWG
.
Wait
()
}
}
// Close implements zodb.IStorageDriver.
// Close implements zodb.IStorageDriver.
func
(
c
*
Client
)
Close
()
(
err
error
)
{
func
(
c
*
Client
)
Close
()
(
err
error
)
{
c
.
talkMasterCancel
()
c
.
runCancel
()
// XXX wait talkMaster finishes -> XXX return err from that?
err
=
c
.
runWG
.
Wait
()
// XXX what else?
// close networker if configured to do so
// close networker if configured to do so
if
c
.
ownNet
{
if
c
.
ownNet
{
err
=
c
.
node
.
Net
.
Close
()
err2
:=
c
.
node
.
Net
.
Close
()
if
err
==
nil
{
err
=
err2
}
}
}
return
err
return
err
}
}
...
@@ -236,37 +243,6 @@ func (c *Client) withOperational(ctx context.Context) error {
...
@@ -236,37 +243,6 @@ func (c *Client) withOperational(ctx context.Context) error {
*/
*/
/*
/*
// talkMaster connects to master, announces self and receives notifications.
// it tries to persist master link reconnecting as needed.
//
// TODO C -> M for commit (-> another channel)
//
// XXX always error (dup Storage.talkMaster) ?
func (c *Client) talkMaster(ctx context.Context) (err error) {
defer task.Runningf(&ctx, "client: talk master(%v)", c.node.MasterAddr)(&err)
// XXX dup wrt Storage.talkMaster
for {
// XXX .nodeTab.Reset() ?
err := c.talkMaster1(ctx)
log.Warning(ctx, err) // XXX Warning ok? -> Error?
// TODO if err == "reject identification / protocol error" -> shutdown client
// TODO if err = shutdown -> return
// exit on cancel / throttle reconnecting
select {
case <-ctx.Done():
return ctx.Err()
// XXX 1s hardcoded -> move out of here
case <-time.After(1*time.Second):
// ok
}
}
}
func (c *Client) talkMaster1(ctx context.Context) (err error) {
func (c *Client) talkMaster1(ctx context.Context) (err error) {
mlink, accept, err := c.node.Dial(ctx, proto.MASTER, c.node.MasterAddr)
mlink, accept, err := c.node.Dial(ctx, proto.MASTER, c.node.MasterAddr)
if err != nil {
if err != nil {
...
@@ -385,67 +361,31 @@ func (c *Client) initFromMaster(ctx context.Context, mlink *neonet.NodeLink) (er
...
@@ -385,67 +361,31 @@ func (c *Client) initFromMaster(ctx context.Context, mlink *neonet.NodeLink) (er
// recvMaster receives and handles notifications from master.
// recvMaster receives and handles notifications from master.
func
(
c
*
Client
)
recvMaster
(
ctx
context
.
Context
,
mlink
*
neonet
.
NodeLink
)
(
err
error
)
{
func
(
c
*
Client
)
recvMaster
(
ctx
context
.
Context
)
(
err
error
)
{
defer
task
.
Running
(
&
ctx
,
"rx"
)(
&
err
)
defer
task
.
Running
(
&
ctx
,
"rx"
)(
&
err
)
// XXX recheck vs talkMaster
for
{
for
{
req
,
err
:=
mlink
.
Recv1
()
// XXX -> Recv1M
req
,
err
:=
c
.
node
.
RecvM1
()
if
err
!=
nil
{
if
err
!=
nil
{
return
err
return
err
// XXX eventReconnect
}
}
err
=
c
.
recvMaster1
(
ctx
,
req
)
err
=
c
.
recvMaster1
(
req
.
Msg
)
req
.
Close
()
req
.
Close
()
if
err
!=
nil
{
if
err
!=
nil
{
return
err
return
err
}
}
}
}
}
}
// recvMaster1 handles 1 message from master.
// recvMaster1 handles 1 message from master.
func
(
c
*
Client
)
recvMaster1
(
ctx
context
.
Context
,
req
neonet
.
Request
)
error
{
func
(
c
*
Client
)
recvMaster1
(
msg
proto
.
Msg
)
error
{
switch
msg
:=
req
.
M
sg
.
(
type
)
{
switch
msg
:=
m
sg
.
(
type
)
{
// <- committed txn
// <- committed txn
case
*
proto
.
InvalidateObjects
:
case
*
proto
.
InvalidateObjects
:
return
c
.
invalidateObjects
(
msg
)
return
c
.
invalidateObjects
(
msg
)
default
:
default
:
return
fmt
.
Errorf
(
"unexpected message: %T"
,
msg
)
return
fmt
.
Errorf
(
"unexpected message: %T"
,
msg
)
}
}
/*
// messages for state changes
// XXX -> NodeApp into common code to handle NodeTab + PartTab updates from M?
c.node.StateMu.Lock()
switch msg := req.Msg.(type) {
default:
c.node.statemu.unlock()
return fmt.Errorf("unexpected message: %T", msg)
// <- whole partTab
case *proto.SendPartitionTable:
c.node.UpdatePartTab(ctx, msg)
// <- δ(partTab)
case *proto.NotifyPartitionChanges:
panic("TODO δ(partTab)")
// <- δ(nodeTab)
case *proto.NotifyNodeInformation:
c.node.UpdateNodeTab(ctx, msg)
case *proto.NotifyClusterState:
c.node.UpdateClusterState(ctx, msg)
}
// update .operational + notify those who was waiting for it
opready := c.updateOperational()
c.node.StateMu.Unlock()
opready()
return nil
*/
}
}
// invalidateObjects is called by recvMaster1 on receiving invalidateObjects notification.
// invalidateObjects is called by recvMaster1 on receiving invalidateObjects notification.
...
...
go/neo/mastered.go
View file @
1d3db8a3
...
@@ -53,20 +53,16 @@ type _MasteredNode struct {
...
@@ -53,20 +53,16 @@ type _MasteredNode struct {
Net
xnet
.
Networker
// network AP we are sending/receiving on
Net
xnet
.
Networker
// network AP we are sending/receiving on
MasterAddr
string
// address of current master TODO -> masterRegistry
MasterAddr
string
// address of current master TODO -> masterRegistry
// nodeTab/partTab/clusterState
stateMu
sync
.
RWMutex
stateMu
sync
.
RWMutex
state
xneo
.
ClusterState
state
xneo
.
ClusterState
// nodeTab *xneo.NodeTable // information about nodes in the cluster
// partTab *xneo.PartitionTable // information about data distribution in the cluster
// clusterState proto.ClusterState // master idea about cluster state
// operational state in node is maintained by talkMaster.
// operational state in node is maintained by talkMaster.
// users retrieve it via withOperational(). XXX recheck
// users retrieve it via withOperational(). XXX recheck
//
//
// NOTE being operational means:
// NOTE being operational means:
// - link to master established and is ok
// - link to master established and is ok
// - .partTab is operational wrt .nodeTab
// - .state is operational
// - .clusterState = RUNNING <- XXX needed?
//
//
// however master link is accessed separately (see ^^^ and masterLink)
// however master link is accessed separately (see ^^^ and masterLink)
//
//
...
@@ -75,19 +71,40 @@ type _MasteredNode struct {
...
@@ -75,19 +71,40 @@ type _MasteredNode struct {
opReady
chan
struct
{}
// reinitialized each time state becomes non-operational
opReady
chan
struct
{}
// reinitialized each time state becomes non-operational
flags
_MasteredNodeFlags
rxm
chan
_RxM
// talkMaster -> RecvM1
/*
// TODO -> RecvM1 instead
// TODO -> RecvM1 instead
// OnNotify, if !nil, is called when master notifies this node with a message.
// OnNotify, if !nil, is called when master notifies this node with a message.
// XXX not called for δstate
// XXX not called for δstate
OnNotify
func
(
msg
proto
.
Msg
)
error
OnNotify func(msg proto.Msg) error
// XXX kill
// OnNotifyδPartTab, if !nil, is called when master notifies this node
// OnNotifyδPartTab, if !nil, is called when master notifies this node
// with a change to partition table. (used by S to persist partTab)
// with a change to partition table. (used by S to persist partTab)
OnNotifyδPartTab
func
(
pt
*
xneo
.
PartitionTable
)
error
OnNotifyδPartTab func(pt *xneo.PartitionTable) error // XXX kill
*/
}
// _RxM represents a request or event received from master.
type
_RxM
struct
{
Req
neonet
.
Request
Err
error
// event*
}
}
type
_MasteredNodeFlags
int
const
(
// δPartTabPassThrough tells RecvM1 not to filter out messages related
// to partition table changes. When RecvM1 receives such messages there
// are already processed internally to update .state.PartTab correspondingly.
//
// Storage uses this mode to receive δPartTab notifications to know
// when to persist it.
δPartTabPassThrough
_MasteredNodeFlags
=
iota
)
// XXX doc
func
newMasteredNode
(
typ
proto
.
NodeType
,
clusterName
string
,
net
xnet
.
Networker
,
masterAddr
string
)
*
_MasteredNode
{
func
newMasteredNode
(
typ
proto
.
NodeType
,
clusterName
string
,
net
xnet
.
Networker
,
masterAddr
string
)
*
_MasteredNode
{
node
:=
&
_MasteredNode
{
node
:=
&
_MasteredNode
{
myInfo
:
proto
.
NodeInfo
{
myInfo
:
proto
.
NodeInfo
{
...
@@ -106,6 +123,8 @@ func newMasteredNode(typ proto.NodeType, clusterName string, net xnet.Networker,
...
@@ -106,6 +123,8 @@ func newMasteredNode(typ proto.NodeType, clusterName string, net xnet.Networker,
PartTab
:
&
xneo
.
PartitionTable
{},
PartTab
:
&
xneo
.
PartitionTable
{},
Code
:
-
1
,
// invalid
Code
:
-
1
,
// invalid
},
},
rxm
:
make
(
chan
_RxM
),
}
}
return
node
return
node
...
@@ -188,12 +207,19 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
...
@@ -188,12 +207,19 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
err
=
node
.
updateNodeTab
(
ctx
,
&
mnt
)
err
=
node
.
updateNodeTab
(
ctx
,
&
mnt
)
node
.
state
.
PartTab
=
pt
node
.
state
.
PartTab
=
pt
// XXX update "operational"
// XXX update "operational"
/*
// update .operational + notify those who was waiting for it
opready := c.updateOperational()
c.node.StateMu.Unlock()
opready()
*/
node
.
stateMu
.
Unlock
()
node
.
stateMu
.
Unlock
()
if
err
!=
nil
{
// might be command to shutdown
if
err
!=
nil
{
// might be command to shutdown
return
err
return
err
}
}
// XXX update .masterLink + notify waiters
// XXX update .masterLink + notify waiters
// XXX rxm <- eventReconnect
// receive and handle notifications from master
// receive and handle notifications from master
defer
task
.
Running
(
&
ctx
,
"rx"
)(
&
err
)
defer
task
.
Running
(
&
ctx
,
"rx"
)(
&
err
)
...
@@ -202,8 +228,7 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
...
@@ -202,8 +228,7 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
if
err
!=
nil
{
if
err
!=
nil
{
return
err
return
err
}
}
err
=
node
.
recvMaster1
(
ctx
,
req
.
Msg
)
err
=
node
.
recvMaster1
(
ctx
,
req
)
// req ownership is passed in
req
.
Close
()
if
err
!=
nil
{
if
err
!=
nil
{
return
err
return
err
}
}
...
@@ -213,10 +238,10 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
...
@@ -213,10 +238,10 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
}
}
// recvMaster1 handles 1 message from master.
// recvMaster1 handles 1 message from master.
func
(
node
*
_MasteredNode
)
recvMaster1
(
ctx
context
.
Context
,
msg
proto
.
Msg
)
(
err
error
)
{
func
(
node
*
_MasteredNode
)
recvMaster1
(
ctx
context
.
Context
,
req
neonet
.
Request
)
(
err
error
)
{
// messages for state changes are handled internally
// messages for state changes are handled internally
δstate
:=
true
δstate
:=
true
switch
m
sg
.
(
type
)
{
switch
req
.
M
sg
.
(
type
)
{
default
:
δstate
=
false
default
:
δstate
=
false
case
*
proto
.
SendPartitionTable
:
// whole partTab
case
*
proto
.
SendPartitionTable
:
// whole partTab
case
*
proto
.
NotifyPartitionChanges
:
// δ(partTab)
case
*
proto
.
NotifyPartitionChanges
:
// δ(partTab)
...
@@ -225,25 +250,46 @@ func (node *_MasteredNode) recvMaster1(ctx context.Context, msg proto.Msg) (err
...
@@ -225,25 +250,46 @@ func (node *_MasteredNode) recvMaster1(ctx context.Context, msg proto.Msg) (err
}
}
if
δstate
{
if
δstate
{
err
=
node
.
recvδstate
(
ctx
,
msg
)
δpt
,
err
:=
node
.
recvδstate
(
ctx
,
req
.
Msg
)
}
else
{
toRecvM1
:=
false
// XXX other messages? -> particular user
if
δpt
&&
(
node
.
flags
&
δPartTabPassThrough
!=
0
)
{
// XXX rework protocol so that M sends δstate on dedicated connection and other messages on other connections?
toRecvM1
=
true
if
node
.
OnNotify
!=
nil
{
err
=
node
.
OnNotify
(
msg
)
}
else
{
err
=
fmt
.
Errorf
(
"unexpected message: %T"
,
msg
)
}
}
if
!
toRecvM1
{
req
.
Close
()
return
err
}
}
// pass request -> RecvM1
// NOTE req ownership is passed into RecvM1 caller who becomes responsible to close it
select
{
case
<-
ctx
.
Done
()
:
req
.
Close
()
return
ctx
.
Err
()
case
node
.
rxm
<-
_RxM
{
Req
:
req
}
:
// ok
}
}
return
err
return
nil
}
// RecvM1 receives request from master filtered through δstate handler.
//
// XXX eventReconnect
// XXX link down ?
func
(
node
*
_MasteredNode
)
RecvM1
()
(
neonet
.
Request
,
error
)
{
rx
:=
<-
node
.
rxm
// XXX close -> EOF?
return
rx
.
Req
,
rx
.
Err
}
}
//trace:event traceClusterStateChanged(cs *proto.ClusterState)
//trace:event traceClusterStateChanged(cs *proto.ClusterState)
// recvδstate handles reception of δstate messages.
// recvδstate handles reception of δstate messages.
func
(
node
*
_MasteredNode
)
recvδstate
(
ctx
context
.
Context
,
msg
proto
.
Msg
)
(
err
error
)
{
func
(
node
*
_MasteredNode
)
recvδstate
(
ctx
context
.
Context
,
msg
proto
.
Msg
)
(
δpt
bool
,
err
error
)
{
δpt
:
=
false
δpt
=
false
node
.
stateMu
.
Lock
()
node
.
stateMu
.
Lock
()
// XXX defer unlock ?
// XXX defer unlock ?
...
@@ -255,6 +301,7 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
...
@@ -255,6 +301,7 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
// <- whole partTab
// <- whole partTab
case
*
proto
.
SendPartitionTable
:
case
*
proto
.
SendPartitionTable
:
δpt
=
true
pt
:=
xneo
.
PartTabFromDump
(
msg
.
PTid
,
msg
.
RowList
)
// FIXME handle msg.NumReplicas
pt
:=
xneo
.
PartTabFromDump
(
msg
.
PTid
,
msg
.
RowList
)
// FIXME handle msg.NumReplicas
// XXX logging under lock ok?
// XXX logging under lock ok?
log
.
Infof
(
ctx
,
"parttab update: %s"
,
pt
)
log
.
Infof
(
ctx
,
"parttab update: %s"
,
pt
)
...
@@ -262,11 +309,12 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
...
@@ -262,11 +309,12 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
// <- δ(partTab)
// <- δ(partTab)
case
*
proto
.
NotifyPartitionChanges
:
case
*
proto
.
NotifyPartitionChanges
:
δpt
=
true
panic
(
"TODO δ(partTab)"
)
panic
(
"TODO δ(partTab)"
)
// <- δ(nodeTab)
// <- δ(nodeTab)
case
*
proto
.
NotifyNodeInformation
:
case
*
proto
.
NotifyNodeInformation
:
node
.
updateNodeTab
(
ctx
,
msg
)
err
=
node
.
updateNodeTab
(
ctx
,
msg
)
// XXX recheck return (might be command to shutdown
)
case
*
proto
.
NotifyClusterState
:
case
*
proto
.
NotifyClusterState
:
log
.
Infof
(
ctx
,
"state update: %s"
,
msg
.
State
)
log
.
Infof
(
ctx
,
"state update: %s"
,
msg
.
State
)
...
@@ -274,18 +322,20 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
...
@@ -274,18 +322,20 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
traceClusterStateChanged
(
&
node
.
state
.
Code
)
traceClusterStateChanged
(
&
node
.
state
.
Code
)
}
}
/* XXX kill
if δpt && node.OnNotifyδPartTab != nil {
if δpt && node.OnNotifyδPartTab != nil {
err = node.OnNotifyδPartTab(node.state.PartTab)
err = node.OnNotifyδPartTab(node.state.PartTab)
// XXX err -> return without notify?
// XXX err -> return without notify?
panic("TODO")
panic("TODO")
}
}
*/
// update .operational + notify those who was waiting for it
// update .operational + notify those who was waiting for it
opready
:=
node
.
updateOperational
()
opready
:=
node
.
updateOperational
()
node
.
stateMu
.
Unlock
()
node
.
stateMu
.
Unlock
()
opready
()
opready
()
return
nil
return
δpt
,
err
}
}
// updateOperational updates .operational from current state.
// updateOperational updates .operational from current state.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment