Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
N
neoppod
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Levin Zimmermann
neoppod
Commits
1d3db8a3
Commit
1d3db8a3
authored
Jan 28, 2021
by
Kirill Smelkov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
.
parent
5a6ce3e2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
96 additions
and
106 deletions
+96
-106
go/neo/client.go
go/neo/client.go
+21
-81
go/neo/mastered.go
go/neo/mastered.go
+75
-25
No files found.
go/neo/client.go
View file @
1d3db8a3
...
...
@@ -35,6 +35,7 @@ import (
"lab.nexedi.com/kirr/go123/mem"
"lab.nexedi.com/kirr/go123/xerr"
"lab.nexedi.com/kirr/go123/xnet"
"lab.nexedi.com/kirr/go123/xsync"
// "lab.nexedi.com/kirr/neo/go/internal/log"
"lab.nexedi.com/kirr/neo/go/internal/task"
...
...
@@ -54,7 +55,8 @@ type Client struct {
// node *xneo.NodeApp
node
*
_MasteredNode
talkMasterCancel
func
()
runWG
*
xsync
.
WorkGroup
runCancel
func
()
// link to master - established and maintained by talkMaster.
// users retrieve it via masterLink().
...
...
@@ -116,21 +118,26 @@ func (cli *Client) Run(ctx context.Context) (err error) {
// run process which performs master talk
ctx
,
cancel
:=
context
.
WithCancel
(
ctx
)
cli
.
talkMaster
Cancel
=
cancel
cli
.
run
Cancel
=
cancel
// cli.node.OnShutdown = cancel // XXX ok?
// return cli.talkMaster(ctx)
return
cli
.
node
.
talkMaster
(
ctx
)
cli
.
runWG
.
Go
(
cli
.
node
.
talkMaster
)
cli
.
runWG
.
Go
(
cli
.
recvMaster
)
return
cli
.
runWG
.
Wait
()
}
// Close implements zodb.IStorageDriver.
func
(
c
*
Client
)
Close
()
(
err
error
)
{
c
.
talkMasterCancel
()
// XXX wait talkMaster finishes -> XXX return err from that?
// XXX what else?
c
.
runCancel
()
err
=
c
.
runWG
.
Wait
()
// close networker if configured to do so
if
c
.
ownNet
{
err
=
c
.
node
.
Net
.
Close
()
err2
:=
c
.
node
.
Net
.
Close
()
if
err
==
nil
{
err
=
err2
}
}
return
err
}
...
...
@@ -236,37 +243,6 @@ func (c *Client) withOperational(ctx context.Context) error {
*/
/*
// talkMaster connects to master, announces self and receives notifications.
// it tries to persist master link reconnecting as needed.
//
// TODO C -> M for commit (-> another channel)
//
// XXX always error (dup Storage.talkMaster) ?
func (c *Client) talkMaster(ctx context.Context) (err error) {
defer task.Runningf(&ctx, "client: talk master(%v)", c.node.MasterAddr)(&err)
// XXX dup wrt Storage.talkMaster
for {
// XXX .nodeTab.Reset() ?
err := c.talkMaster1(ctx)
log.Warning(ctx, err) // XXX Warning ok? -> Error?
// TODO if err == "reject identification / protocol error" -> shutdown client
// TODO if err = shutdown -> return
// exit on cancel / throttle reconnecting
select {
case <-ctx.Done():
return ctx.Err()
// XXX 1s hardcoded -> move out of here
case <-time.After(1*time.Second):
// ok
}
}
}
func (c *Client) talkMaster1(ctx context.Context) (err error) {
mlink, accept, err := c.node.Dial(ctx, proto.MASTER, c.node.MasterAddr)
if err != nil {
...
...
@@ -385,67 +361,31 @@ func (c *Client) initFromMaster(ctx context.Context, mlink *neonet.NodeLink) (er
// recvMaster receives and handles notifications from master.
func
(
c
*
Client
)
recvMaster
(
ctx
context
.
Context
,
mlink
*
neonet
.
NodeLink
)
(
err
error
)
{
defer
task
.
Running
(
&
ctx
,
"rx"
)(
&
err
)
func
(
c
*
Client
)
recvMaster
(
ctx
context
.
Context
)
(
err
error
)
{
defer
task
.
Running
(
&
ctx
,
"rx"
)(
&
err
)
// XXX recheck vs talkMaster
for
{
req
,
err
:=
mlink
.
Recv1
()
// XXX -> Recv1M
req
,
err
:=
c
.
node
.
RecvM1
()
if
err
!=
nil
{
return
err
return
err
// XXX eventReconnect
}
err
=
c
.
recvMaster1
(
ctx
,
req
)
err
=
c
.
recvMaster1
(
req
.
Msg
)
req
.
Close
()
if
err
!=
nil
{
return
err
}
}
}
// recvMaster1 handles 1 message from master.
func
(
c
*
Client
)
recvMaster1
(
ctx
context
.
Context
,
req
neonet
.
Request
)
error
{
switch
msg
:=
req
.
M
sg
.
(
type
)
{
func
(
c
*
Client
)
recvMaster1
(
msg
proto
.
Msg
)
error
{
switch
msg
:=
m
sg
.
(
type
)
{
// <- committed txn
case
*
proto
.
InvalidateObjects
:
return
c
.
invalidateObjects
(
msg
)
default
:
return
fmt
.
Errorf
(
"unexpected message: %T"
,
msg
)
}
/*
// messages for state changes
// XXX -> NodeApp into common code to handle NodeTab + PartTab updates from M?
c.node.StateMu.Lock()
switch msg := req.Msg.(type) {
default:
c.node.statemu.unlock()
return fmt.Errorf("unexpected message: %T", msg)
// <- whole partTab
case *proto.SendPartitionTable:
c.node.UpdatePartTab(ctx, msg)
// <- δ(partTab)
case *proto.NotifyPartitionChanges:
panic("TODO δ(partTab)")
// <- δ(nodeTab)
case *proto.NotifyNodeInformation:
c.node.UpdateNodeTab(ctx, msg)
case *proto.NotifyClusterState:
c.node.UpdateClusterState(ctx, msg)
}
// update .operational + notify those who was waiting for it
opready := c.updateOperational()
c.node.StateMu.Unlock()
opready()
return nil
*/
}
// invalidateObjects is called by recvMaster1 on receiving an InvalidateObjects notification.
...
...
go/neo/mastered.go
View file @
1d3db8a3
...
...
@@ -53,20 +53,16 @@ type _MasteredNode struct {
Net
xnet
.
Networker
// network AP we are sending/receiving on
MasterAddr
string
// address of current master TODO -> masterRegistry
// nodeTab/partTab/clusterState
stateMu
sync
.
RWMutex
state
xneo
.
ClusterState
// nodeTab *xneo.NodeTable // information about nodes in the cluster
// partTab *xneo.PartitionTable // information about data distribution in the cluster
// clusterState proto.ClusterState // master idea about cluster state
// operational state in node is maintained by talkMaster.
// users retrieve it via withOperational(). XXX recheck
//
// NOTE being operational means:
// - link to master established and is ok
// - .partTab is operational wrt .nodeTab
// - .clusterState = RUNNING <- XXX needed?
// - .state is operational
//
// however master link is accessed separately (see ^^^ and masterLink)
//
...
...
@@ -75,19 +71,40 @@ type _MasteredNode struct {
opReady
chan
struct
{}
// reinitialized each time state becomes non-operational
flags
_MasteredNodeFlags
rxm
chan
_RxM
// talkMaster -> RecvM1
/*
// TODO -> RecvM1 instead
// OnNotify, if !nil, is called when master notifies this node with a message.
// XXX not called for δstate
OnNotify
func
(
msg
proto
.
Msg
)
error
OnNotify func(msg proto.Msg) error
// XXX kill
// OnNotifyδPartTab, if !nil, is called when master notifies this node
// with a change to partition table. (used by S to persist partTab)
OnNotifyδPartTab
func
(
pt
*
xneo
.
PartitionTable
)
error
OnNotifyδPartTab func(pt *xneo.PartitionTable) error // XXX kill
*/
}
// _RxM represents a request or event received from master.
//
// recvMaster1 sends values of this type over the .rxm channel and RecvM1
// returns their Req and Err fields to its caller.
type
_RxM
struct
{
// Req is the request received from master that is passed through to the
// RecvM1 caller; the caller becomes responsible for closing it.
Req
neonet
.
Request
// Err is handed back by RecvM1 as its error result.
// NOTE(review): no code visible here sets Err yet - presumably reserved
// for the event* / link-down cases; confirm.
Err
error
// event*
}
// _MasteredNodeFlags is a bit set of options that tune how _MasteredNode
// filters messages received from master.
type _MasteredNodeFlags int

const (
	// δPartTabPassThrough tells RecvM1 not to filter out messages related
	// to partition table changes. When RecvM1 receives such messages they
	// are already processed internally to update .state.PartTab correspondingly.
	//
	// Storage uses this mode to receive δPartTab notifications to know
	// when to persist it.
	//
	// NOTE flags are tested with `flags & X != 0`, so every flag must own
	// a distinct non-zero bit - hence `1 << iota`. With plain `iota` the
	// first flag would be 0 and the test could never succeed.
	δPartTabPassThrough _MasteredNodeFlags = 1 << iota
)
// XXX doc
func
newMasteredNode
(
typ
proto
.
NodeType
,
clusterName
string
,
net
xnet
.
Networker
,
masterAddr
string
)
*
_MasteredNode
{
node
:=
&
_MasteredNode
{
myInfo
:
proto
.
NodeInfo
{
...
...
@@ -106,6 +123,8 @@ func newMasteredNode(typ proto.NodeType, clusterName string, net xnet.Networker,
PartTab
:
&
xneo
.
PartitionTable
{},
Code
:
-
1
,
// invalid
},
rxm
:
make
(
chan
_RxM
),
}
return
node
...
...
@@ -188,12 +207,19 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
err
=
node
.
updateNodeTab
(
ctx
,
&
mnt
)
node
.
state
.
PartTab
=
pt
// XXX update "operational"
/*
// update .operational + notify those who was waiting for it
opready := c.updateOperational()
c.node.StateMu.Unlock()
opready()
*/
node
.
stateMu
.
Unlock
()
if
err
!=
nil
{
// might be command to shutdown
return
err
}
// XXX update .masterLink + notify waiters
// XXX rxm <- eventReconnect
// receive and handle notifications from master
defer
task
.
Running
(
&
ctx
,
"rx"
)(
&
err
)
...
...
@@ -202,8 +228,7 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
if
err
!=
nil
{
return
err
}
err
=
node
.
recvMaster1
(
ctx
,
req
.
Msg
)
req
.
Close
()
err
=
node
.
recvMaster1
(
ctx
,
req
)
// req ownership is passed in
if
err
!=
nil
{
return
err
}
...
...
@@ -213,10 +238,10 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
}
// recvMaster1 handles 1 message from master.
func
(
node
*
_MasteredNode
)
recvMaster1
(
ctx
context
.
Context
,
msg
proto
.
Msg
)
(
err
error
)
{
func
(
node
*
_MasteredNode
)
recvMaster1
(
ctx
context
.
Context
,
req
neonet
.
Request
)
(
err
error
)
{
// messages for state changes are handled internally
δstate
:=
true
switch
m
sg
.
(
type
)
{
switch
req
.
M
sg
.
(
type
)
{
default
:
δstate
=
false
case
*
proto
.
SendPartitionTable
:
// whole partTab
case
*
proto
.
NotifyPartitionChanges
:
// δ(partTab)
...
...
@@ -225,25 +250,46 @@ func (node *_MasteredNode) recvMaster1(ctx context.Context, msg proto.Msg) (err
}
if
δstate
{
err
=
node
.
recvδstate
(
ctx
,
msg
)
}
else
{
// XXX other messages? -> particular user
// XXX rework protocol so that M sends δstate on dedicated connection and other messages on other connections?
if
node
.
OnNotify
!=
nil
{
err
=
node
.
OnNotify
(
msg
)
}
else
{
err
=
fmt
.
Errorf
(
"unexpected message: %T"
,
msg
)
δpt
,
err
:=
node
.
recvδstate
(
ctx
,
req
.
Msg
)
toRecvM1
:=
false
if
δpt
&&
(
node
.
flags
&
δPartTabPassThrough
!=
0
)
{
toRecvM1
=
true
}
if
!
toRecvM1
{
req
.
Close
()
return
err
}
}
// pass request -> RecvM1
// NOTE req ownership is passed to the RecvM1 caller, who becomes responsible for closing it
select
{
case
<-
ctx
.
Done
()
:
req
.
Close
()
return
ctx
.
Err
()
case
node
.
rxm
<-
_RxM
{
Req
:
req
}
:
// ok
}
return
err
return
nil
}
// RecvM1 receives request from master filtered through δstate handler.
//
// XXX eventReconnect
// XXX link down ?
func
(
node
*
_MasteredNode
)
RecvM1
()
(
neonet
.
Request
,
error
)
{
rx
:=
<-
node
.
rxm
// XXX close -> EOF?
return
rx
.
Req
,
rx
.
Err
}
//trace:event traceClusterStateChanged(cs *proto.ClusterState)
// recvδstate handles reception of δstate messages.
func
(
node
*
_MasteredNode
)
recvδstate
(
ctx
context
.
Context
,
msg
proto
.
Msg
)
(
err
error
)
{
δpt
:
=
false
func
(
node
*
_MasteredNode
)
recvδstate
(
ctx
context
.
Context
,
msg
proto
.
Msg
)
(
δpt
bool
,
err
error
)
{
δpt
=
false
node
.
stateMu
.
Lock
()
// XXX defer unlock ?
...
...
@@ -255,6 +301,7 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
// <- whole partTab
case
*
proto
.
SendPartitionTable
:
δpt
=
true
pt
:=
xneo
.
PartTabFromDump
(
msg
.
PTid
,
msg
.
RowList
)
// FIXME handle msg.NumReplicas
// XXX logging under lock ok?
log
.
Infof
(
ctx
,
"parttab update: %s"
,
pt
)
...
...
@@ -262,11 +309,12 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
// <- δ(partTab)
case
*
proto
.
NotifyPartitionChanges
:
δpt
=
true
panic
(
"TODO δ(partTab)"
)
// <- δ(nodeTab)
case
*
proto
.
NotifyNodeInformation
:
node
.
updateNodeTab
(
ctx
,
msg
)
err
=
node
.
updateNodeTab
(
ctx
,
msg
)
// XXX recheck return (might be command to shutdown)
case
*
proto
.
NotifyClusterState
:
log
.
Infof
(
ctx
,
"state update: %s"
,
msg
.
State
)
...
...
@@ -274,18 +322,20 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
traceClusterStateChanged
(
&
node
.
state
.
Code
)
}
/* XXX kill
if δpt && node.OnNotifyδPartTab != nil {
err = node.OnNotifyδPartTab(node.state.PartTab)
// XXX err -> return without notify?
panic("TODO")
}
*/
// update .operational + notify those who were waiting for it
opready
:=
node
.
updateOperational
()
node
.
stateMu
.
Unlock
()
opready
()
return
nil
return
δpt
,
err
}
// updateOperational updates .operational from current state.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment