Commit 1d3db8a3 authored by Kirill Smelkov's avatar Kirill Smelkov

.

parent 5a6ce3e2
...@@ -35,6 +35,7 @@ import ( ...@@ -35,6 +35,7 @@ import (
"lab.nexedi.com/kirr/go123/mem" "lab.nexedi.com/kirr/go123/mem"
"lab.nexedi.com/kirr/go123/xerr" "lab.nexedi.com/kirr/go123/xerr"
"lab.nexedi.com/kirr/go123/xnet" "lab.nexedi.com/kirr/go123/xnet"
"lab.nexedi.com/kirr/go123/xsync"
// "lab.nexedi.com/kirr/neo/go/internal/log" // "lab.nexedi.com/kirr/neo/go/internal/log"
"lab.nexedi.com/kirr/neo/go/internal/task" "lab.nexedi.com/kirr/neo/go/internal/task"
...@@ -54,7 +55,8 @@ type Client struct { ...@@ -54,7 +55,8 @@ type Client struct {
// node *xneo.NodeApp // node *xneo.NodeApp
node *_MasteredNode node *_MasteredNode
talkMasterCancel func() runWG *xsync.WorkGroup
runCancel func()
// link to master - established and maintained by talkMaster. // link to master - established and maintained by talkMaster.
// users retrieve it via masterLink(). // users retrieve it via masterLink().
...@@ -116,21 +118,26 @@ func (cli *Client) Run(ctx context.Context) (err error) { ...@@ -116,21 +118,26 @@ func (cli *Client) Run(ctx context.Context) (err error) {
// run process which performs master talk // run process which performs master talk
ctx, cancel := context.WithCancel(ctx) ctx, cancel := context.WithCancel(ctx)
cli.talkMasterCancel = cancel cli.runCancel = cancel
// cli.node.OnShutdown = cancel // XXX ok? // cli.node.OnShutdown = cancel // XXX ok?
// return cli.talkMaster(ctx) // return cli.talkMaster(ctx)
return cli.node.talkMaster(ctx) cli.runWG.Go(cli.node.talkMaster)
cli.runWG.Go(cli.recvMaster)
return cli.runWG.Wait()
} }
// Close implements zodb.IStorageDriver. // Close implements zodb.IStorageDriver.
func (c *Client) Close() (err error) { func (c *Client) Close() (err error) {
c.talkMasterCancel() c.runCancel()
// XXX wait talkMaster finishes -> XXX return err from that? err = c.runWG.Wait()
// XXX what else?
// close networker if configured to do so // close networker if configured to do so
if c.ownNet { if c.ownNet {
err = c.node.Net.Close() err2 := c.node.Net.Close()
if err == nil {
err = err2
}
} }
return err return err
} }
...@@ -236,37 +243,6 @@ func (c *Client) withOperational(ctx context.Context) error { ...@@ -236,37 +243,6 @@ func (c *Client) withOperational(ctx context.Context) error {
*/ */
/* /*
// talkMaster connects to master, announces self and receives notifications.
// it tries to persist master link reconnecting as needed.
//
// TODO C -> M for commit (-> another channel)
//
// XXX always error (dup Storage.talkMaster) ?
func (c *Client) talkMaster(ctx context.Context) (err error) {
defer task.Runningf(&ctx, "client: talk master(%v)", c.node.MasterAddr)(&err)
// XXX dup wrt Storage.talkMaster
for {
// XXX .nodeTab.Reset() ?
err := c.talkMaster1(ctx)
log.Warning(ctx, err) // XXX Warning ok? -> Error?
// TODO if err == "reject identification / protocol error" -> shutdown client
// TODO if err = shutdown -> return
// exit on cancel / throttle reconnecting
select {
case <-ctx.Done():
return ctx.Err()
// XXX 1s hardcoded -> move out of here
case <-time.After(1*time.Second):
// ok
}
}
}
func (c *Client) talkMaster1(ctx context.Context) (err error) { func (c *Client) talkMaster1(ctx context.Context) (err error) {
mlink, accept, err := c.node.Dial(ctx, proto.MASTER, c.node.MasterAddr) mlink, accept, err := c.node.Dial(ctx, proto.MASTER, c.node.MasterAddr)
if err != nil { if err != nil {
...@@ -385,67 +361,31 @@ func (c *Client) initFromMaster(ctx context.Context, mlink *neonet.NodeLink) (er ...@@ -385,67 +361,31 @@ func (c *Client) initFromMaster(ctx context.Context, mlink *neonet.NodeLink) (er
// recvMaster receives and handles notifications from master. // recvMaster receives and handles notifications from master.
func (c *Client) recvMaster(ctx context.Context, mlink *neonet.NodeLink) (err error) { func (c *Client) recvMaster(ctx context.Context) (err error) {
defer task.Running(&ctx, "rx")(&err) defer task.Running(&ctx, "rx")(&err) // XXX recheck vs talkMaster
for { for {
req, err := mlink.Recv1() // XXX -> Recv1M req, err := c.node.RecvM1()
if err != nil { if err != nil {
return err return err // XXX eventReconnect
} }
err = c.recvMaster1(ctx, req) err = c.recvMaster1(req.Msg)
req.Close() req.Close()
if err != nil { if err != nil {
return err return err
} }
} }
} }
// recvMaster1 handles 1 message from master. // recvMaster1 handles 1 message from master.
func (c *Client) recvMaster1(ctx context.Context, req neonet.Request) error { func (c *Client) recvMaster1(msg proto.Msg) error {
switch msg := req.Msg.(type) { switch msg := msg.(type) {
// <- committed txn // <- committed txn
case *proto.InvalidateObjects: case *proto.InvalidateObjects:
return c.invalidateObjects(msg) return c.invalidateObjects(msg)
default: default:
return fmt.Errorf("unexpected message: %T", msg) return fmt.Errorf("unexpected message: %T", msg)
} }
/*
// messages for state changes
// XXX -> NodeApp into common code to handle NodeTab + PartTab updates from M?
c.node.StateMu.Lock()
switch msg := req.Msg.(type) {
default:
c.node.statemu.unlock()
return fmt.Errorf("unexpected message: %T", msg)
// <- whole partTab
case *proto.SendPartitionTable:
c.node.UpdatePartTab(ctx, msg)
// <- δ(partTab)
case *proto.NotifyPartitionChanges:
panic("TODO δ(partTab)")
// <- δ(nodeTab)
case *proto.NotifyNodeInformation:
c.node.UpdateNodeTab(ctx, msg)
case *proto.NotifyClusterState:
c.node.UpdateClusterState(ctx, msg)
}
// update .operational + notify those who was waiting for it
opready := c.updateOperational()
c.node.StateMu.Unlock()
opready()
return nil
*/
} }
// invalidateObjects is called by recvMaster1 on receiving invalidateObjects notification. // invalidateObjects is called by recvMaster1 on receiving invalidateObjects notification.
......
...@@ -53,20 +53,16 @@ type _MasteredNode struct { ...@@ -53,20 +53,16 @@ type _MasteredNode struct {
Net xnet.Networker // network AP we are sending/receiving on Net xnet.Networker // network AP we are sending/receiving on
MasterAddr string // address of current master TODO -> masterRegistry MasterAddr string // address of current master TODO -> masterRegistry
// nodeTab/partTab/clusterState
stateMu sync.RWMutex stateMu sync.RWMutex
state xneo.ClusterState state xneo.ClusterState
// nodeTab *xneo.NodeTable // information about nodes in the cluster
// partTab *xneo.PartitionTable // information about data distribution in the cluster
// clusterState proto.ClusterState // master idea about cluster state
// operational state in node is maintained by talkMaster. // operational state in node is maintained by talkMaster.
// users retrieve it via withOperational(). XXX recheck // users retrieve it via withOperational(). XXX recheck
// //
// NOTE being operational means: // NOTE being operational means:
// - link to master established and is ok // - link to master established and is ok
// - .partTab is operational wrt .nodeTab // - .state is operational
// - .clusterState = RUNNING <- XXX needed?
// //
// however master link is accessed separately (see ^^^ and masterLink) // however master link is accessed separately (see ^^^ and masterLink)
// //
...@@ -75,19 +71,40 @@ type _MasteredNode struct { ...@@ -75,19 +71,40 @@ type _MasteredNode struct {
opReady chan struct{} // reinitialized each time state becomes non-operational opReady chan struct{} // reinitialized each time state becomes non-operational
flags _MasteredNodeFlags
rxm chan _RxM // talkMaster -> RecvM1
/*
// TODO -> RecvM1 instead // TODO -> RecvM1 instead
// OnNotify, if !nil, is called when master notifies this node with a message. // OnNotify, if !nil, is called when master notifies this node with a message.
// XXX not called for δstate // XXX not called for δstate
OnNotify func(msg proto.Msg) error OnNotify func(msg proto.Msg) error // XXX kill
// OnNotifyδPartTab, if !nil, is called when master notifies this node // OnNotifyδPartTab, if !nil, is called when master notifies this node
// with a change to partition table. (used by S to persist partTab) // with a change to partition table. (used by S to persist partTab)
OnNotifyδPartTab func(pt *xneo.PartitionTable) error OnNotifyδPartTab func(pt *xneo.PartitionTable) error // XXX kill
*/
}
// _RxM represents a request or event received from master.
type _RxM struct {
Req neonet.Request
Err error // event*
} }
type _MasteredNodeFlags int
const (
// δPartTabPassThrough tells RecvM1 not to filter out messages related
// to partition table changes. When RecvM1 receives such messages there
// are already processed internally to update .state.PartTab correspondingly.
//
// Storage uses this mode to receive δPartTab notifications to know
// when to persist it.
δPartTabPassThrough _MasteredNodeFlags = iota
)
// XXX doc
func newMasteredNode(typ proto.NodeType, clusterName string, net xnet.Networker, masterAddr string) *_MasteredNode { func newMasteredNode(typ proto.NodeType, clusterName string, net xnet.Networker, masterAddr string) *_MasteredNode {
node := &_MasteredNode{ node := &_MasteredNode{
myInfo: proto.NodeInfo{ myInfo: proto.NodeInfo{
...@@ -106,6 +123,8 @@ func newMasteredNode(typ proto.NodeType, clusterName string, net xnet.Networker, ...@@ -106,6 +123,8 @@ func newMasteredNode(typ proto.NodeType, clusterName string, net xnet.Networker,
PartTab: &xneo.PartitionTable{}, PartTab: &xneo.PartitionTable{},
Code: -1, // invalid Code: -1, // invalid
}, },
rxm: make(chan _RxM),
} }
return node return node
...@@ -188,12 +207,19 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error { ...@@ -188,12 +207,19 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
err = node.updateNodeTab(ctx, &mnt) err = node.updateNodeTab(ctx, &mnt)
node.state.PartTab = pt node.state.PartTab = pt
// XXX update "operational" // XXX update "operational"
/*
// update .operational + notify those who was waiting for it
opready := c.updateOperational()
c.node.StateMu.Unlock()
opready()
*/
node.stateMu.Unlock() node.stateMu.Unlock()
if err != nil { // might be command to shutdown if err != nil { // might be command to shutdown
return err return err
} }
// XXX update .masterLink + notify waiters // XXX update .masterLink + notify waiters
// XXX rxm <- eventReconnect
// receive and handle notifications from master // receive and handle notifications from master
defer task.Running(&ctx, "rx")(&err) defer task.Running(&ctx, "rx")(&err)
...@@ -202,8 +228,7 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error { ...@@ -202,8 +228,7 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
if err != nil { if err != nil {
return err return err
} }
err = node.recvMaster1(ctx, req.Msg) err = node.recvMaster1(ctx, req) // req ownership is passed in
req.Close()
if err != nil { if err != nil {
return err return err
} }
...@@ -213,10 +238,10 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error { ...@@ -213,10 +238,10 @@ func (node *_MasteredNode) talkMaster1(ctx context.Context) error {
} }
// recvMaster1 handles 1 message from master. // recvMaster1 handles 1 message from master.
func (node *_MasteredNode) recvMaster1(ctx context.Context, msg proto.Msg) (err error) { func (node *_MasteredNode) recvMaster1(ctx context.Context, req neonet.Request) (err error) {
// messages for state changes are handled internally // messages for state changes are handled internally
δstate := true δstate := true
switch msg.(type) { switch req.Msg.(type) {
default: δstate = false default: δstate = false
case *proto.SendPartitionTable: // whole partTab case *proto.SendPartitionTable: // whole partTab
case *proto.NotifyPartitionChanges: // δ(partTab) case *proto.NotifyPartitionChanges: // δ(partTab)
...@@ -225,25 +250,46 @@ func (node *_MasteredNode) recvMaster1(ctx context.Context, msg proto.Msg) (err ...@@ -225,25 +250,46 @@ func (node *_MasteredNode) recvMaster1(ctx context.Context, msg proto.Msg) (err
} }
if δstate { if δstate {
err = node.recvδstate(ctx, msg) δpt, err := node.recvδstate(ctx, req.Msg)
} else { toRecvM1 := false
// XXX other messages? -> particular user if δpt && (node.flags & δPartTabPassThrough != 0) {
// XXX rework protocol so that M sends δstate on dedicated connection and other messages on other connections? toRecvM1 = true
if node.OnNotify != nil {
err = node.OnNotify(msg)
} else {
err = fmt.Errorf("unexpected message: %T", msg)
} }
if !toRecvM1 {
req.Close()
return err
}
}
// pass request -> RecvM1
// NOTE req ownership is passed into RecvM1 caller who becomes responsible to close it
select {
case <-ctx.Done():
req.Close()
return ctx.Err()
case node.rxm <- _RxM{Req: req}:
// ok
} }
return err return nil
}
// RecvM1 receives request from master filtered through δstate handler.
//
// XXX eventReconnect
// XXX link down ?
func (node *_MasteredNode) RecvM1() (neonet.Request, error) {
rx := <-node.rxm
// XXX close -> EOF?
return rx.Req, rx.Err
} }
//trace:event traceClusterStateChanged(cs *proto.ClusterState) //trace:event traceClusterStateChanged(cs *proto.ClusterState)
// recvδstate handles reception of δstate messages. // recvδstate handles reception of δstate messages.
func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err error) { func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (δpt bool, err error) {
δpt := false δpt = false
node.stateMu.Lock() node.stateMu.Lock()
// XXX defer unlock ? // XXX defer unlock ?
...@@ -255,6 +301,7 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err ...@@ -255,6 +301,7 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
// <- whole partTab // <- whole partTab
case *proto.SendPartitionTable: case *proto.SendPartitionTable:
δpt = true
pt := xneo.PartTabFromDump(msg.PTid, msg.RowList) // FIXME handle msg.NumReplicas pt := xneo.PartTabFromDump(msg.PTid, msg.RowList) // FIXME handle msg.NumReplicas
// XXX logging under lock ok? // XXX logging under lock ok?
log.Infof(ctx, "parttab update: %s", pt) log.Infof(ctx, "parttab update: %s", pt)
...@@ -262,11 +309,12 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err ...@@ -262,11 +309,12 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
// <- δ(partTab) // <- δ(partTab)
case *proto.NotifyPartitionChanges: case *proto.NotifyPartitionChanges:
δpt = true
panic("TODO δ(partTab)") panic("TODO δ(partTab)")
// <- δ(nodeTab) // <- δ(nodeTab)
case *proto.NotifyNodeInformation: case *proto.NotifyNodeInformation:
node.updateNodeTab(ctx, msg) err = node.updateNodeTab(ctx, msg) // XXX recheck return (might be command to shutdown)
case *proto.NotifyClusterState: case *proto.NotifyClusterState:
log.Infof(ctx, "state update: %s", msg.State) log.Infof(ctx, "state update: %s", msg.State)
...@@ -274,18 +322,20 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err ...@@ -274,18 +322,20 @@ func (node *_MasteredNode) recvδstate(ctx context.Context, msg proto.Msg) (err
traceClusterStateChanged(&node.state.Code) traceClusterStateChanged(&node.state.Code)
} }
/* XXX kill
if δpt && node.OnNotifyδPartTab != nil { if δpt && node.OnNotifyδPartTab != nil {
err = node.OnNotifyδPartTab(node.state.PartTab) err = node.OnNotifyδPartTab(node.state.PartTab)
// XXX err -> return without notify? // XXX err -> return without notify?
panic("TODO") panic("TODO")
} }
*/
// update .operational + notify those who was waiting for it // update .operational + notify those who was waiting for it
opready := node.updateOperational() opready := node.updateOperational()
node.stateMu.Unlock() node.stateMu.Unlock()
opready() opready()
return nil return δpt, err
} }
// updateOperational updates .operational from current state. // updateOperational updates .operational from current state.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment