Kirill Smelkov / neo / Commits / 336fc1be

Commit 336fc1be, authored Feb 11, 2021 by Kirill Smelkov

Commit message: .

Parent: 043561f4

Showing 1 changed file with 95 additions and 54 deletions: go/neo/master.go (+95 −54)

go/neo/master.go  (view file @ 336fc1be)
@@ -20,11 +20,25 @@
 package neo
 // master node
-// XXX master organization
+// Master organization
+//
+// Master is organized as follows:
+//
+// - main task that controls whole logic of master working. It spawns
+//   subtasks to implement that logic and communicate with the subtask via channels. XXX
+//   Main is the only mutator of nodeTab, partTab, etc.
+//
+// - accept task that accepts incoming connections and hands them over to main
+//   via nodeComeq.
+//
-// - main goroutine that is the only mutator of nodeTab, partTab, etc
-// - per peer workers are spawned that interact with main via channels
-// - δnodeTab, δpartTab updates are proxied to peer by another per-peer goroutine
+// - δnodeTab, δpartTab updates are proxied to peer by another per-peer task
 //
 // XXX
 //
 // XXX
 // master manages node and partition tables and broadcast their updates
 // to all connected nodes.
 import (
 	"context"
@@ -52,17 +66,21 @@ import (
 type Master struct {
 	node *xneo.Node

-	// main Runs under runCtx
+	// whole Run runs under runCtx
 	runCtx context.Context

-	// master manages node and partition tables and broadcast their updates
-	// to all connected nodes. δnodeTab/δpartTab updates are proxied to
-	// a peer by per-peer goroutine reading from .notifyTab[peer.nid] channel.
-	notifyWG sync.WaitGroup // XXX -> runWG ?
-	// notifyTab map[proto.NodeID]chan _ΔClusterState // XXX -> struct peerWG{.wg, .notifyq} ?
-	// XXX ^^^ -> peerTab ?  XXX make it part of .nodeTab through PeerNode.private?
-	// XXX ^^^ -> peerWorkTab ?
-	peerWorkTab map[proto.NodeID]*peerWork
+	// "global" workgroup under which main, accept and tasks, that should
+	// last for whole Run time, are spawned.
+	mainWG *xsync.WorkGroup
+
+	// main <- node come or go
+	nodeComeq  chan nodeCome  // main <- accept "node connected"
+	nodeLeaveq chan nodeLeave // main <- peerWG.wait "node (should be) disconnected"
+
+	// in addition to nodeTab (which keeps information about a node) tasks
+	// that are specific to a peer are organized around peerWorkTab[peer.nid].
+	peerWorkTab map[proto.NodeID]*_MasteredPeer

 	// last allocated oid & tid
 	// XXX how to start allocating oid from 0, not 1 ?
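Continuing the same sketch package, a simplified version of the main event loop implied by the nodeComeq/nodeLeaveq fields above; the nodeLeave type and peerTab map are stand-ins, and only this loop mutates the table:

// nodeLeave is a stand-in for the "node (should be) disconnected" event.
type nodeLeave struct {
	conn net.Conn
	err  error
}

// mainTask is the sole mutator of peerTab, mirroring how main owns
// nodeTab/partTab/peerWorkTab in the struct above.
func mainTask(ctx context.Context, nodeComeq <-chan nodeCome, nodeLeaveq <-chan nodeLeave) error {
	peerTab := map[string]net.Conn{} // only this goroutine touches it
	for {
		select {
		case c := <-nodeComeq:
			peerTab[c.conn.RemoteAddr().String()] = c.conn
		case l := <-nodeLeaveq:
			delete(peerTab, l.conn.RemoteAddr().String())
			l.conn.Close()
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}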
@@ -75,36 +93,38 @@ type Master struct {
 	ctlStop     chan chan struct{} // request to stop cluster
 	ctlShutdown chan chan error    // request to shutdown cluster XXX with ctx ?

-	// channels from workers directly serving peers to main driver
-	nodeComeq chan nodeCome // node connected  XXX -> acceptq?
-	// nodeLeaveq chan nodeLeave // node disconnected  XXX -> don't need ?

 	// so tests could override
 	monotime func() float64
 }

 // nodeCome represents "node connects" event.
 // XXX main <- accept
 type nodeCome struct {
 	req   *neonet.Request
 	idReq *proto.RequestIdentification // we received this identification request
 }

-/*
-// nodeLeave represents "node disconnects" event.
+// nodeLeave represents "node (should be) disconnected" event.
 type nodeLeave struct {
-	node *neo.PeerNode
+	node *xneo.PeerNode
 }
-*/

-// peerWork represents context for all tasks related to one peer.
-type peerWork struct {
+// _MasteredPeer represents context for all tasks related to one peer driven by master.
+//
+// .notify
+// .wait (run under mainWG)
+type _MasteredPeer struct {
+	peer *xneo.PeerNode // XXX naming -> node ?

 	// all tasks are spawned under wg. If any task fails - whole wg is canceled.
 	wg *xsync.WorkGroup // XXX +cancel

 	// snapshot of nodeTab/partTab/stateCode when peer was accepted by main.
 	state0 *xneo.ClusterStateSnapshot

-	// main sends δnodeTab/δpartTab/δstateCode to notifyq.
+	// main -> peerWG.notify δnodeTab/δpartTab/δstateCode.
 	notifyq chan _ΔClusterState

 	// notifyqOverflow becomes ready if main detects that peer is to slow to consume updates
+	// XXX no need? (peer.notify is canceled via peerWork.cancel)
 	notifyqOverflow chan struct{}
 }
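The notifyq/notifyqOverflow pair above is the classic bounded-queue pattern: main must never block on a slow peer, so a full queue trips an overflow signal instead. A sketch of that pattern, continuing the same sketch package (δClusterState and push are illustrative names, assumed to be called only from main):

// δClusterState is a stand-in for _ΔClusterState.
type δClusterState struct{}

// peerNotify mirrors the notifyq/notifyqOverflow pair: main never blocks on a
// slow peer; when the buffered queue is full it closes notifyqOverflow so the
// per-peer notify task detaches the peer.
type peerNotify struct {
	notifyq         chan δClusterState
	notifyqOverflow chan struct{}
	overflowed      bool
}

// push is called only from main, so overflowed needs no locking here.
func (p *peerNotify) push(δ δClusterState) {
	if p.overflowed {
		return // peer is already being detached
	}
	select {
	case p.notifyq <- δ:
	default:
		close(p.notifyqOverflow) // peer too slow to consume updates
		p.overflowed = true
	}
}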
@@ -189,10 +209,12 @@ func (m *Master) setClusterState(state proto.ClusterState) {
 func (m *Master) Run(ctx context.Context, l xnet.Listener) (err error) {
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel() // so that .runCtx is canceled if we return due to an error
 	               // XXX ^^^ not needed - we first must wait for all spawned subtasks

 	addr := l.Addr()
 	defer task.Runningf(&ctx, "master(%v)", addr)(&err)

 	m.runCtx = ctx
+	m.mainWG = xsync.NewWorkGroup(m.runCtx)

 	// update our master & serving address in node
@@ -211,17 +233,12 @@ func (m *Master) Run(ctx context.Context, l xnet.Listener) (err error) {
 	// update nodeTab with self
 	m.updateNodeTab(ctx, m.node.MyInfo)
 	// m.node.State.NodeTab.Update(m.node.MyInfo)

 	// wrap listener with link / identificaton hello checker
 	lli := xneo.NewListener(neonet.NewLinkListener(l))

-	// accept incoming connections and pass them to main driver
-	wg := sync.WaitGroup{}
-	serveCtx, serveCancel := context.WithCancel(ctx)
-	wg.Add(1)
-	go func(ctx context.Context) (err error) {
-		defer wg.Done()
+	// accept: accept incoming connections and pass them to main driver
+	m.mainWG.Go(func(ctx context.Context) (err error) {
 		defer task.Running(&ctx, "accept")(&err)

 		// XXX dup in storage
@@ -262,21 +279,26 @@ func (m *Master) Run(ctx context.Context, l xnet.Listener) (err error) {
 				continue
 			}
 		}
-	}(serveCtx)
+	})

+	// main: main driving logic
+	m.mainWG.Go(m.main)

-	// main driving logic
-	err = m.runMain(ctx)
+	err = m.mainWG.Wait()
+
+	// change `... canceled` to just canceled?
+	// (e.g. `master: accept: canceled` or `master: main: canceled` -> `master: canceled`)
+	if ctx.Err() != nil {
+		err = ctx.Err()
+	}

-	serveCancel()
-	xio.LClose(ctx, lli) // XXX here ok?
-	wg.Wait()
+	xio.LClose(ctx, lli) // XXX here ok? (probbly not)

 	return err
 }

-// runMain is the process that implements main master cluster management logic: node tracking, cluster
+// main is the process that implements main master cluster management logic: node tracking, cluster
 // state updates, scheduling data movement between storage nodes, etc.
-func (m *Master) runMain(ctx context.Context) (err error) {
+func (m *Master) main(ctx context.Context) (err error) {
 	defer task.Running(&ctx, "main")(&err)

 	// NOTE Run's goroutine is the only mutator of nodeTab, partTab and other cluster state
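The restructured Run above replaces the ad-hoc sync.WaitGroup/serveCancel bookkeeping with a single workgroup: accept and main both run under mainWG, the first failure cancels the shared context, and Run simply waits. A rough equivalent as a separate file of the sketch package, using golang.org/x/sync/errgroup as a stand-in for xsync.WorkGroup:

package mastersketch

import (
	"context"
	"net"

	"golang.org/x/sync/errgroup"
)

// run wires acceptTask and mainTask under one group: if either fails, the
// group context is canceled, the other task winds down, and Wait returns.
func run(ctx context.Context, l net.Listener) error {
	nodeComeq := make(chan nodeCome)
	nodeLeaveq := make(chan nodeLeave)

	g, gctx := errgroup.WithContext(ctx)
	g.Go(func() error { return acceptTask(gctx, l, nodeComeq) })
	g.Go(func() error { return mainTask(gctx, nodeComeq, nodeLeaveq) })
	err := g.Wait()

	// as in the diff: report plain cancellation if the caller's ctx was canceled
	if ctx.Err() != nil {
		err = ctx.Err()
	}
	return err
}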
@@ -1236,8 +1258,8 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *xneo.PeerNode,
 	node.SetLink(n.req.Link())

 	// make nodeTab/partTab snapshot to push to accepted node and subscribe it for updates
-	m.peerWorkTab[node.NID] = &peerWork{
-		wg:     xsync.NewWorkGroup(m.runCtx),
+	m.peerWorkTab[node.NID] = &_MasteredPeer{
+		wg:     xsync.NewWorkGroup(m.runCtx), // XXX wrong -> per peer ctx (derived from runCtx)
 		state0: m.node.State.Snapshot(),
 		// TODO change limiting by buffer size -> to limiting by time
 		// (see updateNodeTab for details)
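The "XXX wrong -> per peer ctx (derived from runCtx)" note above points at deriving each peer's context from runCtx, so that canceling runCtx still tears every peer down while a single peer can be detached on its own. A hedged sketch of that idea (spawnPeer is hypothetical, not code from this commit), continuing the errgroup-based file:

// spawnPeer derives a cancellable per-peer context from runCtx and runs the
// peer's tasks under their own group. Canceling runCtx cancels every peer;
// the returned cancel detaches just this one peer.
func spawnPeer(runCtx context.Context, notify func(context.Context) error) (cancel context.CancelFunc, wait func() error) {
	peerCtx, peerCancel := context.WithCancel(runCtx)
	g, gctx := errgroup.WithContext(peerCtx)
	g.Go(func() error { return notify(gctx) })
	return peerCancel, g.Wait
}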
@@ -1250,7 +1272,8 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *xneo.PeerNode,
 // accept sends acceptance to just identified peer, sends nodeTab and partTab
 // and spawns task to proxy their updates to the peer. XXX
-func (m *Master) accept(peer *xneo.PeerNode, state0 *xneo.ClusterStateSnapshot, idReq *neonet.Request, idResp proto.Msg) error {
+func (m *Master) accept(p *_MasteredPeer, idReq *neonet.Request, idResp proto.Msg) error {
+	// XXX errctx?
 	err := idReq.Reply(idResp)
 	if err != nil {
 		return fmt.Errorf("send accept: %w", err)
@@ -1258,17 +1281,17 @@ func (m *Master) accept(peer *xneo.PeerNode, state0 *xneo.ClusterStateSnapshot,
 	// XXX idReq close?

 	// send initial state snapshot to accepted node
-	link := peer.Link() // XXX -> idReq.Link() instead?
+	link := p.peer.Link() // XXX -> idReq.Link() instead?

 	// nodeTab
-	err = link.Send1(&state0.NodeTab)
+	err = link.Send1(&p.state0.NodeTab)
 	if err != nil {
 		return fmt.Errorf("send nodeTab: %w", err)
 	}

 	// partTab (not to S until cluster is RUNNING)
 	if !(peer.Type == proto.STORAGE && state0.Code != proto.ClusterRunning) {
-		err = link.Send1(&state0.PartTab)
+		err = link.Send1(&p.state0.PartTab)
 		if err != nil {
 			return fmt.Errorf("send partTab: %w", err)
 		}
@@ -1276,27 +1299,31 @@ func (m *Master) accept(peer *xneo.PeerNode, state0 *xneo.ClusterStateSnapshot,
 	// XXX send clusterState too? (NEO/py does not send it)

-	var w *peerWork // XXX stub <- = .peerWorkTab[peer.NID] set from main
+	// spawn p.notify to proxy δnodeTab/δpartTab/δcluterState to peer
+	p.wg.Go(p.notify)
+
+	return nil
+}

-	// go proxy δstate ... XXX
-	// XXX under which wg? -> under per-peer wg
-	w.wg.Go(func(ctx context.Context) (err error) {
-		defer task.Runningf(&ctx, "send cluster updates")(&err)
+// notify proxies δnodeTab/δpeerTab/δClusterState update to the peer.
+func (p *_MasteredPeer) notify(ctx context.Context) (err error) {
+	defer task.Runningf(&ctx, "notify")(&err)

-	stateCode := state0.Code
+	stateCode := p.state0.Code

 	// XXX vvv right?
 	return xxcontext.WithCloseOnErrCancel(ctx, link, func() error {
 		for {
 			var δstate _ΔClusterState
 			select {
 			case <-ctx.Done():
+				// XXX signal to nodeLeaveq ?
 				return ctx.Err()

-			case <-w.notifyqOverflow: // XXX err -> ?  XXX signal to nodeLeaveq ?
+			case <-p.notifyqOverflow: // XXX err -> ?
 				return fmt.Errorf("detaching (peer is too slow to consume updates)")

-			case δstate = <-w.notifyq: // XXX could be also closed?
+			case δstate = <-p.notifyq: // XXX could be also closed?
 			}

 			var msg proto.Msg
@@ -1340,6 +1367,20 @@ func (m *Master) accept(peer *xneo.PeerNode, state0 *xneo.ClusterStateSnapshot,
 	return nil
 }

+// waitAll waits for all tasks related to peer to complete and then notifies
+// main that peer node should go. It is spawned under mainWG.
+// XXX naming -> wait?
+func (p *_MasteredPeer) waitAll(_ context.Context) error {
+	// don't take our ctx into account - it is ~ runCtx and should be
+	// parent of context under which per-peer tasks are spawned. This way
+	// if runCtx is canceled -> any per-peer ctx should be canceled too and
+	// wg.Wait should not block.
+	err := p.wg.Wait()
+	m.nodeLeaveq <- nodeLeave{p, err} // XXX detect if if main is already done
+	return nil // XXX or ctx.Err() ?
+}
+
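One way to address the "detect if main is already done" note in waitAll: make the hand-back to main a select between nodeLeaveq and a channel that is closed when main exits, so the waiting task can never block after main is gone. A sketch, continuing the first file of the sketch package (mainDone is hypothetical):

// waitPeer waits for the peer's tasks and reports the result to main,
// unless main has already exited.
func waitPeer(wgWait func() error, nodeLeaveq chan<- nodeLeave, peerConn net.Conn, mainDone <-chan struct{}) {
	err := wgWait()
	select {
	case nodeLeaveq <- nodeLeave{conn: peerConn, err: err}:
	case <-mainDone: // main already exited; nothing to report to
	}
}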
// allocNID allocates new node ID for a node of kind nodeType.
// XXX it is bad idea for master to assign node ID to coming node
// -> better nodes generate really unique UUID themselves and always show with them