Commit e8ac7104 authored by Kirill Smelkov

.

parent e17082f1
...@@ -68,7 +68,6 @@ type Master struct { ...@@ -68,7 +68,6 @@ type Master struct {
type nodeCome struct { type nodeCome struct {
conn *neo.Conn conn *neo.Conn
idReq *neo.RequestIdentification // we received this identification request idReq *neo.RequestIdentification // we received this identification request
idResp chan neo.Msg // what we reply (AcceptIdentification | Error) XXX kill
} }
// event: node disconnects // event: node disconnects
...@@ -168,7 +167,7 @@ func (m *Master) Run(ctx context.Context) error { ...@@ -168,7 +167,7 @@ func (m *Master) Run(ctx context.Context) error {
} }
select { select {
case m.nodeCome <- nodeCome{conn, idReq, nil/*XXX kill*/}: case m.nodeCome <- nodeCome{conn, idReq}:
// ok // ok
case <-serveCtx.Done(): case <-serveCtx.Done():
...@@ -256,29 +255,52 @@ func (m *Master) recovery(ctx context.Context) (err error) { ...@@ -256,29 +255,52 @@ func (m *Master) recovery(ctx context.Context) (err error) {
defer rcancel() defer rcancel()
recovery := make(chan storRecovery) recovery := make(chan storRecovery)
inprogress := 0 inprogress := sync.WaitGroup{}
// start recovery on all storages we are currently in touch with // start recovery on all storages we are currently in touch with
for _, stor := range m.nodeTab.StorageList() { for _, stor := range m.nodeTab.StorageList() {
if stor.NodeState > neo.DOWN { // XXX state cmp ok ? XXX or stor.Link != nil ? if stor.NodeState > neo.DOWN { // XXX state cmp ok ? XXX or stor.Link != nil ?
inprogress++ inprogress.Add(1)
go storCtlRecovery(rctx, stor.Link, recovery) go func() {
defer inprogress.Done()
storCtlRecovery(rctx, stor.Link, recovery)
}()
} }
} }
loop: loop:
for { for {
select { select {
// XXX this is m.Accept() and semantic must be semantic of net.Accept() ! // new connection comes in
case n := <-m.nodeCome: case n := <-m.nodeCome:
node, ok := m.accept(n, /* XXX only accept storages -> PENDING */) node, resp, ok := m.accept(n, /* XXX only accept storages -> PENDING */)
if !ok {
break // if new storage arrived - start recovery on it too
inprogress.Add(1)
go func() {
defer inprogress.Done()
// send identification response
// XXX cancel on ctx
err := n.conn.Send(resp)
if err != nil {
// XXX log
}
n.conn.Close() // XXX err
if err != nil || !ok {
// XXX must let recovery know it failed to update nodeTab
if ok {
recovery <- storRecovery{err: err}
}
c.conn.Link().Close() // XXX err
return
} }
// new storage arrived - start recovery on it too err := welcome(n.conn, resp)
inprogress++
go storCtlRecovery(rctx, node.Link, recovery) // start recovery
storCtlRecovery(rctx, node.Link, recovery)
}()
case n := <-m.nodeLeave: case n := <-m.nodeLeave:
m.nodeTab.UpdateLinkDown(n.link) m.nodeTab.UpdateLinkDown(n.link)
...@@ -287,12 +309,14 @@ loop: ...@@ -287,12 +309,14 @@ loop:
// a storage node came through recovery - let's see whether // a storage node came through recovery - let's see whether
// ptid ↑ and if so we should take partition table from there // ptid ↑ and if so we should take partition table from there
case r := <-recovery: case r := <-recovery:
inprogress-- // inprogress--
if r.err != nil { if r.err != nil {
// XXX err ctx? // XXX err ctx?
// XXX log here or in producer? // XXX log here or in producer?
fmt.Printf("master: %v\n", r.err) fmt.Printf("master: %v\n", r.err)
// XXX close stor link / update .nodeTab ?
break break
} }
...@@ -337,9 +361,22 @@ loop: ...@@ -337,9 +361,22 @@ loop:
} }
} }
// consume left recovery responses (which should come without delay since it was cancelled) // // consume left recovery responses (which should come without delay since it was cancelled)
for ; inprogress > 0; inprogress-- { // for ; inprogress > 0; inprogress-- {
<-recovery // <-recovery
// }
done := make(chan struct{})
go func() {
inprogress.Wait()
close(done)
}()
for {
select {
case <-recovery:
case <-done:
return err
} }
return err return err
...@@ -376,7 +413,7 @@ func storCtlRecovery(ctx context.Context, link *neo.NodeLink, res chan storRecov ...@@ -376,7 +413,7 @@ func storCtlRecovery(ctx context.Context, link *neo.NodeLink, res chan storRecov
}() }()
defer xerr.Contextf(&err, "%s: stor recovery", link) defer xerr.Contextf(&err, "%s: stor recovery", link)
conn, err := link.NewConn() // FIXME bad conn, err := link.NewConn() // FIXME bad -> not bad
if err != nil { if err != nil {
return return
} }
...@@ -514,6 +551,8 @@ loop: ...@@ -514,6 +551,8 @@ loop:
} }
} }
// XXX if m.partTab.OperationalWith(&.nodeTab, RUNNING) -> break (ok)
case ech := <-m.ctlStart: case ech := <-m.ctlStart:
ech <- nil // we are already starting ech <- nil // we are already starting
...@@ -541,7 +580,7 @@ loop: ...@@ -541,7 +580,7 @@ loop:
type storVerify struct { type storVerify struct {
lastOid zodb.Oid lastOid zodb.Oid
lastTid zodb.Tid lastTid zodb.Tid
link *neo.NodeLink link *neo.NodeLink // XXX -> Node
err error err error
} }
...@@ -561,6 +600,8 @@ func storCtlVerify(ctx context.Context, link *neo.NodeLink, res chan storVerify) ...@@ -561,6 +600,8 @@ func storCtlVerify(ctx context.Context, link *neo.NodeLink, res chan storVerify)
// FIXME stub // FIXME stub
conn, _ := link.NewConn() conn, _ := link.NewConn()
// XXX NotifyPT (so storages save locally recovered PT)
locked := neo.AnswerLockedTransactions{} locked := neo.AnswerLockedTransactions{}
err = conn.Ask(&neo.LockedTransactions{}, &locked) err = conn.Ask(&neo.LockedTransactions{}, &locked)
if err != nil { if err != nil {
...@@ -648,16 +689,18 @@ loop: ...@@ -648,16 +689,18 @@ loop:
return err return err
} }
// accept processes identification request of just connected node and either accepts or declines it // accept processes identification request of just connected node and either accepts or declines it.
// if node identification is accepted nodeTab is updated and corresponding node entry is returned // if node identification is accepted nodeTab is updated and corresponding node entry is returned
func (m *Master) accept(n nodeCome) (node *neo.Node, ok bool) { // XXX no need for ok
func (m *Master) accept(n nodeCome) (node *neo.Node, resp neo.Msg, ok bool) {
// TODO log node accept/rejected
// XXX also verify ? : // XXX also verify ? :
// - NodeType valid // - NodeType valid
// - IdTimestamp ? // - IdTimestamp ?
if n.idReq.ClusterName != m.node.ClusterName { if n.idReq.ClusterName != m.node.ClusterName {
n.idResp <- &neo.Error{neo.PROTOCOL_ERROR, "cluster name mismatch"} // XXX return nil, &neo.Error{neo.PROTOCOL_ERROR, "cluster name mismatch"}, false
return nil, false
} }
nodeType := n.idReq.NodeType nodeType := n.idReq.NodeType
...@@ -672,20 +715,19 @@ func (m *Master) accept(n nodeCome) (node *neo.Node, ok bool) { ...@@ -672,20 +715,19 @@ func (m *Master) accept(n nodeCome) (node *neo.Node, ok bool) {
if node != nil { if node != nil {
// reject - uuid is already occupied by someone else // reject - uuid is already occupied by someone else
// XXX check also for down state - it could be the same node reconnecting // XXX check also for down state - it could be the same node reconnecting
n.idResp <- &neo.Error{neo.PROTOCOL_ERROR, "uuid %v already used by another node"} // XXX return nil, &neo.Error{neo.PROTOCOL_ERROR, "uuid %v already used by another node" /*XXX*/}, false
return nil, false
} }
// XXX accept only certain kind of nodes depending on .clusterState, e.g. // XXX accept only certain kind of nodes depending on .clusterState, e.g.
switch nodeType { switch nodeType {
case neo.CLIENT: case neo.CLIENT:
n.idResp <- &neo.Error{neo.NOT_READY, "cluster not operational"} return nil, &neo.Error{neo.NOT_READY, "cluster not operational"}
// XXX ... // XXX ...
} }
n.idResp <- &neo.AcceptIdentification{ accept := &neo.AcceptIdentification{
NodeType: neo.MASTER, NodeType: neo.MASTER,
MyNodeUUID: m.node.MyInfo.NodeUUID, MyNodeUUID: m.node.MyInfo.NodeUUID,
NumPartitions: 1, // FIXME hardcoded NumPartitions: 1, // FIXME hardcoded
...@@ -713,7 +755,7 @@ func (m *Master) accept(n nodeCome) (node *neo.Node, ok bool) { ...@@ -713,7 +755,7 @@ func (m *Master) accept(n nodeCome) (node *neo.Node, ok bool) {
} }
node = m.nodeTab.Update(nodeInfo, n.conn.Link()) // NOTE this notifies all nodeTab subscribers node = m.nodeTab.Update(nodeInfo, n.conn.Link()) // NOTE this notifies all nodeTab subscribers
return node, true return node, accept, true
} }
// allocUUID allocates new node uuid for a node of kind nodeType // allocUUID allocates new node uuid for a node of kind nodeType
...@@ -734,6 +776,7 @@ func (m *Master) allocUUID(nodeType neo.NodeType) neo.NodeUUID { ...@@ -734,6 +776,7 @@ func (m *Master) allocUUID(nodeType neo.NodeType) neo.NodeUUID {
panic("all uuid allocated ???") // XXX more robust ? panic("all uuid allocated ???") // XXX more robust ?
} }
/* XXX goes away
// ServeLink serves incoming node-node link connection // ServeLink serves incoming node-node link connection
// XXX +error return? // XXX +error return?
func (m *Master) ServeLink(ctx context.Context, link *neo.NodeLink) { func (m *Master) ServeLink(ctx context.Context, link *neo.NodeLink) {
...@@ -781,9 +824,9 @@ func (m *Master) ServeLink(ctx context.Context, link *neo.NodeLink) { ...@@ -781,9 +824,9 @@ func (m *Master) ServeLink(ctx context.Context, link *neo.NodeLink) {
} }
// convey identification request to master and we are done here - the master takes on the torch // convey identification request to master and we are done here - the master takes on the torch
m.nodeCome <- nodeCome{conn, idReq, nil/*XXX kill*/} m.nodeCome <- nodeCome{conn, idReq, nilXXX kill}
/* //////////////////////////////////////////////////
// if master accepted this node - don't forget to notify when it leaves // if master accepted this node - don't forget to notify when it leaves
_, rejected := idResp.(error) _, rejected := idResp.(error)
if !rejected { if !rejected {
...@@ -877,8 +920,9 @@ func (m *Master) ServeLink(ctx context.Context, link *neo.NodeLink) { ...@@ -877,8 +920,9 @@ func (m *Master) ServeLink(ctx context.Context, link *neo.NodeLink) {
// storage: // storage:
m.DriveStorage(ctx, link) m.DriveStorage(ctx, link)
*/ /////////////////
} }
*/
// ServeClient serves incoming connection on which peer identified itself as client // ServeClient serves incoming connection on which peer identified itself as client
// XXX +error return? // XXX +error return?
......
...@@ -79,6 +79,7 @@ func Serve(ctx context.Context, l *neo.Listener, srv Server) error { ...@@ -79,6 +79,7 @@ func Serve(ctx context.Context, l *neo.Listener, srv Server) error {
} }
*/ */
/*
// ---------------------------------------- // ----------------------------------------
// XXX goes away? (we need a func to make sure to recv RequestIdentification // XXX goes away? (we need a func to make sure to recv RequestIdentification
...@@ -128,3 +129,4 @@ func IdentifyPeer(link *neo.NodeLink, myNodeType neo.NodeType) (nodeInfo neo.Req ...@@ -128,3 +129,4 @@ func IdentifyPeer(link *neo.NodeLink, myNodeType neo.NodeType) (nodeInfo neo.Req
return req, nil return req, nil
} }
*/
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment