Commit e6fbafe8 authored by Kirill Smelkov

.

parent 620df4c7
...
@@ -272,7 +272,7 @@ func (c *Client) recvMaster(ctx context.Context, mlink *neo.NodeLink) error {
         // XXX msg.IdTimestamp ?
         for _, nodeInfo := range msg.NodeList {
             log.Infof(ctx, "rx node update: %v", nodeInfo)
-            c.node.NodeTab.Update(nodeInfo, /*XXX conn should not be here*/nil)
+            c.node.NodeTab.Update(nodeInfo)
         }
 
         // FIXME logging under lock
...
@@ -399,7 +399,7 @@ func (c *Client) Load(ctx context.Context, xid zodb.Xid) (data []byte, serial zo
     // retry from the beginning if all are found to fail?
     stor := storv[rand.Intn(len(storv))]
 
-    slink, err := stor.Link()
+    slink, err := stor.Dial(ctx)
     if err != nil {
         return nil, 0, err  // XXX err ctx
     }
...
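Note on the Link() -> Dial(ctx) switch in Load above: the new API appears to distinguish returning an already-established link from (re)establishing one on demand. Below is a minimal sketch of that shape, with simplified stand-in types and a hypothetical dialLink helper; it is not the actual neo/go implementation.

package neo

import (
	"context"
	"sync"
)

// NodeLink stands in for an established link to a peer node.
type NodeLink struct{}

// dialLink is a hypothetical helper: connect to addr and perform the NEO handshake.
func dialLink(ctx context.Context, addr string) (*NodeLink, error) {
	return &NodeLink{}, nil // placeholder
}

// Node stands in for a nodeTab entry.
type Node struct {
	mu   sync.Mutex
	addr string    // address the peer can be dialed at
	link *NodeLink // currently established link, if any
}

// Link returns the link currently associated with the node (nil when the
// peer is not connected).  The master-side code below uses this accessor.
func (n *Node) Link() *NodeLink {
	n.mu.Lock()
	defer n.mu.Unlock()
	return n.link
}

// Dial returns a link to the node, (re)establishing it if there is none -
// hence Client.Load switching from Link() to Dial(ctx).
func (n *Node) Dial(ctx context.Context) (*NodeLink, error) {
	n.mu.Lock()
	defer n.mu.Unlock()
	if n.link != nil {
		return n.link, nil
	}
	link, err := dialLink(ctx, n.addr)
	if err != nil {
		return nil, err
	}
	n.link = link
	return link, nil
}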
This diff is collapsed.
...
@@ -176,7 +176,7 @@ func (m *Master) Run(ctx context.Context) (err error) {
     }
 
     // update nodeTab with self
-    m.node.NodeTab.Update(m.node.MyInfo, nil /*XXX ok? we are not connecting to self*/)
+    m.node.NodeTab.Update(m.node.MyInfo)
 
     // accept incoming connections and pass them to main driver
...
@@ -368,13 +368,8 @@ loop:
             log.Error(ctx, r.err)
 
             if !xcontext.Canceled(errors.Cause(r.err)) {
-                // XXX dup wrt vvv (loop2)
-                log.Infof(ctx, "%v: closing link", r.stor.Link)
-
-                // close stor link / update .nodeTab
-                lclose(ctx, r.stor.Link)
-                // r.stor.SetState(neo.DOWN)
-                m.node.NodeTab.SetNodeState(r.stor, neo.DOWN)
+                r.stor.CloseLink(ctx)
+                r.stor.SetState(neo.DOWN)
             }
         } else {
...
@@ -457,12 +452,8 @@ loop2:
             log.Error(ctx, r.err)
 
             if !xcontext.Canceled(errors.Cause(r.err)) {
-                // XXX -> r.stor.CloseLink(ctx) ?
-                log.Infof(ctx, "%v: closing link", r.stor.Link)
-
-                // close stor link / update .nodeTab
-                lclose(ctx, r.stor.Link)
-                m.node.NodeTab.SetNodeState(r.stor, neo.DOWN)
+                r.stor.CloseLink(ctx)
+                r.stor.SetState(neo.DOWN)
             }
 
         case <-done:
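The error paths above (and the similar ones later in this file) replace the log + lclose + NodeTab.SetNodeState sequence with two methods on the node itself. A hedged sketch of what Node.CloseLink and Node.SetState plausibly encapsulate follows, again with stand-in types; the subscriber-notification hook is an assumption.

package neo

import (
	"context"
	"sync"
)

// NodeState stands in for the node state enum (DOWN, PENDING, RUNNING, ...).
type NodeState int

const DOWN NodeState = 0

// NodeLink stands in for an established link to a peer.
type NodeLink struct{}

func (nl *NodeLink) Close() error { return nil } // placeholder

// Node stands in for a nodeTab entry.
type Node struct {
	mu    sync.Mutex
	link  *NodeLink
	State NodeState

	notify func(NodeState) // assumed hook: how nodeTab subscribers learn of changes
}

// CloseLink closes and detaches the node's link, if any.  It returns no error:
// the callers above only want best-effort cleanup, and a real implementation
// would log the close error via ctx instead of propagating it.
func (n *Node) CloseLink(ctx context.Context) {
	n.mu.Lock()
	link := n.link
	n.link = nil
	n.mu.Unlock()

	if link != nil {
		_ = link.Close() // placeholder for "log error, don't propagate"
	}
}

// SetState moves the node to state and notifies interested subscribers,
// replacing the former m.node.NodeTab.SetNodeState(node, state) calls.
func (n *Node) SetState(state NodeState) {
	n.mu.Lock()
	n.State = state
	if n.notify != nil {
		n.notify(state)
	}
	n.mu.Unlock()
}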
...
@@ -480,7 +471,7 @@ loop2:
     // XXX recheck logic is ok for when starting existing cluster
     for _, stor := range m.node.NodeTab.StorageList() {
         if stor.State == neo.PENDING {
-            m.node.NodeTab.SetNodeState(stor, neo.RUNNING)
+            stor.SetState(neo.RUNNING)
         }
     }
...
@@ -513,28 +504,19 @@ func storCtlRecovery(ctx context.Context, stor *neo.Node, res chan storRecovery)
         // on error provide feedback to storRecovery chan
         res <- storRecovery{stor: stor, err: err}
     }()
-    defer task.Runningf(&ctx, "%s: stor recovery", stor.Link.RemoteAddr())(&err)
-
-    conn := stor.Conn
-    // conn, err := stor.Link.NewConn()
-    // if err != nil {
-    //     return
-    // }
-    // defer func() {
-    //     err2 := conn.Close()
-    //     err = xerr.First(err, err2)
-    // }()
+    slink := stor.Link()
+    defer task.Runningf(&ctx, "%s: stor recovery", slink.RemoteAddr())(&err)
 
     // XXX cancel on ctx
 
     recovery := neo.AnswerRecovery{}
-    err = conn.Ask(&neo.Recovery{}, &recovery)
+    err = slink.Ask1(&neo.Recovery{}, &recovery)
     if err != nil {
         return
     }
 
     resp := neo.AnswerPartitionTable{}
-    err = conn.Ask(&neo.AskPartitionTable{}, &resp)
+    err = slink.Ask1(&neo.AskPartitionTable{}, &resp)
     if err != nil {
         return
     }
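storCtlRecovery now issues its requests directly over the node link with Ask1 instead of going through a Conn stored on the Node. A plausible reading of Ask1/Send1, sketched with stand-in Msg/Conn types (an assumption, not the actual neo/go definitions): each call is a one-shot exchange on a fresh connection opened over the link.

package neo

// Msg stands in for a serializable NEO protocol message.
type Msg interface{}

// Conn stands in for one multiplexed conversation over a NodeLink.
type Conn struct{}

func (c *Conn) Send(msg Msg) error { return nil } // placeholder: encode and transmit msg
func (c *Conn) Recv(msg Msg) error { return nil } // placeholder: wait for a reply, decode into msg
func (c *Conn) Close() error       { return nil } // placeholder

// NodeLink stands in for an established link to a peer.
type NodeLink struct{}

// NewConn opens a new lightweight conversation over the link (placeholder).
func (nl *NodeLink) NewConn() (*Conn, error) { return &Conn{}, nil }

// Ask1 performs a one-shot request/response: open a connection, send req,
// decode the single reply into resp, and close - the presumed shape behind
// slink.Ask1(...) replacing conn.Ask(...) above.
func (nl *NodeLink) Ask1(req Msg, resp Msg) error {
	conn, err := nl.NewConn()
	if err != nil {
		return err
	}
	defer conn.Close()

	if err := conn.Send(req); err != nil {
		return err
	}
	return conn.Recv(resp)
}

// Send1 would be the send-only counterpart, as used for the
// NotifyPartitionTable notification in storCtlVerify further down.
func (nl *NodeLink) Send1(msg Msg) error {
	conn, err := nl.NewConn()
	if err != nil {
		return err
	}
	defer conn.Close()
	return conn.Send(msg)
}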
...
@@ -621,7 +603,7 @@ loop:
             }()
 
         case n := <-m.nodeLeave:
-            m.node.NodeTab.SetNodeState(n.node, neo.DOWN)
+            n.node.SetState(neo.DOWN)
 
             // if cluster became non-operational - we cancel verification
             if !m.node.PartTab.OperationalWith(m.node.NodeTab) {
...
@@ -643,12 +625,8 @@ loop:
             log.Error(ctx, v.err)
 
             if !xcontext.Canceled(errors.Cause(v.err)) {
-                // XXX dup wrt recovery ^^^
-                log.Infof(ctx, "%s: closing link", v.stor.Link)
-
-                // mark storage as non-working in nodeTab
-                lclose(ctx, v.stor.Link)
-                m.node.NodeTab.SetNodeState(v.stor, neo.DOWN)
+                v.stor.CloseLink(ctx)
+                v.stor.SetState(neo.DOWN)
             }
 
             // check partTab is still operational
...
@@ -696,11 +674,8 @@ loop2:
             log.Error(ctx, v.err)
 
             if !xcontext.Canceled(errors.Cause(v.err)) {
-                log.Infof(ctx, "%v: closing link", v.stor.Link)
-
-                // close stor link / update .nodeTab
-                lclose(ctx, v.stor.Link)
-                m.node.NodeTab.SetNodeState(v.stor, neo.DOWN)
+                v.stor.CloseLink(ctx)
+                v.stor.SetState(neo.DOWN)
             }
 
         case <-done:
...
@@ -730,12 +705,11 @@ func storCtlVerify(ctx context.Context, stor *neo.Node, pt *neo.PartitionTable,
             res <- storVerify{stor: stor, err: err}
         }
     }()
-    defer task.Runningf(&ctx, "%s: stor verify", stor.Link)(&err)
-
-    conn := stor.Conn
+    slink := stor.Link()
+    defer task.Runningf(&ctx, "%s: stor verify", slink)(&err)
 
     // send just recovered parttab so storage saves it
-    err = conn.Send(&neo.NotifyPartitionTable{
+    err = slink.Send1(&neo.NotifyPartitionTable{
         PTid:    pt.PTid,
         RowList: pt.Dump(),
     })
...
@@ -744,7 +718,7 @@ func storCtlVerify(ctx context.Context, stor *neo.Node, pt *neo.PartitionTable,
     }
 
     locked := neo.AnswerLockedTransactions{}
-    err = conn.Ask(&neo.LockedTransactions{}, &locked)
+    err = slink.Ask1(&neo.LockedTransactions{}, &locked)
     if err != nil {
         return
     }
...
@@ -756,7 +730,7 @@ func storCtlVerify(ctx context.Context, stor *neo.Node, pt *neo.PartitionTable,
     }
 
     last := neo.AnswerLastIDs{}
-    err = conn.Ask(&neo.LastIDs{}, &last)
+    err = slink.Ask1(&neo.LastIDs{}, &last)
     if err != nil {
         return
     }
...
@@ -850,7 +824,7 @@ loop:
 
         // XXX who sends here?
         case n := <-m.nodeLeave:
-            m.node.NodeTab.SetNodeState(n.node, neo.DOWN)
+            n.node.SetState(neo.DOWN)
 
             // if cluster became non-operational - cancel service
             if !m.node.PartTab.OperationalWith(m.node.NodeTab) {
...
@@ -884,15 +858,14 @@ loop:
 
 // storCtlService drives a storage node during cluster service state
 func storCtlService(ctx context.Context, stor *neo.Node) (err error) {
-    defer task.Runningf(&ctx, "%s: stor service", stor.Link.RemoteAddr())(&err)
-
-    conn := stor.Conn
+    slink := stor.Link()
+    defer task.Runningf(&ctx, "%s: stor service", slink.RemoteAddr())(&err)
 
     // XXX send nodeTab ?
     // XXX send clusterInformation ?
 
     ready := neo.NotifyReady{}
-    err = conn.Ask(&neo.StartOperation{Backup: false}, &ready)
+    err = slink.Ask1(&neo.StartOperation{Backup: false}, &ready)
     if err != nil {
         return err
     }
...
@@ -915,12 +888,11 @@ func storCtlService(ctx context.Context, stor *neo.Node) (err error) {
 
 // serveClient serves incoming client link
 func (m *Master) serveClient(ctx context.Context, cli *neo.Node) (err error) {
-    defer task.Runningf(&ctx, "%s: client service", cli.Link.RemoteAddr())(&err)
+    clink := cli.Link()
+    defer task.Runningf(&ctx, "%s: client service", clink.RemoteAddr())(&err)
 
     wg, ctx := errgroup.WithContext(ctx)
-
-    clink := cli.Link
-    defer xio.CloseWhenDone(ctx, clink)()
+    defer xio.CloseWhenDone(ctx, clink)()  // XXX -> cli.CloseLink?
 
     // M -> C notifications about cluster state
     wg.Go(func() error {
...
@@ -1128,7 +1100,8 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *neo.Node, resp
         IdTimestamp: m.monotime(),
     }
 
-    node = m.node.NodeTab.Update(nodeInfo, n.conn)  // NOTE this notifies all nodeTab subscribers
+    node = m.node.NodeTab.Update(nodeInfo)          // NOTE this notifies all nodeTab subscribers
+    node.SetLink(n.conn.Link())
 
     return node, accept
 }
...
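The identify hunk splits what used to be a single NodeTab.Update(nodeInfo, conn): the node table is now updated from nodeInfo alone, and the link is attached to the returned node in a separate SetLink step. A rough sketch of that shape, with stand-in types; the notification hook and the UUID map key are assumptions.

package neo

import "sync"

// NodeInfo stands in for the protocol-level node description.
type NodeInfo struct {
	UUID string // simplified; the real code may key nodes differently
}

// NodeLink stands in for an established link to a peer.
type NodeLink struct{}

// Node stands in for a nodeTab entry.
type Node struct {
	NodeInfo

	mu   sync.Mutex
	link *NodeLink
}

// SetLink attaches an established link to the node, as identify above does
// right after updating the node table.
func (n *Node) SetLink(link *NodeLink) {
	n.mu.Lock()
	n.link = link
	n.mu.Unlock()
}

// NodeTable stands in for the cluster node table.
type NodeTable struct {
	mu     sync.Mutex
	nodes  map[string]*Node
	notify func(NodeInfo) // assumed hook behind "notifies all nodeTab subscribers"
}

// Update adds or refreshes the entry for nodeInfo and notifies subscribers.
// It no longer takes a connection; the caller associates the link separately.
func (nt *NodeTable) Update(nodeInfo NodeInfo) *Node {
	nt.mu.Lock()
	defer nt.mu.Unlock()

	if nt.nodes == nil {
		nt.nodes = make(map[string]*Node)
	}
	node, ok := nt.nodes[nodeInfo.UUID]
	if !ok {
		node = &Node{}
		nt.nodes[nodeInfo.UUID] = node
	}
	node.NodeInfo = nodeInfo

	if nt.notify != nil {
		nt.notify(nodeInfo)
	}
	return node
}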