Kirill Smelkov / neo

Commit e993689f authored 7 years ago by Kirill Smelkov

    .

parent 7f7169ce
Showing 3 changed files with 55 additions and 57 deletions (+55 −57)
go/neo/server/cluster_test.go   +9  −9
go/neo/server/master.go         +44 −39
go/neo/server/storage.go        +2  −9
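Taken together, the three diffs below read as one refactoring: the master's duplicated nodeTab / partTab / clusterState fields are dropped in favour of the state already carried on the embedded .node (NodeTab / PartTab / ClusterState), and call sites switch to m.node.*. A minimal self-contained sketch of that pattern, using stand-in types rather than the real neo API:

// overview_sketch.go - illustrative only; all types stand in for the neo ones.
package main

import "fmt"

type NodeTable struct{}
type PartitionTable struct{ PTid int }
type ClusterState int

// nodeCommon stands in for the shared per-node state (cf. neo.NodeCommon).
type nodeCommon struct {
	NodeTab      *NodeTable
	PartTab      *PartitionTable
	ClusterState ClusterState
}

// master no longer carries its own nodeTab/partTab/clusterState copies.
type master struct {
	node nodeCommon
}

func newMaster() *master {
	return &master{
		node: nodeCommon{
			NodeTab:      &NodeTable{},
			PartTab:      &PartitionTable{},
			ClusterState: -1, // invalid until recovery decides
		},
	}
}

func main() {
	m := newMaster()
	// state is reached through m.node.* everywhere
	fmt.Println(m.node.PartTab.PTid, m.node.ClusterState)
}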
go/neo/server/cluster_test.go  (view file @ e993689f)

@@ -199,9 +199,9 @@ func TestMasterStorage(t *testing.T) {
 	}
 
 	// shortcut for nodetab change
-	node := func(nt *neo.NodeTable, laddr string, typ neo.NodeType, num int32, state neo.NodeState, idtstamp float64) *traceNode {
+	node := func(x *neo.NodeCommon, laddr string, typ neo.NodeType, num int32, state neo.NodeState, idtstamp float64) *traceNode {
 		return &traceNode{
-			NodeTab:  unsafe.Pointer(nt),
+			NodeTab:  unsafe.Pointer(x.NodeTab),
 			NodeInfo: neo.NodeInfo{
 				Type: typ,
 				Addr: xnaddr(laddr),

@@ -232,8 +232,8 @@ func TestMasterStorage(t *testing.T) {
 	// M starts listening
 	tc.Expect(netlisten("m:1"))
-	tc.Expect(node(M.nodeTab, "m:1", neo.MASTER, 1, neo.RUNNING, 0.0))
-	tc.Expect(clusterState(&M.clusterState, neo.ClusterRecovering))
+	tc.Expect(node(&M.node, "m:1", neo.MASTER, 1, neo.RUNNING, 0.0))
+	tc.Expect(clusterState(&M.node.ClusterState, neo.ClusterRecovering))
 
 	// TODO create C; C tries connect to master - rejected ("not yet operational")

@@ -260,7 +260,7 @@ func TestMasterStorage(t *testing.T) {
 		IdTimestamp: 0,
 	}))
 
-	tc.Expect(node(M.nodeTab, "s:1", neo.STORAGE, 1, neo.PENDING, 0.01))
+	tc.Expect(node(&M.node, "s:1", neo.STORAGE, 1, neo.PENDING, 0.01))
 
 	tc.Expect(conntx("m:2", "s:2", 1, &neo.AcceptIdentification{
 		NodeType: neo.MASTER,

@@ -297,13 +297,13 @@ func TestMasterStorage(t *testing.T) {
 		exc.Raiseif(err)
 	})
 
-	tc.Expect(node(M.nodeTab, "s:1", neo.STORAGE, 1, neo.RUNNING, 0.01))
+	tc.Expect(node(&M.node, "s:1", neo.STORAGE, 1, neo.RUNNING, 0.01))
 
 	xwait(wg)
 
 	// XXX M.partTab <- S1
 
 	// M starts verification
-	tc.Expect(clusterState(&M.clusterState, neo.ClusterVerifying))
+	tc.Expect(clusterState(&M.node.ClusterState, neo.ClusterVerifying))
 
 	tc.Expect(conntx("m:2", "s:2", 1, &neo.NotifyPartitionTable{
 		PTid: 1,

@@ -334,7 +334,7 @@ func TestMasterStorage(t *testing.T) {
 	// TODO M.Stop() while verify
 
 	// verification ok; M start service
-	tc.Expect(clusterState(&M.clusterState, neo.ClusterRunning))
+	tc.Expect(clusterState(&M.node.ClusterState, neo.ClusterRunning))
 	// TODO ^^^ should be sent to S
 	tc.Expect(conntx("m:2", "s:2", 1, &neo.StartOperation{Backup: false}))

@@ -358,7 +358,7 @@ func TestMasterStorage(t *testing.T) {
 		IdTimestamp: 0,
 	}))
 
-	tc.Expect(node(M.nodeTab, "", neo.CLIENT, 1, neo.RUNNING, 0.02))
+	tc.Expect(node(&M.node, "", neo.CLIENT, 1, neo.RUNNING, 0.02))
 
 	tc.Expect(conntx("m:3", "c:1", 1, &neo.AcceptIdentification{
 		NodeType: neo.MASTER,
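The behavioural content of the test change above is that the trace helper now receives the whole shared node state and derives the node-table pointer itself, so call sites pass &M.node instead of M.nodeTab. A hedged, self-contained sketch of that shape (types are stand-ins, not the real traceNode):

// trace_sketch.go - stand-in types; shows the &M.node calling convention.
package main

import (
	"fmt"
	"unsafe"
)

type NodeTable struct{}

type nodeCommon struct {
	NodeTab *NodeTable
}

// traceNode records which node table an event belongs to, by pointer identity.
type traceNode struct {
	NodeTab unsafe.Pointer
}

// node mirrors the updated helper signature: it takes the shared state,
// not the table directly.
func node(x *nodeCommon) *traceNode {
	return &traceNode{NodeTab: unsafe.Pointer(x.NodeTab)}
}

func main() {
	M := struct{ node nodeCommon }{node: nodeCommon{NodeTab: &NodeTable{}}}
	ev := node(&M.node)                                       // call sites now pass &M.node
	fmt.Println(ev.NodeTab == unsafe.Pointer(M.node.NodeTab)) // true
}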
go/neo/server/master.go  (view file @ e993689f)

@@ -54,12 +54,12 @@ type Master struct {
 	// to all nodes in cluster
 	// XXX dup from .node - kill here
 	//
-/*
+	/*
 	stateMu      sync.RWMutex // XXX recheck: needed ?
 	nodeTab      *neo.NodeTable
 	partTab      *neo.PartitionTable
 	clusterState neo.ClusterState
 	//
-*/
+	*/
 
 	// channels controlling main driver
 	ctlStart chan chan error // request to start cluster

@@ -101,10 +101,12 @@ func NewMaster(clusterName, serveAddr string, net xnet.Networker) *Master {
 			ClusterName: clusterName,
 			Net:         net,
 			MasterAddr:  serveAddr, // XXX ok?
-		},
 
-		nodeTab: &neo.NodeTable{},
-		partTab: &neo.PartitionTable{},
+
+			NodeTab:      &neo.NodeTable{},
+			PartTab:      &neo.PartitionTable{},
+			ClusterState: -1, // invalid
+		},
 
 		ctlStart: make(chan chan error),
 		ctlStop:  make(chan chan struct{}),

@@ -116,7 +118,6 @@ func NewMaster(clusterName, serveAddr string, net xnet.Networker) *Master {
 		monotime: monotime,
 	}
 
-	m.clusterState = -1 // invalid
 	return m
 }

@@ -148,7 +149,7 @@ func (m *Master) Shutdown() error {
 // setClusterState sets .clusterState and notifies subscribers
 func (m *Master) setClusterState(state neo.ClusterState) {
-	m.clusterState.Set(state)
+	m.node.ClusterState.Set(state)
 
 	// TODO notify subscribers
 }

@@ -181,7 +182,7 @@ func (m *Master) Run(ctx context.Context) (err error) {
 	}
 
 	// update nodeTab with self
-	m.nodeTab.Update(m.node.MyInfo, nil /*XXX ok? we are not connecting to self*/)
+	m.node.NodeTab.Update(m.node.MyInfo, nil /*XXX ok? we are not connecting to self*/)
 
 	// accept incoming connections and pass them to main driver

@@ -318,7 +319,7 @@ func (m *Master) recovery(ctx context.Context) (err error) {
 	// start recovery on all storages we are currently in touch with
 	// XXX close links to clients
-	for _, stor := range m.nodeTab.StorageList() {
+	for _, stor := range m.node.NodeTab.StorageList() {
 		if stor.State > neo.DOWN { // XXX state cmp ok ? XXX or stor.Link != nil ?
 			inprogress++
 			wg.Add(1)

@@ -374,25 +375,25 @@ loop:
 				// close stor link / update .nodeTab
 				lclose(ctx, r.stor.Link)
-				m.nodeTab.SetNodeState(r.stor, neo.DOWN)
+				m.node.NodeTab.SetNodeState(r.stor, neo.DOWN)
 			}
 
 		} else {
 			// we are interested in latest partTab
 			// NOTE during recovery no one must be subscribed to
 			// partTab so it is ok to simply change whole m.partTab
-			if r.partTab.PTid > m.partTab.PTid {
-				m.partTab = r.partTab
+			if r.partTab.PTid > m.node.PartTab.PTid {
+				m.node.PartTab = r.partTab
 			}
 		}
 
 		// update indicator whether cluster currently can be operational or not
 		var ready bool
-		if m.partTab.PTid == 0 {
+		if m.node.PartTab.PTid == 0 {
 			// new cluster - allow startup if we have some storages passed
 			// recovery and there is no in-progress recovery running
 			nup := 0
-			for _, stor := range m.nodeTab.StorageList() {
+			for _, stor := range m.node.NodeTab.StorageList() {
 				if stor.State > neo.DOWN {
 					nup++
 				}

@@ -400,7 +401,7 @@ loop:
 			ready = (nup > 0 && inprogress == 0)
 
 		} else {
-			ready = m.partTab.OperationalWith(m.nodeTab) // XXX + node state
+			ready = m.node.PartTab.OperationalWith(m.node.NodeTab) // XXX + node state
 		}
 
 		if readyToStart != ready {

@@ -462,7 +463,7 @@ loop2:
 			// close stor link / update .nodeTab
 			lclose(ctx, r.stor.Link)
-			m.nodeTab.SetNodeState(r.stor, neo.DOWN)
+			m.node.NodeTab.SetNodeState(r.stor, neo.DOWN)
 		}
 
 	case <-done:

@@ -478,24 +479,24 @@ loop2:
 	// S PENDING -> RUNNING
 	// XXX recheck logic is ok for when starting existing cluster
-	for _, stor := range m.nodeTab.StorageList() {
+	for _, stor := range m.node.NodeTab.StorageList() {
 		if stor.State == neo.PENDING {
-			m.nodeTab.SetNodeState(stor, neo.RUNNING)
+			m.node.NodeTab.SetNodeState(stor, neo.RUNNING)
 		}
 	}
 
 	// if we are starting for new cluster - create partition table
-	if m.partTab.PTid == 0 {
+	if m.node.PartTab.PTid == 0 {
 		log.Infof(ctx, "creating new partition table")
 
 		// XXX -> m.nodeTab.StorageList(State > DOWN)
 		storv := []*neo.Node{}
-		for _, stor := range m.nodeTab.StorageList() {
+		for _, stor := range m.node.NodeTab.StorageList() {
 			if stor.State > neo.DOWN {
 				storv = append(storv, stor)
 			}
 		}
-		m.partTab = neo.MakePartTab(1 /* XXX hardcoded */, storv)
-		m.partTab.PTid = 1
+		m.node.PartTab = neo.MakePartTab(1 /* XXX hardcoded */, storv)
+		m.node.PartTab.PTid = 1
 	}
 
 	return nil

@@ -583,13 +584,13 @@ func (m *Master) verify(ctx context.Context) (err error) {
 	// XXX (= py), rationale=?
 
 	// start verification on all storages we are currently in touch with
-	for _, stor := range m.nodeTab.StorageList() {
+	for _, stor := range m.node.NodeTab.StorageList() {
 		if stor.State > neo.DOWN { // XXX state cmp ok ? XXX or stor.Link != nil ?
 			inprogress++
 			wg.Add(1)
 			go func() {
 				defer wg.Done()
-				storCtlVerify(ctx, stor, m.partTab, verify)
+				storCtlVerify(ctx, stor, m.node.PartTab, verify)
 			}()
 		}
 	}

@@ -617,14 +618,14 @@ loop:
 					return
 				}
 
-				storCtlVerify(ctx, node, m.partTab, verify)
+				storCtlVerify(ctx, node, m.node.PartTab, verify)
 			}()

 		case n := <-m.nodeLeave:
-			m.nodeTab.SetNodeState(n.node, neo.DOWN)
+			m.node.NodeTab.SetNodeState(n.node, neo.DOWN)
 
 			// if cluster became non-operational - we cancel verification
-			if !m.partTab.OperationalWith(m.nodeTab) {
+			if !m.node.PartTab.OperationalWith(m.node.NodeTab) {
 				// XXX ok to instantly cancel? or better
 				// graceful shutdown in-flight verifications?
 				vcancel()

@@ -648,12 +649,12 @@ loop:
 				// mark storage as non-working in nodeTab
 				lclose(ctx, v.stor.Link)
-				m.nodeTab.SetNodeState(v.stor, neo.DOWN)
+				m.node.NodeTab.SetNodeState(v.stor, neo.DOWN)
 			}
 
 			// check partTab is still operational
 			// if not -> cancel to go back to recovery
-			if !m.partTab.OperationalWith(m.nodeTab) {
+			if !m.node.PartTab.OperationalWith(m.node.NodeTab) {
 				vcancel()
 				err = errClusterDegraded
 				break loop

@@ -700,7 +701,7 @@ loop2:
 			// close stor link / update .nodeTab
 			lclose(ctx, v.stor.Link)
-			m.nodeTab.SetNodeState(v.stor, neo.DOWN)
+			m.node.NodeTab.SetNodeState(v.stor, neo.DOWN)
 		}
 
 	case <-done:

@@ -798,7 +799,7 @@ func (m *Master) service(ctx context.Context) (err error) {
 	wg := &sync.WaitGroup{}
 
 	// spawn per-storage service driver
-	for _, stor := range m.nodeTab.StorageList() {
+	for _, stor := range m.node.NodeTab.StorageList() {
 		if stor.State == neo.RUNNING { // XXX note PENDING - not adding to service; ok?
 			wg.Add(1)
 			go func() {

@@ -850,10 +851,10 @@ loop:
 		// XXX who sends here?
 		case n := <-m.nodeLeave:
-			m.nodeTab.SetNodeState(n.node, neo.DOWN)
+			m.node.NodeTab.SetNodeState(n.node, neo.DOWN)
 
 			// if cluster became non-operational - cancel service
-			if !m.partTab.OperationalWith(m.nodeTab) {
+			if !m.node.PartTab.OperationalWith(m.node.NodeTab) {
 				err = errClusterDegraded
 				break loop
 			}

@@ -942,8 +943,12 @@ func (m *Master) serveClient(ctx context.Context, cli *neo.Node) (err error) {
 func (m *Master) serveClient1(ctx context.Context, req neo.Msg) (resp neo.Msg) {
 	switch req := req.(type) {
 	case *neo.AskPartitionTable:
-		// XXX
-		panic("TODO")
+		// XXX lock
+		rpt := &neo.AnswerPartitionTable{
+			PTid:    m.node.PartTab.PTid,
+			RowList: m.node.PartTab.Dump(),
+		}
+		return rpt
 
 	default:

@@ -978,7 +983,7 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *neo.Node, resp
 	// XXX check uuid matches NodeType
 
-	node = m.nodeTab.Get(uuid)
+	node = m.node.NodeTab.Get(uuid)
 	if node != nil {
 		// reject - uuid is already occupied by someone else
 		// XXX check also for down state - it could be the same node reconnecting

@@ -989,7 +994,7 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *neo.Node, resp
 	// XXX ok to have this logic inside identify? (better provide from outside ?)
 	switch nodeType {
 	case neo.CLIENT:
-		if m.clusterState != neo.ClusterRunning {
+		if m.node.ClusterState != neo.ClusterRunning {
 			return &neo.Error{neo.NOT_READY, "cluster not operational"}
 		}

@@ -1039,7 +1044,7 @@ func (m *Master) identify(ctx context.Context, n nodeCome) (node *neo.Node, resp
 		IdTimestamp: m.monotime(),
 	}
 
-	node = m.nodeTab.Update(nodeInfo, n.conn) // NOTE this notifies all nodeTab subscribers
+	node = m.node.NodeTab.Update(nodeInfo, n.conn) // NOTE this notifies all nodeTab subscribers
 
 	return node, accept
 }

@@ -1080,7 +1085,7 @@ func (m *Master) accept(ctx context.Context, conn *neo.Conn, resp neo.Msg) error
 func (m *Master) allocUUID(nodeType neo.NodeType) neo.NodeUUID {
 	for num := int32(1); num < 1<<24; num++ {
 		uuid := neo.UUID(nodeType, num)
-		if m.nodeTab.Get(uuid) == nil {
+		if m.node.NodeTab.Get(uuid) == nil {
 			return uuid
 		}
 	}
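Most of the master.go hunks are mechanical renames, but the recurring check m.node.PartTab.OperationalWith(m.node.NodeTab) is the pattern worth spelling out: the partition table decides operability by consulting node states in the node table. A rough self-contained sketch of that contract, under the simplifying assumption that "operational" only needs some storage above DOWN (the real rule is richer):

// operational_sketch.go - simplified stand-ins for PartitionTable/NodeTable.
package main

import "fmt"

type NodeState int

const (
	DOWN NodeState = iota
	PENDING
	RUNNING
)

type Node struct{ State NodeState }

type NodeTable struct{ storages []*Node }

func (nt *NodeTable) StorageList() []*Node { return nt.storages }

type PartitionTable struct{ PTid int }

// OperationalWith is a deliberately simplified stand-in: operational iff
// at least one storage in nt is above DOWN.
func (pt *PartitionTable) OperationalWith(nt *NodeTable) bool {
	for _, stor := range nt.StorageList() {
		if stor.State > DOWN {
			return true
		}
	}
	return false
}

func main() {
	nt := &NodeTable{storages: []*Node{{State: RUNNING}, {State: DOWN}}}
	pt := &PartitionTable{PTid: 1}
	fmt.Println(pt.OperationalWith(nt)) // true; with all storages DOWN it degrades to false
}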
go/neo/server/storage.go  (view file @ e993689f)

@@ -314,6 +314,8 @@ func (stor *Storage) m1initialize(ctx context.Context, Mconn *neo.Conn) (err err
 			return err
 		}
 
+		// XXX vvv move Send out of reply preparing logic
+
 		switch msg.(type) {
 		default:
 			return fmt.Errorf("unexpected message: %T", msg)

@@ -554,11 +556,6 @@ func (stor *Storage) serveClient(ctx context.Context, conn *neo.Conn) {
 // serveClient1 prepares response for 1 request from client
 func (stor *Storage) serveClient1(ctx context.Context, req neo.Msg) (resp neo.Msg) {
-	// req, err := conn.Recv()
-	// if err != nil {
-	// 	return err // XXX log / err / send error before closing
-	// }
-
 	switch req := req.(type) {
 	case *neo.GetObject:
 		xid := zodb.Xid{Oid: req.Oid}

@@ -588,8 +585,6 @@ func (stor *Storage) serveClient1(ctx context.Context, req neo.Msg) (resp neo.Ms
 			// XXX .DataSerial
 		}
 
-		// req.Reply(reply) // XXX err
-
 	case *neo.LastTransaction:
 		lastTid, err := stor.zstor.LastTid(ctx)
 		if err != nil {

@@ -598,8 +593,6 @@ func (stor *Storage) serveClient1(ctx context.Context, req neo.Msg) (resp neo.Ms
 		return &neo.AnswerLastTransaction{lastTid}
-		// conn.Send(reply) // XXX err
 
 	//case *ObjectHistory:
 	//case *StoreObject:
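The storage.go hunks above mostly delete leftover commented-out send code, consistent with serveClient1 only preparing a reply and leaving the actual Send to its caller (the new "// XXX vvv move Send out of reply preparing logic" note points the same way). A small self-contained sketch of that prepare/send split, with invented stand-in message types:

// serveclient1_sketch.go - stand-in message types; shows reply preparation
// separated from sending.
package main

import "fmt"

type Msg interface{}

type GetObject struct{ Oid uint64 }
type AnswerObject struct{ Oid uint64 }
type LastTransaction struct{}
type AnswerLastTransaction struct{ Tid uint64 }
type ErrorMsg struct{ Text string }

// serveClient1 prepares the response for one request; it never sends.
func serveClient1(req Msg) (resp Msg) {
	switch req := req.(type) {
	case *GetObject:
		return &AnswerObject{Oid: req.Oid}
	case *LastTransaction:
		return &AnswerLastTransaction{Tid: 1}
	default:
		return &ErrorMsg{Text: fmt.Sprintf("unexpected message: %T", req)}
	}
}

func main() {
	resp := serveClient1(&GetObject{Oid: 7})
	// a caller such as serveClient would send resp over the link here
	fmt.Printf("%#v\n", resp)
}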