Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
slapos.core
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Jérome Perrin
slapos.core
Commits
4e44bf95
Commit
4e44bf95
authored
Sep 11, 2023
by
Xavier Thompson
Browse files
Options
Browse Files
Download
Plain Diff
slapgrid: Fix connectionless instance processing
See merge request
nexedi/slapos.core!572
parents
be11fd4f
c9b394c1
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
105 additions
and
3 deletions
+105
-3
slapos/grid/slapgrid.py
slapos/grid/slapgrid.py
+37
-2
slapos/tests/test_slapgrid.py
slapos/tests/test_slapgrid.py
+68
-1
No files found.
slapos/grid/slapgrid.py
View file @
4e44bf95
...
@@ -59,6 +59,7 @@ from requests.exceptions import RequestException
...
@@ -59,6 +59,7 @@ from requests.exceptions import RequestException
from
lxml
import
etree
from
lxml
import
etree
from
slapos
import
manager
as
slapmanager
from
slapos
import
manager
as
slapmanager
from
slapos.slap.exception
import
ConnectionError
from
slapos.slap.slap
import
NotFoundError
from
slapos.slap.slap
import
NotFoundError
from
slapos.slap.slap
import
ServerError
from
slapos.slap.slap
import
ServerError
from
slapos.slap.slap
import
COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME
from
slapos.slap.slap
import
COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME
...
@@ -1425,7 +1426,7 @@ stderr_logfile_backups=1
...
@@ -1425,7 +1426,7 @@ stderr_logfile_backups=1
def
processComputerPartitionList
(
self
):
def
processComputerPartitionList
(
self
):
try
:
try
:
return
self
.
processComputerPartitionListOnline
()
return
self
.
processComputerPartitionListOnline
()
except
RequestException
:
except
(
RequestException
,
ConnectionError
)
:
return
self
.
processComputerPartitionListOffline
()
return
self
.
processComputerPartitionListOffline
()
def
processComputerPartitionListOnline
(
self
):
def
processComputerPartitionListOnline
(
self
):
...
@@ -1456,7 +1457,7 @@ stderr_logfile_backups=1
...
@@ -1456,7 +1457,7 @@ stderr_logfile_backups=1
self
.
processComputerPartition
(
computer_partition
)
self
.
processComputerPartition
(
computer_partition
)
# Handle connection loss at the next level
# Handle connection loss at the next level
except
RequestException
:
except
(
RequestException
,
ConnectionError
)
:
raise
raise
# Send log before exiting
# Send log before exiting
...
@@ -1517,6 +1518,40 @@ stderr_logfile_backups=1
...
@@ -1517,6 +1518,40 @@ stderr_logfile_backups=1
def
processComputerPartitionListOffline
(
self
):
def
processComputerPartitionListOffline
(
self
):
self
.
logger
.
info
(
'Processing computer partitions offline...'
)
self
.
logger
.
info
(
'Processing computer partitions offline...'
)
# Backwards compatibility: consume legacy '.requested_state' files and stop partitions they mark as 'stopped'
for
name
in
os
.
listdir
(
self
.
instance_root
):
instance_path
=
os
.
path
.
join
(
self
.
instance_root
,
name
)
state_path
=
os
.
path
.
join
(
instance_path
,
'.requested_state'
)
try
:
with
open
(
state_path
)
as
f
:
requested_state
=
f
.
read
()
os
.
remove
(
state_path
)
except
(
IOError
,
OSError
)
as
e
:
if
e
.
errno
!=
errno
.
ENOENT
and
e
.
errno
!=
errno
.
ENOTDIR
:
raise
requested_state
=
None
if
requested_state
==
'stopped'
:
local_partition
=
Partition
(
software_path
=
None
,
instance_path
=
instance_path
,
shared_part_list
=
''
,
supervisord_partition_configuration_dir
=
(
_getSupervisordConfigurationDirectory
(
self
.
instance_root
)),
supervisord_socket
=
self
.
supervisord_socket
,
computer_partition
=
None
,
computer_id
=
self
.
computer_id
,
partition_id
=
name
,
server_url
=
self
.
master_url
,
software_release_url
=
'toto'
,
certificate_repository_path
=
self
.
certificate_repository_path
,
buildout
=
self
.
buildout
,
buildout_debug
=
self
.
buildout_debug
,
logger
=
self
.
logger
,
instance_storage_home
=
self
.
instance_storage_home
,
ipv4_global_network
=
self
.
ipv4_global_network
,
)
local_partition
.
stop
()
# Offline: start all existing services
try
:
try
:
supervisord_socket_path
=
_getSupervisordSocketPath
(
supervisord_socket_path
=
_getSupervisordSocketPath
(
self
.
instance_root
,
self
.
instance_root
,
...
...
slapos/tests/test_slapgrid.py
View file @
4e44bf95
...
@@ -308,7 +308,9 @@ class TestBasicSlapgridCP(BasicMixin, unittest.TestCase):
...
@@ -308,7 +308,9 @@ class TestBasicSlapgridCP(BasicMixin, unittest.TestCase):
def
test_no_master
(
self
):
def
test_no_master
(
self
):
os
.
mkdir
(
self
.
software_root
)
os
.
mkdir
(
self
.
software_root
)
os
.
mkdir
(
self
.
instance_root
)
os
.
mkdir
(
self
.
instance_root
)
self
.
assertRaises
(
ConnectionError
,
self
.
grid
.
processComputerPartitionList
)
self
.
assertEqual
(
self
.
grid
.
processComputerPartitionList
(),
slapgrid
.
SLAPGRID_OFFLINE_SUCCESS
)
def
test_environment_variable_HOME
(
self
):
def
test_environment_variable_HOME
(
self
):
# When running instance, $HOME is set to the partition path
# When running instance, $HOME is set to the partition path
...
@@ -1087,6 +1089,71 @@ exit 1
...
@@ -1087,6 +1089,71 @@ exit 1
'/getComputerPartitionCertificate'
# /getFullComputerInformation is cached
'/getComputerPartitionCertificate'
# /getFullComputerInformation is cached
])
])
def
test_stopped_partition_remains_stopped_after_master_connection_loss
(
self
):
computer
=
self
.
getTestComputerClass
()(
self
.
software_root
,
self
.
instance_root
,
instance_amount
=
2
)
for
i
in
range
(
2
):
partition
=
computer
.
instance_list
[
i
]
partition
.
requested_state
=
'started'
partition
.
software
.
setBuildout
()
run_path
=
os
.
path
.
join
(
partition
.
partition_path
,
'etc'
,
'run'
)
os
.
makedirs
(
run_path
)
with
open
(
os
.
path
.
join
(
run_path
,
'runner'
),
'w'
)
as
f
:
f
.
write
(
"#!/bin/sh
\
n
echo 'Working'
\
n
touch 'runner_worked'"
)
os
.
fchmod
(
f
.
fileno
(),
0o755
)
control_partition
=
computer
.
instance_list
[
0
]
test_partition
=
computer
.
instance_list
[
1
]
control_file
=
os
.
path
.
join
(
control_partition
.
partition_path
,
'runner_worked'
)
test_file
=
os
.
path
.
join
(
test_partition
.
partition_path
,
'runner_worked'
)
def
assertRunnerWorked
(
path
):
for
_
in
range
(
50
):
if
os
.
path
.
exists
(
path
):
break
time
.
sleep
(
0.1
)
else
:
self
.
assertTrue
(
os
.
path
.
exists
(
path
))
with
httmock
.
HTTMock
(
computer
.
request_handler
):
self
.
assertEqual
(
self
.
grid
.
processComputerPartitionList
(),
slapgrid
.
SLAPGRID_SUCCESS
)
self
.
assertInstanceDirectoryListEqual
([
'0'
,
'1'
])
assertRunnerWorked
(
control_file
)
assertRunnerWorked
(
test_file
)
for
i
in
range
(
2
):
six
.
assertCountEqual
(
self
,
os
.
listdir
(
computer
.
instance_list
[
i
].
partition_path
),
[
'.slapgrid'
,
'.%d_runner.log'
%
i
,
'buildout.cfg'
,
'etc'
,
'runner_worked'
,
'software_release'
,
'worked'
,
'.slapos-retention-lock-delay'
])
self
.
assertEqual
(
control_partition
.
state
,
'started'
)
self
.
assertEqual
(
test_partition
.
state
,
'started'
)
# Simulate a partition stopped by an older slapgrid version, which recorded the state in a '.requested_state' file
test_partition
.
state
=
'stopped'
state_path
=
os
.
path
.
join
(
test_partition
.
partition_path
,
'.requested_state'
)
with
open
(
state_path
,
'w'
)
as
f
:
f
.
write
(
'stopped'
)
computer
.
status_code
=
503
# connection loss
os
.
unlink
(
control_file
)
os
.
unlink
(
test_file
)
with
httmock
.
HTTMock
(
computer
.
request_handler
):
self
.
assertEqual
(
self
.
grid
.
processComputerPartitionList
(),
slapgrid
.
SLAPGRID_OFFLINE_SUCCESS
)
self
.
assertInstanceDirectoryListEqual
([
'0'
,
'1'
])
assertRunnerWorked
(
control_file
)
self
.
assertFalse
(
os
.
path
.
exists
(
test_file
))
self
.
assertEqual
(
computer
.
sequence
,
[
'/getFullComputerInformation'
,
'/getComputerPartitionCertificate'
,
'/startedComputerPartition'
,
'/getComputerPartitionCertificate'
,
'/startedComputerPartition'
,
'/getComputerPartitionCertificate'
# /getFullComputerInformation is cached
])
class
TestSlapgridCPWithMasterWatchdog
(
MasterMixin
,
unittest
.
TestCase
):
class
TestSlapgridCPWithMasterWatchdog
(
MasterMixin
,
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment