Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
slapos.core
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Roque
slapos.core
Commits
4e44bf95
Commit
4e44bf95
authored
Sep 11, 2023
by
Xavier Thompson
Browse files
Options
Browse Files
Download
Plain Diff
slapgrid: Fix connectionless instance processing
See merge request
nexedi/slapos.core!572
parents
be11fd4f
c9b394c1
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
105 additions
and
3 deletions
+105
-3
slapos/grid/slapgrid.py
slapos/grid/slapgrid.py
+37
-2
slapos/tests/test_slapgrid.py
slapos/tests/test_slapgrid.py
+68
-1
No files found.
slapos/grid/slapgrid.py
View file @
4e44bf95
...
...
@@ -59,6 +59,7 @@ from requests.exceptions import RequestException
from
lxml
import
etree
from
slapos
import
manager
as
slapmanager
from
slapos.slap.exception
import
ConnectionError
from
slapos.slap.slap
import
NotFoundError
from
slapos.slap.slap
import
ServerError
from
slapos.slap.slap
import
COMPUTER_PARTITION_REQUEST_LIST_TEMPLATE_FILENAME
...
...
@@ -1425,7 +1426,7 @@ stderr_logfile_backups=1
def processComputerPartitionList(self):
    """Process computer partitions, falling back to offline processing.

    The online path is attempted first. When it raises either
    ``RequestException`` or ``ConnectionError`` (the online path re-raises
    connection loss so that it is handled at this level), the offline path
    is run instead.

    Returns whatever the selected processing method returns.
    """
    try:
        return self.processComputerPartitionListOnline()
    # A single clause covers both connection-loss exception types; the older
    # clause that caught only RequestException is superseded by this one.
    except (RequestException, ConnectionError):
        return self.processComputerPartitionListOffline()
def
processComputerPartitionListOnline
(
self
):
...
...
@@ -1456,7 +1457,7 @@ stderr_logfile_backups=1
self
.
processComputerPartition
(
computer_partition
)
# Handle connection loss at the next level
except
RequestException
:
except
(
RequestException
,
ConnectionError
)
:
raise
# Send log before exiting
...
...
@@ -1517,6 +1518,40 @@ stderr_logfile_backups=1
def
processComputerPartitionListOffline
(
self
):
self
.
logger
.
info
(
'Processing computer partitions offline...'
)
# Backwards compatibility: remove stopped services
for
name
in
os
.
listdir
(
self
.
instance_root
):
instance_path
=
os
.
path
.
join
(
self
.
instance_root
,
name
)
state_path
=
os
.
path
.
join
(
instance_path
,
'.requested_state'
)
try
:
with
open
(
state_path
)
as
f
:
requested_state
=
f
.
read
()
os
.
remove
(
state_path
)
except
(
IOError
,
OSError
)
as
e
:
if
e
.
errno
!=
errno
.
ENOENT
and
e
.
errno
!=
errno
.
ENOTDIR
:
raise
requested_state
=
None
if
requested_state
==
'stopped'
:
local_partition
=
Partition
(
software_path
=
None
,
instance_path
=
instance_path
,
shared_part_list
=
''
,
supervisord_partition_configuration_dir
=
(
_getSupervisordConfigurationDirectory
(
self
.
instance_root
)),
supervisord_socket
=
self
.
supervisord_socket
,
computer_partition
=
None
,
computer_id
=
self
.
computer_id
,
partition_id
=
name
,
server_url
=
self
.
master_url
,
software_release_url
=
'toto'
,
certificate_repository_path
=
self
.
certificate_repository_path
,
buildout
=
self
.
buildout
,
buildout_debug
=
self
.
buildout_debug
,
logger
=
self
.
logger
,
instance_storage_home
=
self
.
instance_storage_home
,
ipv4_global_network
=
self
.
ipv4_global_network
,
)
local_partition
.
stop
()
# Offline: start all existing services
try
:
supervisord_socket_path
=
_getSupervisordSocketPath
(
self
.
instance_root
,
...
...
slapos/tests/test_slapgrid.py
View file @
4e44bf95
...
...
@@ -308,7 +308,9 @@ class TestBasicSlapgridCP(BasicMixin, unittest.TestCase):
def test_no_master(self):
    """Check that processing falls back to offline mode with no master.

    Only the software and instance root directories are created; no request
    handler is installed for the master.
    """
    os.mkdir(self.software_root)
    os.mkdir(self.instance_root)
    # processComputerPartitionList handles ConnectionError itself and runs
    # the offline path, so the call returns the offline status code rather
    # than raising. The former assertRaises(ConnectionError, ...) check
    # contradicted that and is dropped.
    self.assertEqual(
        self.grid.processComputerPartitionList(),
        slapgrid.SLAPGRID_OFFLINE_SUCCESS)
def
test_environment_variable_HOME
(
self
):
# When running instance, $HOME is set to the partition path
...
...
@@ -1087,6 +1089,71 @@ exit 1
'/getComputerPartitionCertificate'
# /getFullComputerInformation is cached
])
def test_stopped_partition_remains_stopped_after_master_connection_loss(self):
    """A partition stopped by an old version stays stopped when offline.

    Phase 1 processes two 'started' partitions online and checks that both
    runner scripts ran. Phase 2 writes a legacy '.requested_state' file
    containing 'stopped' into the second partition, makes the master answer
    503 (connection loss), and processes again: the control partition's
    runner must run again, the stopped partition's runner must not.
    """
    computer = self.getTestComputerClass()(
        self.software_root, self.instance_root, instance_amount=2)

    # Give each partition a runner script that leaves a marker file behind.
    for index in range(2):
        instance = computer.instance_list[index]
        instance.requested_state = 'started'
        instance.software.setBuildout()
        run_directory = os.path.join(instance.partition_path, 'etc', 'run')
        os.makedirs(run_directory)
        runner_path = os.path.join(run_directory, 'runner')
        with open(runner_path, 'w') as script:
            script.write("#!/bin/sh\necho 'Working'\ntouch 'runner_worked'")
            os.fchmod(script.fileno(), 0o755)

    control_partition = computer.instance_list[0]
    test_partition = computer.instance_list[1]
    control_file = os.path.join(
        control_partition.partition_path, 'runner_worked')
    test_file = os.path.join(test_partition.partition_path, 'runner_worked')

    def assert_runner_worked(path):
        # Poll up to 50 times, 0.1s apart; once the polls are exhausted,
        # make one final check that fails the test if the marker is absent.
        polls_left = 50
        while polls_left and not os.path.exists(path):
            time.sleep(0.1)
            polls_left -= 1
        if not polls_left:
            self.assertTrue(os.path.exists(path))

    # Phase 1: master reachable, both partitions get started.
    with httmock.HTTMock(computer.request_handler):
        online_status = self.grid.processComputerPartitionList()
        self.assertEqual(online_status, slapgrid.SLAPGRID_SUCCESS)
        self.assertInstanceDirectoryListEqual(['0', '1'])
        assert_runner_worked(control_file)
        assert_runner_worked(test_file)
        for index in range(2):
            expected_entries = [
                '.slapgrid',
                '.%d_runner.log' % index,
                'buildout.cfg',
                'etc',
                'runner_worked',
                'software_release',
                'worked',
                '.slapos-retention-lock-delay',
            ]
            actual_entries = os.listdir(
                computer.instance_list[index].partition_path)
            six.assertCountEqual(self, actual_entries, expected_entries)
        self.assertEqual(control_partition.state, 'started')
        self.assertEqual(test_partition.state, 'started')

    # simulate stopping the partition with old version
    test_partition.state = 'stopped'
    state_path = os.path.join(
        test_partition.partition_path, '.requested_state')
    with open(state_path, 'w') as state_file:
        state_file.write('stopped')

    computer.status_code = 503  # connection loss

    # Remove the markers so that a fresh run of each runner is observable.
    os.unlink(control_file)
    os.unlink(test_file)

    # Phase 2: master unreachable, processing happens offline.
    with httmock.HTTMock(computer.request_handler):
        offline_status = self.grid.processComputerPartitionList()
        self.assertEqual(offline_status, slapgrid.SLAPGRID_OFFLINE_SUCCESS)
        self.assertInstanceDirectoryListEqual(['0', '1'])
        assert_runner_worked(control_file)
        self.assertFalse(os.path.exists(test_file))
        expected_sequence = [
            '/getFullComputerInformation',
            '/getComputerPartitionCertificate',
            '/startedComputerPartition',
            '/getComputerPartitionCertificate',
            '/startedComputerPartition',
            '/getComputerPartitionCertificate',
            # /getFullComputerInformation is cached
        ]
        self.assertEqual(computer.sequence, expected_sequence)
class
TestSlapgridCPWithMasterWatchdog
(
MasterMixin
,
unittest
.
TestCase
):
def
setUp
(
self
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment