Commit 6e6e62f5 authored by Rafael Monnerat

slapos_crm: Move alarm into SoftwareInstance_getReportedErrorDict

   Drop the specific alarm and update SoftwareInstance_getReportedErrorDict to issue a ticket if the user has an instance on a 'close/forever' compute node.

   Keep in mind that, in the future, we can also use "close for termination" (in addition to "close forever") to create tickets when a computer is about to be removed, so that the administrator or the user can act on it.
parent 53edd20d
Pipeline #37235 failed with stage
in 0 seconds
<?xml version="1.0"?>
<!-- ZopeData export of the Alarm document with id
     "slapos_crm_check_instance_on_closed_compute_node"
     (title "Check Instance on closed Compute Nodes").
     Its sense method is the Alarm_searchInstanceOnClosedComputeNode script. -->
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Alarm" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<!-- Id of the script invoked when the alarm senses. -->
<item>
<key> <string>active_sense_method_id</string> </key>
<value> <string>Alarm_searchInstanceOnClosedComputeNode</string> </value>
</item>
<item>
<key> <string>automatic_solve</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>description</string> </key>
<value>
<none/>
</value>
</item>
<!-- Exported with enabled set to 0; presumably the alarm has to be switched on
     after installation (TODO confirm against the deployment procedure). -->
<item>
<key> <string>enabled</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>slapos_crm_check_instance_on_closed_compute_node</string> </value>
</item>
<!-- Periodicity: hour 1, minute 0, week day Monday. The month, month day and
     week tuples are left empty. -->
<item>
<key> <string>periodicity_hour</string> </key>
<value>
<tuple>
<int>1</int>
</tuple>
</value>
</item>
<item>
<key> <string>periodicity_minute</string> </key>
<value>
<tuple>
<int>0</int>
</tuple>
</value>
</item>
<item>
<key> <string>periodicity_month</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>periodicity_month_day</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>periodicity_week</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>periodicity_week_day</string> </key>
<value>
<tuple>
<string>Monday</string>
</tuple>
</value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Alarm</string> </value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Check Instance on closed Compute Nodes</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
portal = context.getPortalObject()
active_process = context.newActiveProcess().getRelativeUrl()
# Closed compute_nodes like this might contains unremoved instances hanging there.
category_close_forever = portal.restrictedTraverse(
"portal_categories/allocation_scope/close/forever", None)
category_close_outdated = portal.restrictedTraverse(
"portal_categories/allocation_scope/close/outdated", None)
return portal.portal_catalog.searchAndActivate(
method_kw=dict(fixit=fixit, active_process=active_process),
method_id="ComputeNode_checkInstanceOnCloseAllocation",
portal_type='Compute Node',
default_allocation_scope_uid=[category_close_forever.getUid(), category_close_outdated.getUid()],
validation_state="validated",
activite_kw={"tag": tag} )
<?xml version="1.0"?>
<!-- ZopeData export of the PythonScript metadata for
     Alarm_searchInstanceOnClosedComputeNode: the name bindings and the
     parameter string. The script body is kept separately. -->
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<!-- Names bound inside the script: container, context, script and
     traverse_subpath. -->
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="_reconstructor" module="copy_reg"/>
</klass>
<tuple>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
<global name="object" module="__builtin__"/>
<none/>
</tuple>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<!-- Parameter string of the script: fixit and tag are required, extra
     keyword arguments are accepted. -->
<item>
<key> <string>_params</string> </key>
<value> <string>fixit, tag, **kw</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Alarm_searchInstanceOnClosedComputeNode</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
from Products.CMFActivity.ActiveResult import ActiveResult
portal = context.getPortalObject()
active_process = portal.restrictedTraverse(active_process)
partition_uid_list = [compute_partition.getUid() for compute_partition in context.objectValues(portal_type="Compute Partition")]
if not partition_uid_list:
return
for software_instance in portal.portal_catalog(
portal_type="Software Instance",
default_aggregate_uid=partition_uid_list):
if software_instance.getSlapState() == "destroy_requested":
continue
active_process.postResult(ActiveResult(
summary="%s" % software_instance.getRelativeUrl(),
severity=100,
detail="%s on %s" % (software_instance.getRelativeUrl(), context.getRelativeUrl())))
<?xml version="1.0"?>
<!-- ZopeData export of the PythonScript metadata for
     ComputeNode_checkInstanceOnCloseAllocation: the name bindings and the
     parameter string. The script body is kept separately. -->
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<!-- Names bound inside the script: container, context, script and
     traverse_subpath. -->
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="_reconstructor" module="copy_reg"/>
</klass>
<tuple>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
<global name="object" module="__builtin__"/>
<none/>
</tuple>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<!-- Parameter string of the script: both fixit and active_process are
     required. -->
<item>
<key> <string>_params</string> </key>
<value> <string>fixit, active_process</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>ComputeNode_checkInstanceOnCloseAllocation</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
...@@ -19,8 +19,8 @@ if context.getSlapState() != "start_requested": ...@@ -19,8 +19,8 @@ if context.getSlapState() != "start_requested":
def updateErrorDictWithError(_error_dict): def updateErrorDictWithError(_error_dict):
_error_dict['should_notify'] = True _error_dict['should_notify'] = True
_error_dict['ticket_title'] = "Instance Tree %s is failing." % context.getTitle() _error_dict['ticket_title'] = "Instance Tree %s is failing." % _error_dict['instance_tree_title']
description = "%s contains software instances which are unallocated or reporting errors." % ( description = "%s is unallocated, reporting errors or allocated on a closed compute node." % (
context.getTitle()) context.getTitle())
if _error_dict['message']: if _error_dict['message']:
description += "\n\nMessage: %s" % str(_error_dict['message']) description += "\n\nMessage: %s" % str(_error_dict['message'])
...@@ -32,6 +32,16 @@ if compute_partition is None: ...@@ -32,6 +32,16 @@ if compute_partition is None:
error_dict['notification_message_reference'] = 'slapos-crm-instance-tree-instance-allocation.notification' error_dict['notification_message_reference'] = 'slapos-crm-instance-tree-instance-allocation.notification'
return updateErrorDictWithError(error_dict) return updateErrorDictWithError(error_dict)
compute_node = compute_partition.getParentValue()
if compute_node.getPortalType() == "Compute Node" and \
compute_node.getAllocationScope() == 'close/forever':
# Closed compute nodes like this might contain unremoved instances hanging there
error_dict['notification_message_reference'] = 'slapos-crm-instance-tree-instance-on-close-computer.notification'
error_dict = updateErrorDictWithError(error_dict)
error_dict['message'] = "%s is allocated on a Compute node that is closed forever." % context.getTitle()
error_dict['ticket_description'] = error_dict['message']
return error_dict
if context.getPortalType() == 'Slave Instance': if context.getPortalType() == 'Slave Instance':
# We skip if the slave is already allocated. # We skip if the slave is already allocated.
if batch_mode: if batch_mode:
...@@ -40,7 +50,7 @@ if context.getPortalType() == 'Slave Instance': ...@@ -40,7 +50,7 @@ if context.getPortalType() == 'Slave Instance':
# Skip to check if monitor disabled on the compute node. # Skip to check if monitor disabled on the compute node.
# Remote node has no state. # Remote node has no state.
if compute_partition.getParentValue().getPortalType() != "Compute Node": if compute_node.getPortalType() != "Compute Node":
if batch_mode: if batch_mode:
return return
portal_type = compute_partition.getParentValue().getPortalType() portal_type = compute_partition.getParentValue().getPortalType()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment