Commit 37984224 authored by Rafael Monnerat

slapos_crm: Reimplement the Instance Tree state check

    Merged into Project_checkMonitoring so the check is launched on a per-project basis.
parent 328d35fa
Pipeline #37205 failed with stage
in 0 seconds
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="Alarm" module="erp5.portal_type"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>active_sense_method_id</string> </key>
<value> <string>Alarm_checkInstanceTreeState</string> </value>
</item>
<item>
<key> <string>automatic_solve</string> </key>
<value> <int>0</int> </value>
</item>
<item>
<key> <string>description</string> </key>
<value> <string>Check and create a Ticket when an instance is partially allocated for more than 4 hours.</string> </value>
</item>
<item>
<key> <string>enabled</string> </key>
<value> <int>1</int> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>slapos_crm_check_instance_in_error</string> </value>
</item>
<item>
<key> <string>periodicity_hour</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>periodicity_hour_frequency</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>periodicity_minute</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>periodicity_minute_frequency</string> </key>
<value> <int>5</int> </value>
</item>
<item>
<key> <string>periodicity_month</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>periodicity_month_day</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>periodicity_start_date</string> </key>
<value>
<object>
<klass>
<global name="_reconstructor" module="copy_reg"/>
</klass>
<tuple>
<global name="DateTime" module="DateTime.DateTime"/>
<global name="object" module="__builtin__"/>
<none/>
</tuple>
<state>
<tuple>
<float>1406073600.0</float>
<string>GMT</string>
</tuple>
</state>
</object>
</value>
</item>
<item>
<key> <string>periodicity_week</string> </key>
<value>
<tuple/>
</value>
</item>
<item>
<key> <string>portal_type</string> </key>
<value> <string>Alarm</string> </value>
</item>
<item>
<key> <string>sense_method_id</string> </key>
<value>
<none/>
</value>
</item>
<item>
<key> <string>title</string> </key>
<value> <string>Check partially allocated Instance for more than 4 hours</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
# Alarm sense script: schedule InstanceTree_checkSoftwareInstanceState for
# every validated Instance Tree found by the catalog.
# `context` and `tag` are provided by the script binding / parameters.
catalog = context.getPortalObject().portal_catalog
instance_tree_query = {
  'portal_type': 'Instance Tree',
  'validation_state': 'validated',
}
catalog.searchAndActivate(
  method_id='InstanceTree_checkSoftwareInstanceState',
  # Every spawned activity carries the alarm tag.
  activate_kw={'tag': tag},
  **instance_tree_query
)
# Queue a no-op activity on the alarm itself, ordered after the tagged ones.
# NOTE(review): presumably this keeps the alarm reported as active until all
# tagged activities are done -- confirm against the Alarm implementation.
context.activate(after_tag=tag).getId()
......@@ -9,7 +9,6 @@ if project.Project_isSupportRequestCreationClosed():
return
date_check_limit = addToDate(DateTime(), to_add={'hour': -1})
if (date_check_limit - instance_tree.getCreationDate()) < 0:
# Too early to check
return
......@@ -20,55 +19,26 @@ software_instance_list = context.portal_catalog(
**{"slapos_item.slap_state": ["start_requested"]})
# Check if at least one software Instance is Allocated
notification_message_reference = None
for instance in software_instance_list:
if (date_check_limit - instance.getCreationDate()) < 0:
continue
if instance.getSlapState() != "start_requested":
continue
compute_partition = instance.getAggregateValue()
if compute_partition is None:
notification_message_reference = 'slapos-crm-instance-tree-instance-allocation.notification'
elif (instance.getPortalType() == "Software Instance") and \
(compute_partition.getParentValue().getPortalType() == "Compute Node") and \
(compute_partition.getParentValue().getMonitorScope() == "enabled") and \
instance.SoftwareInstance_hasReportedError(tolerance=30):
notification_message_reference = 'slapos-crm-instance-tree-instance-state.notification'
if notification_message_reference is not None:
ticket_title = "Instance Tree %s is failing." % context.getTitle()
error_message = instance.SoftwareInstance_hasReportedError(include_message=True)
description = "%s contains software instances which are unallocated or reporting errors." % (
context.getTitle())
if error_message:
description += "\n\nMessage: %s" % str(error_message)
else:
error_message = "No message!"
error_dict = instance.SoftwareInstance_getReportedErrorDict(tolerance=30)
if not error_dict['should_notify']:
support_request = project.Project_createSupportRequestWithCausality(
ticket_title,
description,
error_dict['ticket_title'],
error_dict['ticket_description'],
causality=context.getRelativeUrl(),
destination_decision=context.getDestinationSection()
)
if support_request is None:
return
support_request.Ticket_createProjectEvent(
ticket_title, 'outgoing', 'Web Message',
portal.service_module.slapos_crm_information.getRelativeUrl(),
text_content=description,
content_type='text/plain',
notification_message=notification_message_reference,
#language=XXX,
substitution_method_parameter_dict={
'instance_tree_title':context.getTitle(),
'instance': instance.getTitle(),
'error_text': error_message
}
)
return
if support_request is not None:
support_request.Ticket_createProjectEvent(
error_dict['ticket_title'], 'outgoing', 'Web Message',
portal.service_module.slapos_crm_information.getRelativeUrl(),
text_content=error_dict['ticket_description'],
content_type='text/plain',
notification_message=error_dict['notification_message_reference'],
#language=XXX,
substitution_method_parameter_dict=error_dict
)
return support_request
......@@ -54,7 +54,7 @@
</item>
<item>
<key> <string>id</string> </key>
<value> <string>InstanceTree_checkSoftwareInstanceState</string> </value>
<value> <string>InstanceTree_checkMonitoringState</string> </value>
</item>
</dictionary>
</pickle>
......
......@@ -6,12 +6,22 @@ if context.Project_isSupportRequestCreationClosed():
return
if monitor_enabled_category is not None:
project_uid = context.getUid()
portal.portal_catalog.searchAndActivate(
portal_type='Compute Node',
validation_state='validated',
monitor_scope__uid=monitor_enabled_category.getUid(),
follow_up__uid=context.getUid(),
follow_up__uid=project_uid,
method_id='ComputeNode_checkMonitoringState',
activate_kw={'tag': tag}
)
portal.portal_catalog.searchAndActivate(
portal_type='Instance Tree',
validation_state='validated',
follow_up__uid=project_uid,
method_id='InstanceTree_checkMonitoringState',
activate_kw = {'tag':tag}
)
context.activate(after_tag=tag).getId()
from DateTime import DateTime
error_dict = {
'should_notify': None,
'ticket_title': None,
'ticket_description': None,
'instance_tree_title':context.getSpecialiseTitle(),
'instance': context.getTitle(),
'notification_message_reference': None,
'last_contact': None,
'since': None,
'message': None
}
# Nothing to do
if context.getSlapState() != "start_requested":
if batch_mode:
return
return error_dict
def updateErrorDictWithError(_error_dict):
_error_dict['should_notify'] = True
_error_dict['ticket_title'] = "Instance Tree %s is failing." % context.getTitle()
description = "%s contains software instances which are unallocated or reporting errors." % (
context.getTitle())
if _error_dict['message']:
description += "\n\nMessage: %s" % str(_error_dict['message'])
_error_dict['ticket_description'] = description
return _error_dict
compute_partition = context.getAggregateValue(portal_type="Compute Partition")
if compute_partition is None:
error_dict['notification_message_reference'] = 'slapos-crm-instance-tree-instance-allocation.notification'
return updateErrorDictWithError(error_dict)
if context.getPortalType() == 'Slave Instance':
# We skip if the the slave is already allocated.
if batch_mode:
return
return error_dict
# Skip to check if monitor disabled on the compute node.
# Remote node has no state.
if compute_partition.getParentValue().getPortalType() != "Compute Node":
if batch_mode:
return
portal_type = compute_partition.getParentValue().getPortalType()
error_dict['ticket_title'] = "Instance is allocated on a %s" % portal_type
error_dict['ticket_description'] = error_dict['ticket_title']
return error_dict
if compute_partition.getParentValue().getMonitorScope() != "enabled":
if batch_mode:
return
error_dict['ticket_title'] = "Monitor is disabled on the Compute Node"
error_dict['ticket_description'] = error_dict['ticket_title']
return error_dict
d = context.getAccessStatus()
# Ignore if data isn't present.
if d.get("no_data", None) == 1:
if batch_mode:
return
error_dict['ticket_title'] = "Not possible to connect"
error_dict['ticket_description'] = "Not possible to connect"
return error_dict
error_dict['message'] = d['text']
error_dict['last_contact'] = DateTime(d.get('created_at'))
error_dict['since'] = DateTime(d.get('since'))
if error_dict['message'].startswith('#error '):
if ((DateTime()-error_dict['since'])*24*60) > tolerance:
error_dict['notification_message_reference'] = 'slapos-crm-instance-tree-instance-state.notification'
if batch_mode:
return True
return updateErrorDictWithError(error_dict)
if batch_mode:
return None
return error_dict
......@@ -50,11 +50,11 @@
</item>
<item>
<key> <string>_params</string> </key>
<value> <string>tag, fixit, params</string> </value>
<value> <string>tolerance=0, batch_mode=False</string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>Alarm_checkInstanceTreeState</string> </value>
<value> <string>SoftwareInstance_getReportedErrorDict</string> </value>
</item>
</dictionary>
</pickle>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment