Commit aaf06d29 authored by Rafael Monnerat's avatar Rafael Monnerat

slapos_crm: Merge checks, check Remote Node, check for ongoing upgrades...

  a lot of stuff changed that will probably be squashed anyway.
parent c4050294
from Products.ZSQLCatalog.SQLCatalog import SimpleQuery, ComplexQuery
portal = context.getPortalObject() portal = context.getPortalObject()
monitor_enabled_category = portal.restrictedTraverse( monitor_enabled_category = portal.restrictedTraverse(
"portal_categories/monitor_scope/enabled", None) "portal_categories/monitor_scope/enabled", None)
portal = context.getPortalObject()
portal.portal_catalog.searchAndActivate( portal.portal_catalog.searchAndActivate(
portal_type='Compute Node',
validation_state='validated', validation_state='validated',
method_id='ComputeNode_checkProjectAllocationConsistencyState', method_id='ComputeNode_checkProjectAllocationConsistencyState',
monitor_scope__uid=monitor_enabled_category.getUid(), node=ComplexQuery(
SimpleQuery(portal_type='Remote Node'),
ComplexQuery(
SimpleQuery(portal_type='Compute Node'),
SimpleQuery(monitor_scope__uid=monitor_enabled_category.getUid()),
logical_operator='and'
),
logical_operator='or'
),
group_by=['follow_up_uid'], group_by=['follow_up_uid'],
method_kw={'tag': tag}, method_kw={'tag': tag},
activate_kw={'tag': tag, 'priority': 2} activate_kw={'tag': tag, 'priority': 2}
) )
context.activate(after_tag=tag).getId() context.activate(after_tag=tag).getId()
...@@ -33,20 +33,16 @@ error_dict = { ...@@ -33,20 +33,16 @@ error_dict = {
# Since we would like a single ticket per compute node do all at once: # Since we would like a single ticket per compute node do all at once:
for compute_partition in context.contentValues(portal_type='Compute Partition'): for compute_partition in context.contentValues(portal_type='Compute Partition'):
if compute_partition.getSlapState() == 'busy': if compute_partition.getSlapState() == 'busy':
sla_error = compute_partition.ComputePartition_checkAllocatedSlaState() compute_partition_error_dict = compute_partition.ComputePartition_checkAllocationConsistencyState()
allocation_supply_error = compute_partition.ComputePartition_checkAllocatedSupplyState() if compute_partition_error_dict:
compute_node_error_dict[compute_partition.getRelativeUrl()] = { error_dict['compute_node_error_dict'][compute_partition.getId()] = compute_partition_error_dict
'sla_error': sla_error,
'allocation_supply_error': allocation_supply_error
}
if sla_error is not None or allocation_supply_error is not None:
error_dict['should_notify'] = True error_dict['should_notify'] = True
if not error_dict['should_notify']: if not error_dict['should_notify']:
return return
## Write minimal message here, and replace the dict ## Write minimal message here, and replace the dict
error_dict['message'] = compute_node_error_dict error_dict['message'] = error_dict['compute_node_error_dict']
support_request = project.Project_createTicketWithCausality( support_request = project.Project_createTicketWithCausality(
'Support Request', 'Support Request',
......
from Products.ZSQLCatalog.SQLCatalog import SimpleQuery, ComplexQuery
portal = context.getPortalObject() portal = context.getPortalObject()
monitor_enabled_category = portal.restrictedTraverse( monitor_enabled_category = portal.restrictedTraverse(
"portal_categories/monitor_scope/enabled", None) "portal_categories/monitor_scope/enabled", None)
...@@ -11,9 +13,16 @@ if project.Project_isSupportRequestCreationClosed(): ...@@ -11,9 +13,16 @@ if project.Project_isSupportRequestCreationClosed():
if monitor_enabled_category is not None: if monitor_enabled_category is not None:
project_uid = project.getUid() project_uid = project.getUid()
portal.portal_catalog.searchAndActivate( portal.portal_catalog.searchAndActivate(
portal_type='Compute Node', node=ComplexQuery(
SimpleQuery(portal_type='Remote Node'),
ComplexQuery(
SimpleQuery(portal_type='Compute Node'),
SimpleQuery(monitor_scope__uid=monitor_enabled_category.getUid()),
logical_operator='and'
),
logical_operator='or'
),
validation_state='validated', validation_state='validated',
monitor_scope__uid=monitor_enabled_category.getUid(),
follow_up__uid=project_uid, follow_up__uid=project_uid,
method_id='ComputeNode_checkAllocationConsistencyState', method_id='ComputeNode_checkAllocationConsistencyState',
# This alarm bruteforce checking all documents, # This alarm bruteforce checking all documents,
......
compute_partition = context
sla_error_list = []
compute_node = compute_partition.getParentValue()
assert compute_node.getPortalType() == 'Compute Node'
instance = compute_partition.getAggregateRelatedValue(portal_type='Software Instance')
assert instance is not None, 'Instance is None'
if instance.getValidationState() != 'validated' or \
instance.getSlapState() == 'destroy_requested':
# Outdated catalog or instance under garbage collection,
# we skip for now.
return sla_error_list
sla_dict = instance.getSlaXmlAsDict()
if not sla_dict:
return sla_error_list
# Simple check of instance SLAs
if "computer_guid" in sla_dict:
computer_guid = sla_dict.pop("computer_guid")
if compute_node.getReference() != computer_guid:
sla_error_list.append('computer_guid do not match (%s != %s)' % (
computer_guid, compute_node.getReference()))
if "instance_guid" in sla_dict:
if instance.getPortalType() != 'Slave Instance':
sla_error_list.append('instance_guid is provided to a Software Instance')
else:
instance_guid = sla_dict.pop("instance_guid")
software_instance = compute_partition.getAggregateRelatedValue(portal_type='Software Instance')
if software_instance is None:
sla_error_list.append('instance_guid provided but no Software Instance was found')
if software_instance.getReference() != instance_guid:
sla_error_list.append('instance_guid do not match (%s != %s)' % (
instance_guid != software_instance.getReference()))
if 'network_guid' in sla_dict:
network_guid = sla_dict.pop('network_guid')
network_reference = compute_node.getSubordinationReference()
if network_reference != network_guid:
sla_error_list.append('network_guid do not match (%s != %s)' % (
network_guid, network_reference))
project_reference = compute_node.getFollowUpReference()
if 'project_guid' in sla_dict:
project_guid = sla_dict.pop("project_guid")
if project_reference != project_guid:
sla_error_list.append('project_guid do not match (%s != %s)' % (
project_guid, project_reference))
instance_project_reference = instance.getFollowUpReference()
if project_reference != instance_project_reference:
sla_error_list.append("Instance and Compute node project don't match (%s != %s)" % (
project_reference, instance_project_reference))
return sla_error_list
compute_partition = context
compute_node = compute_partition.getParentValue()
assert compute_node.getPortalType() == 'Compute Node'
instance = compute_partition.getAggregateRelatedValue(portal_type='Software Instance')
assert instance is not None, 'Instance is None'
if instance.getValidationState() != 'validated' or \
instance.getSlapState() == 'destroy_requested':
# Outdated catalog or instance under garbage collection,
# we skip for now.
return
project = instance.getFollowUpValue()
assert project is not None, 'Project is None'
instance_tree = instance.getSpecialiseValue(portal_type="Instance Tree")
instance_tree_context = instance_tree.asContext(
source_reference=instance.getSourceReference(),
url_string=instance.getUrlString()
)
software_product, software_release, software_type = instance_tree_context.InstanceTree_getSoftwareProduct()
if software_product is None:
return 'No Software Product matching'
person = instance_tree.getDestinationSectionValue()
allocation_cell_list = project.Project_getSoftwareProductPredicateList(
software_product=software_product,
software_product_type=software_type,
software_product_release=software_release,
destination_value=person,
node_value=compute_node,
predicate_portal_type='Allocation Supply Cell'
)
if not allocation_cell_list:
return 'No Allocation Supply'
<?xml version="1.0"?>
<ZopeData>
<record id="1" aka="AAAAAAAAAAE=">
<pickle>
<global name="PythonScript" module="Products.PythonScripts.PythonScript"/>
</pickle>
<pickle>
<dictionary>
<item>
<key> <string>_bind_names</string> </key>
<value>
<object>
<klass>
<global name="_reconstructor" module="copy_reg"/>
</klass>
<tuple>
<global name="NameAssignments" module="Shared.DC.Scripts.Bindings"/>
<global name="object" module="__builtin__"/>
<none/>
</tuple>
<state>
<dictionary>
<item>
<key> <string>_asgns</string> </key>
<value>
<dictionary>
<item>
<key> <string>name_container</string> </key>
<value> <string>container</string> </value>
</item>
<item>
<key> <string>name_context</string> </key>
<value> <string>context</string> </value>
</item>
<item>
<key> <string>name_m_self</string> </key>
<value> <string>script</string> </value>
</item>
<item>
<key> <string>name_subpath</string> </key>
<value> <string>traverse_subpath</string> </value>
</item>
</dictionary>
</value>
</item>
</dictionary>
</state>
</object>
</value>
</item>
<item>
<key> <string>_params</string> </key>
<value> <string></string> </value>
</item>
<item>
<key> <string>id</string> </key>
<value> <string>ComputePartition_checkAllocatedSupplyState</string> </value>
</item>
</dictionary>
</pickle>
</record>
</ZopeData>
portal = context.getPortalObject()
compute_partition = context
error_dict = {}
compute_node = compute_partition.getParentValue()
assert compute_node.getPortalType() == 'Compute Node'
instance_list = compute_partition.getAggregateRelatedValueList(portal_type=[
'Software Instance', 'Slave Instance'])
assert len(instance_list), 'No instance was found'
for instance in instance_list:
instance_sla_error_list = []
if instance.getValidationState() != 'validated' or \
instance.getSlapState() == 'destroy_requested':
# Outdated catalog or instance under garbage collection,
# we skip for now.
continue
sla_dict = instance.getSlaXmlAsDict()
if sla_dict:
# Simple check of instance SLAs
if "computer_guid" in sla_dict:
computer_guid = sla_dict.pop("computer_guid")
if compute_node.getReference() != computer_guid:
instance_sla_error_list.append('computer_guid do not match (%s != %s)' % (
computer_guid, compute_node.getReference()))
if "instance_guid" in sla_dict:
if instance.getPortalType() != 'Slave Instance':
instance_sla_error_list.append('instance_guid is provided to a Software Instance')
else:
instance_guid = sla_dict.pop("instance_guid")
software_instance = compute_partition.getAggregateRelatedValue(portal_type='Software Instance')
if software_instance is None:
instance_sla_error_list.append('instance_guid provided but no Software Instance was found')
if software_instance.getReference() != instance_guid:
instance_sla_error_list.append('instance_guid do not match (%s != %s)' % (
instance_guid != software_instance.getReference()))
if 'network_guid' in sla_dict:
network_guid = sla_dict.pop('network_guid')
network_reference = compute_node.getSubordinationReference()
if network_reference != network_guid:
instance_sla_error_list.append('network_guid do not match (%s != %s)' % (
network_guid, network_reference))
project_reference = compute_node.getFollowUpReference()
if 'project_guid' in sla_dict:
project_guid = sla_dict.pop("project_guid")
if project_reference != project_guid:
instance_sla_error_list.append('project_guid do not match (%s != %s)' % (
project_guid, project_reference))
instance_project_reference = instance.getFollowUpReference()
if project_reference != instance_project_reference:
instance_sla_error_list.append("Instance and Compute node project don't match (%s != %s)" % (
project_reference, instance_project_reference))
if instance_sla_error_list:
error_dict[instance.getRelativeUrl()] = {
'instance': instance,
'sla_error_list': instance_sla_error_list
}
# Now check allocation supply consistency
instance_tree = instance.getSpecialiseValue(portal_type="Instance Tree")
# if there is an ongoing upgrade decision, skip, since there is already
# a ticket for handle the inconsistency.
if portal.portal_catalog.getResultValue(
portal_type='Upgrade Decision',
aggregate__uid=instance_tree.getUid(),
simulation_state=['started', 'stopped', 'planned', 'confirmed']) is not None:
continue
instance_tree_context = instance_tree.asContext(
source_reference=instance.getSourceReference(),
url_string=instance.getUrlString())
software_product, software_release, software_type = instance_tree_context.InstanceTree_getSoftwareProduct()
if software_product is None:
if instance.getRelativeUrl() not in error_dict:
error_dict[instance.getRelativeUrl()] = {'instance': instance}
message = 'No Software Product matching for %s' % instance.getTitle()
error_dict[instance.getRelativeUrl()]['allocation_supply_error'] = message
continue
project = instance.getFollowUpValue()
assert project is not None, 'Project is None'
person = instance_tree.getDestinationSectionValue()
allocation_cell_list = project.Project_getSoftwareProductPredicateList(
software_product=software_product,
software_product_type=software_type,
software_product_release=software_release,
destination_value=person,
node_value=compute_node,
predicate_portal_type='Allocation Supply Cell'
)
if not allocation_cell_list:
if instance.getRelativeUrl() not in error_dict:
error_dict[instance.getRelativeUrl()] = {'instance': instance}
message = 'No Allocation Supply for %s' % instance.getTitle()
error_dict[instance.getRelativeUrl()]['allocation_supply_error'] = message
return error_dict
...@@ -54,7 +54,7 @@ ...@@ -54,7 +54,7 @@
</item> </item>
<item> <item>
<key> <string>id</string> </key> <key> <string>id</string> </key>
<value> <string>ComputePartition_checkAllocatedSlaState</string> </value> <value> <string>ComputePartition_checkAllocationConsistencyState</string> </value>
</item> </item>
</dictionary> </dictionary>
</pickle> </pickle>
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment