Commit 4e507c48 authored by Alain Takoudjou's avatar Alain Takoudjou

Update Condor recipe, allow to submit jobs

parent 9d7e9520
......@@ -22,6 +22,6 @@ extends =
[condor]
recipe = hexagonit.recipe.download
url = http://parrot.cs.wisc.edu//symlink/20120818101503/7/7.8/7.8.1/1db87306f7222aa1cb500b4f59b479e8/condor-7.8.1-x86_64_deb_6.0-stripped.tar.gz
md5sum = 83020d420599948dfcbfc3f1f6b52a5e
url = http://vlwjcg.blu.livefilestore.com/y1m01nZ1F3tAe9NX0OCuAYMzhob7YmkIONHi3jrjYNwlfVjqlnWwBMmoYnIKJjJ-abKFFuai57-ffobcFlMKk7v7NmrSDvEcry2/condor-7.9.0-x86_64_deb_6.0-stripped.tar.gz?download&psid=1
md5sum = 630ce96e5c1172391febba2aa1bad8fc
strip-top-level-dir = true
......@@ -58,6 +58,7 @@ setup(name=name,
'cloud9 = slapos.recipe.cloud9:Recipe',
'cloudooo.test = slapos.recipe.erp5_test:CloudoooRecipe',
'condor = slapos.recipe.condor:Recipe',
'condor.submit = slapos.recipe.condor:AppSubmit',
'cron.d = slapos.recipe.dcron:Part',
'cron = slapos.recipe.dcron:Recipe',
'davstorage = slapos.recipe.davstorage:Recipe',
......
......@@ -138,7 +138,7 @@ class Recipe(GenericBaseRecipe):
with open(self.options['pid-file']) as pid_file:
pid = int(pid_file.read().strip(), 10)
try:
os.kill(pid, signal.SIGUSR1) # Graceful restart
os.kill(pid, signal.SIGHUP) #restart now
except OSError:
pass
return path_list
......@@ -27,10 +27,34 @@
from slapos.recipe.librecipe import GenericBaseRecipe
import os
import subprocess
import zc.buildout
import filecmp
import urlparse
import shutil
class Recipe(GenericBaseRecipe):
"""Deploy a fully operational condor architecture."""
def __init__(self, buildout, name, options):
    """Collect the environment used for Condor commands, then init the recipe.

    The environment is built from two optional sources, in this order:

    * the buildout section named by the ``environment-section`` option
      (used only if that section exists in ``buildout``);
    * the ``environment`` option, one ``KEY=value`` definition per line.

    Every collected value is then interpolated with ``%`` against
    ``os.environ``, so ``%(PATH)s`` expands to the current ``PATH``.
    A reference to a variable missing from ``os.environ`` raises KeyError.

    Raises:
      zc.buildout.UserError: an ``environment`` line has no ``=`` sign.
    """
    self.environ = {}
    self.role = ''
    environment_section = options.get('environment-section', '').strip()
    if environment_section and environment_section in buildout:
        # Use environment variables from the designated config section.
        self.environ.update(buildout[environment_section])
    for variable in options.get('environment', '').splitlines():
        if not variable.strip():
            continue
        try:
            key, value = variable.split('=', 1)
        except ValueError:
            # Format the offending line into the message; passing it as a
            # second exception argument left a literal "%s" in the error.
            raise zc.buildout.UserError(
                'Invalid environment variable definition: %s' % variable)
        self.environ[key.strip()] = value
    # Extrapolate the environment variables using values from the current
    # environment.
    for key in self.environ:
        self.environ[key] = self.environ[key] % os.environ
    return GenericBaseRecipe.__init__(self, buildout, name, options)
def _options(self, options):
#Path of condor compiled package
self.package = options['package'].strip()
......@@ -42,59 +66,56 @@ class Recipe(GenericBaseRecipe):
#Directory to deploy condor
self.prefix = options['rootdirectory'].strip()
self.localdir = options['local-dir'].strip()
self.config_wrapper = options['config_wrapper'].strip()
self.wrapperdir = options['wrapper-dir'].strip()
self.wrapper_bin = options['bin'].strip()
self.wrapper_sbin = options['sbin'].strip()
self.diskspace = options['disk-space'].strip()
self.ipv6 = options['ip'].strip()
self.condor_host = options['condor_host'].strip()
self.domain = options['domain'].strip()
self.collector = options['collector_name'].strip()
self.linkdir = options['linkdir'].strip()
self.path = options['path'].strip()
if options['machine-role'].strip() == "manager":
self.role = "manager,submit"
elif options['machine-role'].strip() == "worker":
self.role = "submit,execute"
self.centralhost = self.options['central-host'].strip()
def install(self):
path_list = []
#get UID and GID for current slapuser
stat_info = os.stat(self.rootdir)
slapuser = str(stat_info.st_uid)+"."+str(stat_info.st_gid)
domain_name = 'slapos%s.com' % stat_info.st_uid
#Configure condor
environment = os.environ.copy()
environment['PATH'] = os.path.dirname(self.perlbin) + ':' + environment['PATH']
environment['LD_LIBRARY_PATH'] = os.path.dirname(self.perlbin) + ':' + os.environ['PATH']
environment['HOME'] = self.localdir
environment['HOSTNAME'] = self.condor_host
configure_script = os.path.join(self.package, 'condor_configure')
install_args = [configure_script, '--install='+self.package,
'--prefix='+self.prefix, '--overwrite',
'--local-dir='+self.localdir, '--type='+self.role]
configure = subprocess.Popen(install_args, env=environment,
'--prefix='+self.prefix, '--overwrite', '--verbose',
'--local-dir='+self.localdir] #--ignore-missing-libs
if self.options['machine-role'].strip() == "manager":
self.role = "manager,submit"
elif self.options['machine-role'].strip() == "worker":
if not self.centralhost:
raise Exception("ERROR: Cannot deploy condor worker without specify the central manager")
self.role = "execute"
install_args += ['--central-manager='+self.centralhost,
'--type='+self.role]
configure = subprocess.Popen(install_args, env=self.environ,
stdout=subprocess.PIPE)
configure.wait()
configure.communicate()[0]
if configure.returncode is None or configure.returncode != 0:
return path_list
#Generate condor_configure file
condor_config = os.path.join(self.rootdir, 'etc/condor_config')
config_local = os.path.join(self.localdir, 'condor_config.local')
condor_configure = dict(condor_host=self.condor_host, releasedir=self.prefix,
localdir=self.localdir, config_local=config_local,
slapuser=slapuser, domain=self.domain, ipv6=self.ipv6,
diskspace=self.diskspace, javabin=self.javabin)
slapuser=slapuser, ipv6=self.ipv6,
diskspace=self.diskspace, javabin=self.javabin,
domain_name=domain_name)
destination = os.path.join(condor_config)
config = self.createFile(destination,
self.substituteTemplate(self.getTemplateFilename('condor_config.generic'),
condor_configure))
path_list.append(config)
#Update condor_configure.local file
#config_local_path = os.path.join(self.localdir, 'condor_config.local')
#create condor binary launcher for slapos
if not os.path.exists(self.wrapper_bin):
os.makedirs(self.wrapper_bin, int('0744', 8))
......@@ -107,7 +128,6 @@ class Recipe(GenericBaseRecipe):
current_exe = os.path.join(self.prefix, 'bin', binary)
wrapper = open(wrapper_location, 'w')
content = """#!%s
cd %s
export LD_LIBRARY_PATH=%s
export PATH=%s
export CONDOR_CONFIG=%s
......@@ -115,7 +135,8 @@ class Recipe(GenericBaseRecipe):
export CONDOR_IDS=%s
export HOME=%s
export HOSTNAME=%s
exec %s $*""" % (self.dash, self.wrapper_bin, self.linkdir, self.path,
exec %s $*""" % (self.dash,
self.environ['LD_LIBRARY_PATH'], self.environ['PATH'],
condor_config, self.prefix, slapuser, self.localdir,
self.condor_host, current_exe)
wrapper.write(content)
......@@ -129,7 +150,6 @@ class Recipe(GenericBaseRecipe):
current_exe = os.path.join(self.prefix, 'sbin', binary)
wrapper = open(wrapper_location, 'w')
content = """#!%s
cd %s
export LD_LIBRARY_PATH=%s
export PATH=%s
export CONDOR_CONFIG=%s
......@@ -137,11 +157,127 @@ class Recipe(GenericBaseRecipe):
export CONDOR_IDS=%s
export HOME=%s
export HOSTNAME=%s
exec %s $*""" % (self.dash, self.wrapper_sbin, self.linkdir, self.path,
exec %s $*""" % (self.dash,
self.environ['LD_LIBRARY_PATH'], self.environ['PATH'],
condor_config, self.prefix, slapuser, self.localdir,
self.condor_host, current_exe)
wrapper.write(content)
wrapper.close()
path_list.append(wrapper_location)
os.chmod(wrapper_location, 0744)
#update environment variable
self.environ['CONDOR_CONFIG'] = condor_config
self.environ['CONDOR_LOCATION'] = self.prefix
self.environ['CONDOR_IDS'] = slapuser
#generate script for start condor
start_condor = os.path.join(self.wrapperdir, 'start_condor')
#if self.role == "manager,submit":
# binary = os.path.join(self.wrapper_sbin, 'condor_master')
#elif self.role == "execute":
# binary = os.path.join(self.wrapper_bin, 'condor_run')
start_bin = os.path.join(self.wrapper_sbin, 'condor_master')
restart_bin = os.path.join(self.wrapper_sbin, 'condor_restart')
wrapper = self.createPythonScript(start_condor,
'%s.configure.condorStart' % __name__,
dict(start_bin=start_bin, restart_bin=restart_bin)
)
path_list.append(wrapper)
return path_list
class AppSubmit(GenericBaseRecipe):
    """Submit a condor job into an existing Condor master instance."""

    def download(self, url, filename=None, md5sum=None):
        """Download ``url`` into ``<rootdirectory>/tmp`` and return its path.

        The cache directory is created when missing. When ``filename`` is
        given, the downloaded file is renamed to that name inside the cache
        directory and the renamed path is returned.
        """
        cache = os.path.join(self.options['rootdirectory'].strip(), 'tmp')
        if not os.path.exists(cache):
            os.mkdir(cache)
        downloader = zc.buildout.download.Download(
            self.buildout['buildout'], hash_name=True, cache=cache)
        path, _ = downloader(url, md5sum)
        if filename:
            name = os.path.join(cache, filename)
            os.rename(path, name)
            return name
        return path

    def copy_file(self, source, dest):
        """Copy ``source`` to ``dest``, replacing ``dest`` when it differs.

        Returns True if the file has been copied (``dest`` was missing or
        has been replaced). Returns False when ``source`` is empty or does
        not exist, or when ``dest`` already compares equal to ``source``.
        """
        if not source or not os.path.exists(source):
            return False
        if os.path.exists(dest):
            if filecmp.cmp(dest, source):
                return False
            os.unlink(dest)
        shutil.copy(source, dest)
        return True

    def getFiles(self):
        """Download application files if necessary and update option values.

        For every non-blank line of the ``files`` option this stores
        ``file_<i>`` (local path) and ``name_<i>`` (base name used for the
        link created by ``install``), and sets ``file-number`` to the number
        of entries. Remote ``executable`` and ``description-file`` values
        are downloaded and replaced by their local path.
        """
        remote_prefixes = ('http', 'ftp')
        self.options['file-number'] = 0
        if self.options['files']:
            # Strip before filtering so whitespace-only lines are dropped
            # instead of producing an empty path.
            stripped = [line.strip() for line in self.options['files'].splitlines()]
            files_list = [value for value in stripped if value]
            self.options['file-number'] = len(files_list)
            for i, value in enumerate(files_list):
                pos = str(i)
                if value.startswith(remote_prefixes):
                    self.options['name_' + pos] = os.path.basename(
                        urlparse.urlparse(value)[2])
                    self.options['file_' + pos] = self.download(value)
                else:
                    # install() reads name_<i> for every entry, so local
                    # files need a name as well.
                    self.options['name_' + pos] = os.path.basename(value)
                    self.options['file_' + pos] = value
                os.chmod(self.options['file_' + pos], 0o600)
        executable = self.options['executable']
        if executable and executable.startswith(remote_prefixes):
            self.options['executable'] = self.download(
                executable, self.options['executable-name'].strip())
        os.chmod(self.options['executable'], 0o700)
        submit_file = self.options['description-file']
        if submit_file and submit_file.startswith(remote_prefixes):
            self.options['description-file'] = self.download(submit_file, 'submit')
        os.chmod(self.options['description-file'], 0o600)

    def install(self):
        """Prepare the job directory and generate the job submission wrapper.

        Returns the list of generated paths, or an empty list when the
        instance role is not ``manager``.
        """
        path_list = []
        # Check that the current condor instance is a condor master.
        if self.options['machine-role'].strip() != "manager":
            print("ERROR: cannot submit a job to a worker condor instance")
            return []

        # Set up directories.
        jobdir = self.options['job-dir'].strip()
        appname = self.options['app-name'].strip()
        appdir = os.path.join(jobdir, appname)
        submitfile = os.path.join(appdir, 'submit')
        for directory in (jobdir, appdir):
            if not os.path.exists(directory):
                os.mkdir(directory)

        self.getFiles()
        self.copy_file(
            self.options['executable'],
            os.path.join(appdir, self.options['executable-name'].strip()))
        install = self.copy_file(self.options['description-file'], submitfile)
        sig_install = os.path.join(appdir, '.install')
        if install:
            # Marker file: the submit wrapper only runs condor_submit while
            # this file exists.
            with open(sig_install, 'w') as f:
                f.write('to_install')
        for i in range(self.options['file-number']):
            destination = os.path.join(appdir, self.options['name_' + str(i)])
            # lexists also detects a dangling symlink, which os.path.exists
            # reports as missing and which would make os.symlink fail.
            if os.path.lexists(destination):
                os.unlink(destination)
            os.symlink(self.options['file_' + str(i)], destination)

        # Generate the wrapper for submitting the job.
        condor_submit = os.path.join(self.options['bin'].strip(), 'condor_submit')
        parameter = dict(submit=condor_submit, sig_install=sig_install,
                         submit_file='submit',
                         appname=appname, appdir=appdir)
        submit_job = self.createPythonScript(
            os.path.join(self.options['wrapper-dir'].strip(), appname),
            '%s.configure.submitJob' % __name__, parameter
        )
        path_list.append(submit_job)
        return path_list
\ No newline at end of file
##############################################################################
#
# Copyright (c) 2010 Vifib SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import os
import subprocess
import time
def submitJob(args):
    """Run condor_submit (if needed) for job deployment.

    ``args`` is a dict with the keys:

    * ``appname``: application name, used in messages;
    * ``sig_install``: path of the marker file; nothing is submitted when
      it does not exist, and it is removed after a successful submission;
    * ``submit``: path of the condor_submit executable;
    * ``submit_file``: submit description file, relative to ``appdir``;
    * ``appdir``: working directory for condor_submit;
    * ``delay`` (optional): seconds to sleep before checking, default 10.
    """
    # NOTE(review): the pause presumably lets the Condor daemons come up
    # before a job is submitted -- confirm.
    time.sleep(args.get('delay', 10))
    print("Check if needed to submit %s job's" % args['appname'])
    if not os.path.exists(args['sig_install']):
        print("Nothing for install or update...Exited")
        return
    # '-a', "log = out.log", '-a', "error = error.log",
    launch_args = [args['submit'], '-verbose', args['submit_file']]
    process = subprocess.Popen(launch_args, stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT, cwd=args['appdir'])
    result = process.communicate()[0]
    if process.returncode != 0:
        # Keep the marker file so that the submission is retried next time.
        print("Failed to execute condor_submit.\nThe error was: %s" % result)
    else:
        os.unlink(args['sig_install'])
def condorStart(args):
    """Restart Condor, or start it if the restart command fails.

    ``args`` is a dict with the keys ``restart_bin`` and ``start_bin``,
    each the path of an executable run without arguments. ``start_bin`` is
    only run when ``restart_bin`` cannot be launched or exits with a
    non-zero status.
    """
    def launch(binary):
        # Run the binary directly rather than through a shell, so that
        # spaces or shell metacharacters in the path are not interpreted.
        try:
            return subprocess.call([binary])
        except OSError:
            # Missing or non-executable binary: report it as a failure,
            # like the non-zero status a shell would have returned.
            return 127

    if launch(args['restart_bin']) != 0:
        # The restart failed, which presumably means that the condor
        # daemons are not currently started.
        launch(args['start_bin'])
\ No newline at end of file
......@@ -46,6 +46,9 @@
######################################################################
######################################################################
NO_DNS = True
DEFAULT_DOMAIN_NAME = %(domain_name)s
ENABLE_IPV6 = TRUE
## What machine is your central manager?
CONDOR_HOST = %(condor_host)s
##--------------------------------------------------------------------
......@@ -204,7 +207,7 @@ ALLOW_OWNER = $(FULL_HOSTNAME), $(ALLOW_ADMINISTRATOR)
## will be able to view the status of your pool and more easily help
## you install, configure or debug your Condor installation.
## It is important to have this defined.
ALLOW_READ = %(domain)s
ALLOW_READ = *
#ALLOW_READ = *.your.domain, *.cs.wisc.edu
#DENY_READ = *.bad.subnet, bad-machine.your.domain, 144.77.88.*
......@@ -322,7 +325,7 @@ ALLOW_READ_STARTD = $(ALLOW_READ), $(FLOCK_FROM)
##--------------------------------------------------------------------
## Do you want to use NFS for file access instead of remote system
## calls?
USE_NFS = True
#USE_NFS = False
## Do you want to use AFS for file access instead of remote system
## calls?
......
[buildout]
parts =
condor
app-submit
publish-connection-informations
eggs-directory = ${buildout:eggs-directory}
develop-eggs-directory = ${buildout:develop-eggs-directory}
offline = true
#Enable download
newest = false
offline = false
# Create all needed directories
[rootdirectory]
......@@ -12,6 +16,7 @@ recipe = slapos.cookbook:mkdirectory
etc = $${buildout:directory}/etc/
var = $${buildout:directory}/var/
condor = $${buildout:directory}/condor/
job = $${buildout:directory}/jobs/
bin = $${buildout:directory}/bin/
sbin = $${buildout:directory}/sbin/
srv = $${buildout:directory}/srv/
......@@ -30,32 +35,60 @@ wrapper = $${basedirectory:run}/condor
[condor]
recipe = slapos.cookbook:condor
config_wrapper = $${basedirectory:services}/configure
wrapper-dir = $${basedirectory:services}
ip = $${slap-network-information:global-ipv6}
package = ${condor:location}
rootdirectory = $${buildout:directory}
local-dir = $${rootdirectory:condor}
job-dir = $${rootdirectory:job}
perl-bin = ${perl:location}/bin
java-bin = ${java:location}/bin
bin = $${wrapperdirectory:wrapper}/bin/
sbin = $${wrapperdirectory:wrapper}/sbin/
#additional LD_LIBRARY_PATH and PATH for all condor executable files
linkdir = ${libexpat:location}/lib:${kerberos:location}/lib:${openldap:location}/lib:${zlib:location}/lib
path = ${perl:location}/bin:${java:location}/bin:${kerberos:location}/bin:${openldap:location}/bin:$PATH
dash = ${dash:location}/bin/dash
environment =
LD_LIBRARY_PATH=${libexpat:location}/lib:${kerberos:location}/lib:${openldap:location}/lib:${zlib:location}/lib
PATH=${perl:location}/bin:${java:location}/bin:${kerberos:location}/bin:${openldap:location}/bin:%(PATH)s
HOME=$${rootdirectory:condor}
HOSTNAME=$${slap-parameter:host_manager_name}
#Condor user parameter
central-host = $${slap-parameter:central-manager}
condor_host = $${slap-parameter:host_manager_name}
domain = $${slap-parameter:domain}
collector_name = $${slap-parameter:collector_name}
#Condor machine role: worker=submit,execute manager=manager,submit
machine-role = $${slap-parameter:role}
disk-space = $${slap-parameter:diskspace}
[app-submit]
<= condor
recipe = slapos.cookbook:condor.submit
app-name = $${slap-parameter:app-name}
#user can specify a list of files to use in the application, one per line
files = $${slap-parameter:files}
description-file = $${slap-parameter:description-file}
executable = $${slap-parameter:executable}
executable-name = $${slap-parameter:executable-name}
# Publish all instance parameters (url of instance)
[publish-connection-informations]
recipe = slapos.cookbook:publish
ipv6_address = $${condor:ip}
instance_type = $${condor:machine-role}
condor_host = $${condor:condor_host}
[slap-parameter]
# Default values if not specified
host_manager_name = [$${slap-network-information:global-ipv6}]
domain = *
collector_name = SLAPOS-CONDOR-POOL
role = worker
role = manager
diskspace = 5
#submit application
central-manager =
app-name = condor_test
description-file = ${description-file:location}/${description-file:filename}
executable = ${executable:location}/${executable:filename}
executable-name = ${executable:filename}
#user can specify a list of input files, one per line
files =
[buildout]
develop =
/srv/slapgrid/slappart19/srv//runner/project/slapos.github
parts =
condor
instance-egg
template
template-condor
description-file
executable
extends =
../../component/condor/buildout.cfg
download-cache = ${buildout:directory}/cache
[template]
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance.cfg
......@@ -25,8 +22,24 @@ md5sum = 9e9db6f4c5e38ce3fd45d43c2bf616a8
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance-condor.cfg
output = ${buildout:directory}/template-condor.cfg
md5sum = d74e38da04650ad70a8c0e909ccd6be0
md5sum = 9df787cc62e2dc6741c2faf1b1e0f315
mode = 0644
[description-file]
recipe = slapos.recipe.download
mode = 0644
url = ${:_profile_base_location_}/template/submit
filename = submit
location = ${buildout:parts-directory}/${:_buildout_section_name_}
md5sum = 8180d88348b89b55216f8dd4475a9eea
[executable]
recipe = slapos.recipe.download
mode = 0644
url = ${:_profile_base_location_}/template/simple
filename = simple
location = ${buildout:parts-directory}/${:_buildout_section_name_}
md5sum = c512f495cdd112bceb04feab7c909a10
[instance-egg]
recipe = zc.recipe.egg
......
Universe = vanilla
Executable = simple
Arguments = 4 10
Log = simple.log
Output = simple.$(Process).out
Error = simple.$(Process).error
Queue
Arguments = 4 11
Queue
Arguments = 4 12
Queue
\ No newline at end of file
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment