log.debug("CondorJobs:__init__")
log.debug("CondorJobs:submitJob")
clusterexp = re.compile(r"1 job\(s\) submitted to cluster (\d+).")
submitRequest = "condor_submit %s" % condorFile
pop = os.popen(submitRequest, "r")
# ...
num = clusterexp.findall(line)
# ...
print "submitted job # %s as file %s" % (num[0], condorFile)
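The cluster-number parsing above can be exercised on its own; a minimal sketch, assuming a condor_submit output line of the form the regex already expects:

import re

clusterexp = re.compile(r"1 job\(s\) submitted to cluster (\d+).")
sample = "1 job(s) submitted to cluster 1234."   # assumed example output line
num = clusterexp.findall(sample)
if num:
    print "submitted as cluster %s" % num[0]     # -> submitted as cluster 1234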
log.debug("CondorJobs:waitForJobToRun")
queueExp = re.compile(r"\S+")
print "waiting for job %s to run." % num
# ...
pop = os.popen("condor_q", "r")
# ...
if (secondsWaited > 0) and ((secondsWaited % 60) == 0):
    minutes = secondsWaited / 60
    print "waited %d minute%s so far. still waiting for job %s to run." % (minutes, "" if minutes == 1 else "s", num)
# ...
values = queueExp.findall(line)
# ...
if values[0] == jobNum:
    cJobSeen = cJobSeen + 1
if (values[0] == jobNum) and (runstate == 'R'):
    print "Job %s is now being run." % num
if (values[0] == jobNum) and (runstate == 'H'):
    print "Job %s is being held. Please review the logs." % num
if (values[0] == jobNum) and (runstate == 'X'):
    print "Saw job %s, but it was being aborted" % num
if (values[0] == jobNum) and (runstate == 'C'):
    print "Job %s is being cancelled." % num
if (cJobSeen > 0) and not bJobSeenNow:
    print "Was monitoring job %s, but it exited." % num
# ...
secondsWaited = secondsWaited + 1
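The checks above compare values[0] against the job number and test a runstate taken from the same condor_q line; how runstate is derived is not shown in this fragment, so the column index below is an assumption about the classic condor_q listing (ID, OWNER, SUBMITTED, RUN_TIME, ST, ...):

import re

queueExp = re.compile(r"\S+")
# assumed sample condor_q line; the status letter lives in the "ST" column
line = "1234.0  user  1/1  12:00  0+00:01:02 R  0  0.1  job.sh"
values = queueExp.findall(line)
jobNum = values[0]      # "1234.0"
runstate = values[5]    # assumed ST column index: 'R', 'H', 'X', or 'C'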
log.debug("CondorJobs:waitForAllJobsToRun")
queueExp = re.compile(r"\S+")
jobList = list(numList)
# ...
pop = os.popen("condor_q", "r")
# ...
line = pop.readline()
# ...
values = queueExp.findall(line)
# ...
for jobEntry in jobList:
    jobId = "%s.0" % jobEntry
    if (jobNum == jobId) and (runstate == 'R'):
        jobList = [job for job in jobList if job != jobEntry]
        if len(jobList) == 0:
            # ...
    if (jobNum == jobEntry) and (runstate == 'H'):
        # ...
log.debug("CondorJobs: submitCondorDag " + filename)
# ...
clusterexp = re.compile(r"1 job\(s\) submitted to cluster (\d+).")
cmd = "condor_submit_dag %s" % filename
# ...
process = subprocess.Popen(cmd.split(), shell=False, stdout=subprocess.PIPE)
# ...
line = process.stdout.readline()
# ...
line = process.stdout.readline()
# ...
num = clusterexp.findall(line)
# ...
stdoutdata, stderrdata = process.communicate()
# ...
stdoutdata, stderrdata = process.communicate()
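A compact sketch of the same DAG submission, reading the whole condor_submit_dag output through communicate() instead of line by line; the function name is illustrative, and the cluster line format is the one the regex above already assumes:

import re
import subprocess

def submit_dag(filename):
    clusterexp = re.compile(r"1 job\(s\) submitted to cluster (\d+).")
    process = subprocess.Popen(["condor_submit_dag", filename],
                               shell=False, stdout=subprocess.PIPE)
    stdoutdata, stderrdata = process.communicate()
    num = clusterexp.findall(stdoutdata)
    return num[0] if num else None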
log.debug("CondorJobs: killCondorId " + str(cid))
cmd = "condor_rm " + str(cid)
process = subprocess.Popen(cmd.split(), shell=False, stdout=subprocess.PIPE)
line = process.stdout.readline()
# ...
line = process.stdout.readline()
# ...
stdoutdata, stderrdata = process.communicate()
jobNum = "%s.0" % cid
queueExp = re.compile(r"\S+")
process = subprocess.Popen("condor_q", shell=False, stdout=subprocess.PIPE)
# ...
line = process.stdout.readline()
# ...
values = queueExp.findall(line)
# ...
if values[0] == jobNum:
    # ...
    stdoutdata, stderrdata = process.communicate()
# ...
stdoutdata, stderrdata = process.communicate()
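The isJobAlive fragment above does not show its return value; a minimal sketch of the same idea, assuming the job counts as alive whenever its id still appears in the condor_q output:

import re
import subprocess

def is_job_alive(cid):
    jobNum = "%s.0" % cid
    queueExp = re.compile(r"\S+")
    process = subprocess.Popen("condor_q", shell=False, stdout=subprocess.PIPE)
    stdoutdata, stderrdata = process.communicate()
    for line in stdoutdata.splitlines():
        values = queueExp.findall(line)
        if values and values[0] == jobNum:
            return True
    return False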
CondorJobs - handles interaction with HTCondor. This class is highly dependent on the output of the co...
A brief usage sketch of these methods follows the summaries below.

def submitJob
    submit a condor file, and return the job number associated with it.

def waitForJobToRun
    wait for a condor job to reach its run state.

def waitForAllJobsToRun
    wait for all jobs to enter the run state.

def condorSubmitDag
    submit a condor dag and return its cluster number.

def killCondorId
    kill the HTCondor job with this id.

def isJobAlive
    check to see if the job with id "cid" is still alive.
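A hedged usage sketch of the class described above; the constructor arguments and exact call pattern are assumptions, not taken from this file:

jobs = CondorJobs()                       # assumed no-argument constructor
num = jobs.submitJob("worker.condor")     # returns the cluster/job number
jobs.waitForJobToRun(num)                 # blocks until condor_q reports state 'R'
if jobs.isJobAlive(num):
    jobs.killCondorId(num)                # condor_rm the job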