Study submission example

from pyHTC.Study import *
import pyHTC.toolkit as toolbox
# Definition of the study 

myName = 'myStudy'
myPath = '/afs/cern.ch/user/a/apoyet/public/pyHTC/example'
myExe = '/afs/cern.ch/user/m/mad/bin/madx'
mySubFileName = 'mySubFile'

myStudy = StudyObj(myName, myPath, myExe, mySubFileName,
                   input_dir='input/', output_dir='output/', error_dir='error/', log_dir='log/')
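# NB (assumption): the input/, output/, error/ and log/ directories should
# already exist under myPath, since HTCondor does not create them itself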
# One now has to create the input files...

myTemplate = 'myTemplate.madx'
myMaskedParam = toolbox.getMaskedParameterList(myTemplate, tag='%MASKED_')
print(myMaskedParam)
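# For reference, the template is a plain text file in which the values to scan
# are replaced by tags carrying the chosen prefix. A minimal hypothetical
# excerpt (not the actual myTemplate.madx):
#
#     ! illustrative MAD-X lines
#     qx = %MASKED_Q1;
#     qy = %MASKED_Q2;
#     ! ... with the results written to %MASKED_output_file
#
# so the print above would list tags such as '%MASKED_Q1', '%MASKED_Q2'
# and '%MASKED_output_file'.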
# Definition of the parameters
# NB: the study can involve any number of parameters

myParam = {'Q1': [0.25, 0.30], 'Q2': [0.26, 0.28, 0.29]}

myStudy.define_study(myParam)

# NB: the parameters can be passed either as a dict (as above) or as a pandas DataFrame (see the sketch below)
# One can access the parameters and their values

print('The parameters are : {}'.format(myStudy.parameters_keys))
print('Their values are : {}'.format(myStudy.parameters_values))
print('Printing in full : {}'.format(myStudy.parameters))
The parameters are : ['Q1', 'Q2']
Their values are : [[0.25, 0.3], [0.26, 0.28, 0.29]]
Printing in full : {'Q1': [0.25, 0.3], 'Q2': [0.26, 0.28, 0.29]}
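# As noted above, the parameters could also be supplied as a pandas DataFrame.
# A minimal sketch, assuming define_study accepts a DataFrame whose columns are
# the parameter names (None pads the shorter column):

import pandas as pd

myParamDF = pd.DataFrame({'Q1': [0.25, 0.30, None], 'Q2': [0.26, 0.28, 0.29]})
# myStudy.define_study(myParamDF)  # hypothetical: equivalent to the dict call above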
# Creation of the input files

# Loop over all (Q1, Q2) combinations: one input file per point of the study
for x in myStudy.parameters['Q1']:
    for y in myStudy.parameters['Q2']:
        myMachineParam = {
            '%MASKED_Q1': x,
            '%MASKED_Q2': y,
            '%MASKED_output_file': '/afs/cern.ch/user/a/apoyet/public/pyHTC/example/mad_output/ptc_out_{0}_{1}.txt'.format(x, y)
        }
        myInputFile = 'input/{0}_{1}_{2}.in'.format(myStudy.name, x, y)
        # Substitute the masked tags in the template and write out the input file
        toolbox.unmask(myTemplate, myMaskedParam, myMachineParam, myInputFile)
# Creating the submission file corresponding to the STUDY
# NB: MULTIPLE JOBS SUBMISSION

myStudy.submit2file(myStudy.submit2str())
# One can display the submission file

myStudy.display_subfile()
executable = /afs/cern.ch/user/m/mad/bin/madx
input = $(input_file)
arguments = $(ClusterId) $(ProcId)
output = output/myStudy.$(ClusterId).$(ProcId).out
error = error/myStudy.$(ClusterId).$(ProcId).err
log = log/myStudy.$(ClusterId).log
universe = vanilla
queue input_file matching files /afs/cern.ch/user/a/apoyet/public/pyHTC/example/input/myStudy_*.in
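# NB: the 'queue input_file matching files ...' statement creates one job per
# matching input file: here 2 Q1 values x 3 Q2 values = 6 files, hence the
# 6 jobs below.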
# And...... SUBMISSION

myStudy.submit2HTCondor()
Submitting job(s)......
6 job(s) submitted to cluster 3652946.
# Monitoring the jobs... :) 

myStudy.condor_q()
-- Schedd: bigbird16.cern.ch : <188.184.90.62:9618?... @ 07/22/19 14:45:40
OWNER BATCH_NAME      SUBMITTED   DONE   RUN    IDLE   HOLD  TOTAL JOB_IDS

0 jobs; 0 completed, 0 removed, 0 idle, 0 running, 0 held, 0 suspended
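# NB: an empty queue here just means no jobs are left pending, presumably
# because these short jobs had already completed by the time condor_q ran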
# The idea is then to generate a pandas DataFrame containing the different points (or JOBS) of the study
# The DF will be used as a reference afterwards to retrieve which job was run with which parameters
# It should therefore contain the paths of the corresponding files

df = myStudy.get_studyDF()
# Let's print the DF

df
                     Q1    Q2                       Input                        Output                         Error                      Log  ProcID
myStudy_0.25_0.26  0.25  0.26  input/myStudy_0.25_0.26.in  output/myStudy.3652857.0.out  error/myStudy.3652857.0.err  log/myStudy.3652857.log       0
myStudy_0.25_0.28  0.25  0.28  input/myStudy_0.25_0.28.in  output/myStudy.3652857.1.out  error/myStudy.3652857.1.err  log/myStudy.3652857.log       1
myStudy_0.25_0.29  0.25  0.29  input/myStudy_0.25_0.29.in  output/myStudy.3652857.2.out  error/myStudy.3652857.2.err  log/myStudy.3652857.log       2
myStudy_0.3_0.26    0.3  0.26   input/myStudy_0.3_0.26.in  output/myStudy.3652857.3.out  error/myStudy.3652857.3.err  log/myStudy.3652857.log       3
myStudy_0.3_0.28    0.3  0.28   input/myStudy_0.3_0.28.in  output/myStudy.3652857.4.out  error/myStudy.3652857.4.err  log/myStudy.3652857.log       4
myStudy_0.3_0.29    0.3  0.29   input/myStudy_0.3_0.29.in  output/myStudy.3652857.5.out  error/myStudy.3652857.5.err  log/myStudy.3652857.log       5
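# The DF can then be used to map a point of the study back to its files.
# A minimal sketch of a hypothetical lookup, reusing the columns shown above:

point = df[(df['Q1'] == 0.25) & (df['Q2'] == 0.28)]
print(point['Output'].values[0])  # -> output/myStudy.3652857.1.out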