Study submission example

from pyHTC.Study import *
import pyHTC.toolkit as toolbox
# Definition of the study 

myName = 'myStudy'
myPath = '/afs/cern.ch/user/a/apoyet/public/pyHTC/example'
myExe = '/afs/cern.ch/user/m/mad/bin/madx'
mySubFileName = 'mySubFile'

myStudy = StudyObj(myName, myPath, myExe, mySubFileName,
                   input_dir='input/', output_dir='output/', error_dir='error/', log_dir='log/')
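# NB (assumption): the input/, output/, error/ and log/ directories should
# already exist under myPath, since HTCondor does not create them itself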
# One now has to create the input files...

myTemplate = 'myTemplate.madx'
myMaskedParam = toolbox.getMaskedParameterList(myTemplate, tag='%MASKED_')
print(myMaskedParam)
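# For reference, the template is a plain text file in which the values to scan
# are replaced by tags carrying the chosen prefix. A minimal hypothetical
# excerpt (not the actual myTemplate.madx):
#
#     ! illustrative MAD-X lines
#     qx = %MASKED_Q1;
#     qy = %MASKED_Q2;
#     ! ... with the results written to %MASKED_output_file
#
# so the print above would list tags such as '%MASKED_Q1', '%MASKED_Q2'
# and '%MASKED_output_file'.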
# Definition of the parameters
# NB: the study can involve any number of parameters

myParam = {'Q1': [0.25, 0.30], 'Q2': [0.26, 0.28, 0.29]}

myStudy.define_study(myParam)

# NB: the parameters can be passed either as a dict (as above) or as a pandas DataFrame (see the sketch below)
# One can access the parameters and their values

print('The parameters are : {}'.format(myStudy.parameters_keys))
print('Their values are : {}'.format(myStudy.parameters_values))
print('Printing in full : {}'.format(myStudy.parameters))
The parameters are : ['Q1', 'Q2']
Their values are : [[0.25, 0.3], [0.26, 0.28, 0.29]]
Printing in full : {'Q1': [0.25, 0.3], 'Q2': [0.26, 0.28, 0.29]}
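# As noted above, the parameters could also be supplied as a pandas DataFrame.
# A minimal sketch, assuming define_study accepts a DataFrame whose columns are
# the parameter names (None pads the shorter column):

import pandas as pd

myParamDF = pd.DataFrame({'Q1': [0.25, 0.30, None], 'Q2': [0.26, 0.28, 0.29]})
# myStudy.define_study(myParamDF)  # hypothetical: equivalent to the dict call above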
# Creation of the input files

# Loop over all (Q1, Q2) combinations: one input file per point of the study
for x in myStudy.parameters['Q1']:
    for y in myStudy.parameters['Q2']:
        myMachineParam = {
            '%MASKED_Q1': x,
            '%MASKED_Q2': y,
            '%MASKED_output_file': '/afs/cern.ch/user/a/apoyet/public/pyHTC/example/mad_output/ptc_out_{0}_{1}.txt'.format(x, y)
        }
        myInputFile = 'input/{0}_{1}_{2}.in'.format(myStudy.name, x, y)
        # Substitute the masked tags in the template and write out the input file
        toolbox.unmask(myTemplate, myMaskedParam, myMachineParam, myInputFile)
# Creating the submission file corresponding to the STUDY
# NB: MULTIPLE JOBS SUBMISSION

myStudy.submit2file(myStudy.submit2str())
# One can display the submission file

myStudy.display_subfile()
executable = /afs/cern.ch/user/m/mad/bin/madx
input = $(input_file)
arguments = $(ClusterId) $(ProcId)
output = output/myStudy.$(ClusterId).$(ProcId).out
error = error/myStudy.$(ClusterId).$(ProcId).err
log = log/myStudy.$(ClusterId).log
universe = vanilla
queue input_file matching files /afs/cern.ch/user/a/apoyet/public/pyHTC/example/input/myStudy_*.in
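# NB: the 'queue input_file matching files ...' statement creates one job per
# matching input file: here 2 Q1 values x 3 Q2 values = 6 files, hence the
# 6 jobs below.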
# And...... SUBMISSION

myStudy.submit2HTCondor()
Submitting job(s)......
6 job(s) submitted to cluster 3652946.
# Monitoring the jobs... :) 

myStudy.condor_q()
-- Schedd: bigbird16.cern.ch : <188.184.90.62:9618?... @ 07/22/19 14:45:40
OWNER BATCH_NAME      SUBMITTED   DONE   RUN    IDLE   HOLD  TOTAL JOB_IDS

0 jobs; 0 completed, 0 removed, 0 idle, 0 running, 0 held, 0 suspended
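# NB: an empty queue here just means no jobs are left pending, presumably
# because these short jobs had already completed by the time condor_q ran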
# The idea is then to generate a pandas DataFrame containing the different points (or JOBS) of the study
# The DF will be used as a reference afterwards to retrieve which job was run with which parameters
# It should therefore contain the paths of the corresponding files

df = myStudy.get_studyDF()
# Let's print the DF

df
                     Q1    Q2                       Input                        Output                         Error                      Log  ProcID
myStudy_0.25_0.26  0.25  0.26  input/myStudy_0.25_0.26.in  output/myStudy.3652857.0.out  error/myStudy.3652857.0.err  log/myStudy.3652857.log       0
myStudy_0.25_0.28  0.25  0.28  input/myStudy_0.25_0.28.in  output/myStudy.3652857.1.out  error/myStudy.3652857.1.err  log/myStudy.3652857.log       1
myStudy_0.25_0.29  0.25  0.29  input/myStudy_0.25_0.29.in  output/myStudy.3652857.2.out  error/myStudy.3652857.2.err  log/myStudy.3652857.log       2
myStudy_0.3_0.26    0.3  0.26   input/myStudy_0.3_0.26.in  output/myStudy.3652857.3.out  error/myStudy.3652857.3.err  log/myStudy.3652857.log       3
myStudy_0.3_0.28    0.3  0.28   input/myStudy_0.3_0.28.in  output/myStudy.3652857.4.out  error/myStudy.3652857.4.err  log/myStudy.3652857.log       4
myStudy_0.3_0.29    0.3  0.29   input/myStudy_0.3_0.29.in  output/myStudy.3652857.5.out  error/myStudy.3652857.5.err  log/myStudy.3652857.log       5
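# The DF can then be used to map a point of the study back to its files.
# A minimal sketch of a hypothetical lookup, reusing the columns shown above:

point = df[(df['Q1'] == 0.25) & (df['Q2'] == 0.28)]
print(point['Output'].values[0])  # -> output/myStudy.3652857.1.out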