Skip to content

Commit a4c6557

Browse files
committed
Merge pull request #178 from kreczko/create-qcd-sample
Create qcd sample script
2 parents 45b6e2d + 6a62aaa commit a4c6557

File tree

4 files changed

+239
-35
lines changed

4 files changed

+239
-35
lines changed

bin/qcd_from_data

100644100755
Lines changed: 141 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,159 @@ data_file:
1010
mc_file:
1111
path to MC file, used for the normalisation of data histograms
1212
13-
output_file:
14-
path to the output file
13+
subtract_other_samples:
14+
dictionary of (sample, file) to be removed from the template control region (data_file)
15+
16+
histogram_path:
17+
source of histograms. All sub-paths will be considered (recursive)
18+
19+
ignore_subpaths:
20+
sub-paths of histogram_path to be ignored
21+
22+
normalisation_keyword_in_path:
23+
part of the path that determines the normalisation (usually signal) region
24+
25+
shape_keyword_in_path:
26+
part of the path that determines the template (usually control/sideband) region
27+
28+
shape_btag:
29+
b-tag multiplicity to be used for the template region
30+
31+
shape_btag_for_exceptions:
32+
b-tag multiplicity to be used for the template region for exceptions (see shape_btag_exceptions)
1533
16-
data_histograms:
17-
paths to data histograms (from data file)
34+
shape_btag_exceptions:
35+
list of histogram names that have a different b-tag multiplicity for the template region
1836
19-
mc_histograms:
20-
paths to MC histograms (from MC file)
37+
remove_for_shape:
38+
part of the histogram name to be removed from the template histogram name.
39+
Useful for re-weighted histograms (mc-only).
40+
41+
output_file:
42+
path to the output file
2143
22-
output_histograms:
23-
paths of the output histograms
44+
Uses the data_file to extract the templates, removes other samples
45+
(subtract_other_samples) and normalises it according to mc_file.
2446
2547
'''
48+
from ROOT import gROOT
49+
gcd = gROOT.cd
50+
from optparse import OptionParser
51+
from tools.file_utilities import write_data_to_JSON, read_data_from_JSON
52+
from tools.ROOT_utililities import root_mkdir, find_btag, get_histogram_dictionary
53+
from tools.hist_utilities import clean_control_region
54+
from rootpy.io import root_open
2655

2756
def main():
    '''
    Script entry point.

    Prints the welcome banner, reads the input value sets via parse_options()
    and calls create_qcd_file() once per set. With the --test option the
    parsed sets are replaced by a single set of test values that is labelled
    'test.json'.
    '''
    # Single-argument print(...) produces the same output under Python 2
    # (parenthesised expression) and Python 3 (function call).
    print("Welcome to the QCD-from-data merging script")
    print('Please take a seat while the code is being developed.')
    print('Once finished you will be able to create a single file using shapes from data and normalisation from MC')
    print('In the meantime have a look at the script usage')
    print('')
    options, input_values_sets, json_input_files = parse_options()
    if options.test:
        # NOTE(review): setup_test_values is not defined in the part of this
        # script that is visible here - confirm it exists, otherwise running
        # with --test raises a NameError.
        input_values_sets = [setup_test_values()]
        json_input_files = ['test.json']

    # the two lists are filled in lockstep, one entry per JSON input file
    for input_values, json_file in zip(input_values_sets, json_input_files):
        print('Processing %s' % json_file)
        create_qcd_file(input_values)
70+
71+
def parse_options():
    '''
    Parse the command line of the script.

    Every positional argument is taken as the path of a JSON file and is read
    with read_data_from_JSON. When --test is given the positional arguments
    are ignored and both returned lists are empty.

    @return: tuple (options, input_values_sets, json_input_files) where the
             two lists have one entry per JSON file, in command-line order
    '''
    parser = OptionParser( __doc__ )
    parser.add_option( "-t", "--test", action = "store_true", dest = "test",
                       help = "Run with test values and write them to test.json" )
    options, args = parser.parse_args()

    # in test mode nothing is read from disk
    json_input_files = [] if options.test else list( args )
    input_values_sets = [read_data_from_JSON( json_file )
                         for json_file in json_input_files]

    return options, input_values_sets, json_input_files
88+
89+
def _shape_histogram_name(histogram, remove_for_shape, shape_btag_exceptions,
                          shape_btag, shape_btag_for_exceptions):
    '''
    Translate the name of a normalisation histogram into the name of the
    template (shape) histogram: strip the remove_for_shape fragments and swap
    the b-tag multiplicity.
    '''
    shape_histogram = histogram
    for fragment in remove_for_shape:
        shape_histogram = shape_histogram.replace(fragment, '')

    # the b-tag and the exception check both use the unmodified histogram name
    current_btag, _ = find_btag(histogram)
    is_exception = any(name in histogram for name in shape_btag_exceptions)
    new_btag = shape_btag_for_exceptions if is_exception else shape_btag
    return shape_histogram.replace(current_btag, new_btag)


def _template_scale_factor(normalisation, n_entries_shape):
    '''
    Factor that scales a template with integral n_entries_shape to the
    integral 'normalisation'. Empty templates are left untouched (factor 1).
    '''
    if n_entries_shape > 0:
        if normalisation == 0:
            # bug fix for empty templates
            return 1.0 / n_entries_shape
        # float() guards against integer division under Python 2
        return float(normalisation) / n_entries_shape
    return 1


def create_qcd_file(input_values):
    '''
    Create a ROOT file with QCD histograms that take their shape from data
    and their normalisation from MC.

    The MC file is walked recursively. For every histogram in a directory
    whose path contains histogram_path (and none of ignore_subpaths):
      1. the integral (including overflow) of the MC histogram is taken as
         the normalisation,
      2. the matching template is read from the data file: the path has
         normalisation_keyword_in_path replaced by shape_keyword_in_path and
         the histogram name has its b-tag replaced and the remove_for_shape
         fragments stripped,
      3. the samples listed in subtract_other_samples are removed from the
         template via clean_control_region,
      4. the result is scaled to the normalisation and stored under the
         original MC path in output_file.

    @param input_values: dictionary with the keys data_file, mc_file,
        histogram_path, shape_keyword_in_path, shape_btag,
        shape_btag_for_exceptions, shape_btag_exceptions, remove_for_shape,
        normalisation_keyword_in_path, ignore_subpaths,
        subtract_other_samples and output_file
    @raise KeyError: if one of the keys is missing (before any file is opened)
    '''
    data_file = input_values['data_file']
    mc_file = input_values['mc_file']
    histogram_path = input_values['histogram_path']
    shape_keyword_in_path = input_values['shape_keyword_in_path']
    shape_btag = input_values['shape_btag']
    shape_btag_for_exceptions = input_values['shape_btag_for_exceptions']
    shape_btag_exceptions = input_values['shape_btag_exceptions']
    remove_for_shape = input_values['remove_for_shape']
    normalisation_keyword_in_path = input_values['normalisation_keyword_in_path']
    ignore_subpaths = input_values['ignore_subpaths']
    subtract_other_samples = input_values['subtract_other_samples']
    output_file = input_values['output_file']

    total_histograms = 0
    output = {}
    # 'with' makes sure both input files are closed even if a histogram is
    # missing or one of the helpers raises
    with root_open(data_file) as data_file_handle:
        with root_open(mc_file) as f:
            for path, _, histograms in f.walk():
                if histogram_path not in path or not histograms:
                    continue
                if any(subpath in path for subpath in ignore_subpaths):
                    continue

                shape_path = path.replace(normalisation_keyword_in_path,
                                          shape_keyword_in_path)
                for histogram in histograms:
                    hist = f.Get(path + '/' + histogram)
                    normalisation = hist.integral(overflow = True)

                    shape_histogram = _shape_histogram_name(
                                            histogram,
                                            remove_for_shape,
                                            shape_btag_exceptions,
                                            shape_btag,
                                            shape_btag_for_exceptions)
                    full_shape_path = shape_path + '/' + shape_histogram

                    # change to gROOT before cloning, as the original code does
                    gcd()
                    output_hist = data_file_handle.Get(full_shape_path).clone()
                    other_samples = get_histogram_dictionary(full_shape_path,
                                                             subtract_other_samples)
                    # snapshot the sample names BEFORE 'data' is added; a
                    # plain .keys() is a live view under Python 3 and would
                    # put 'data' itself on the subtraction list
                    subtract_samples = list(other_samples)
                    other_samples['data'] = output_hist
                    output_hist = clean_control_region(other_samples,
                                                       subtract = subtract_samples)

                    # scale the histogram
                    n_entries_shape = output_hist.integral(overflow = True)
                    output_hist.Scale(_template_scale_factor(normalisation,
                                                             n_entries_shape))
                    output[path + '/' + histogram] = output_hist
                total_histograms += len(histograms)

    # probably faster to use TFileCache within the loop above.
    with root_open(output_file, 'recreate') as output_file_handle:
        for path_with_hist, histogram in output.items():
            # split at the LAST '/' only: replacing '/<name>' everywhere would
            # corrupt directories whose name starts with the histogram name
            path, _, histogram_name = path_with_hist.rpartition('/')
            root_mkdir(output_file_handle, path)
            output_file_handle.cd(path)
            histogram.write(histogram_name)
        output_file_handle.cd()
    print('Processed %d histograms' % total_histograms)
34166

35167
if __name__ == '__main__':
36168
main()
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"data_file": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/SingleElectron_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
3+
"mc_file": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/QCD_Electron_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
4+
"subtract_other_samples": {
5+
"VJets": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/VJets_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
6+
"TTJet": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/TTJet_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
7+
"SingleTop": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/SingleTop_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root"
8+
},
9+
"histogram_path": "TTbar_plus_X_analysis/EPlusJets/Ref selection/",
10+
"normalisation_keyword_in_path": "Ref selection",
11+
"shape_keyword_in_path": "QCDConversions",
12+
"shape_btag": "0btag",
13+
"shape_btag_for_exceptions": "1btag",
14+
"shape_btag_exceptions": ["angle_bl", "M_bl"],
15+
"remove_for_shape": ["_reweighted"],
16+
"ignore_subpaths": [
17+
"GenMET",
18+
"JetRes",
19+
"Vertices",
20+
"Ref selection/Jets"
21+
],
22+
"output_file": "QCD_Electron_from_conversions.root"
23+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"data_file": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/SingleMu_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
3+
"mc_file": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/QCD_Muon_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
4+
"subtract_other_samples": {
5+
"VJets": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/VJets_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
6+
"TTJet": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/TTJet_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root",
7+
"SingleTop": "/storage/TopQuarkGroup/results/histogramfiles/AN-14-071_5th_draft/8TeV/central/SingleTop_19584pb_PFElectron_PFMuon_PF2PATJets_PFMET.root"
8+
},
9+
"histogram_path": "TTbar_plus_X_analysis/MuPlusJets/Ref selection/",
10+
"normalisation_keyword_in_path": "Ref selection",
11+
"shape_keyword_in_path": "QCD non iso mu+jets ge3j",
12+
"shape_btag": "0btag",
13+
"shape_btag_for_exceptions": "1btag",
14+
"shape_btag_exceptions": ["angle_bl", "M_bl"],
15+
"remove_for_shape": ["_reweighted"],
16+
"ignore_subpaths": [
17+
"GenMET",
18+
"JetRes",
19+
"Vertices",
20+
"Ref selection/Jets"
21+
],
22+
"output_file": "QCD_Muon_from_noniso_ge3j.root"
23+
}

tools/ROOT_utils.py

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,16 @@
88
from ROOT import gROOT
99
gcd = gROOT.cd
1010
from config.summations_common import b_tag_bins_inclusive, b_tag_summations
11+
from config.summations_common import b_tag_bins_exclusive
1112

1213
def get_histogram_from_file( histogram_path, input_file ):
13-
current_btag = b_tag_bins_inclusive[0]
14-
found_btag = False
15-
16-
for b_tag in b_tag_bins_inclusive:
17-
if b_tag in histogram_path:
18-
current_btag = b_tag
19-
found_btag = True
20-
break
14+
current_btag, found_btag = find_btag(histogram_path)
2115

2216
root_file = File( input_file )
2317
get_histogram = root_file.Get
2418

2519

26-
if not found_btag:
20+
if not found_btag or not current_btag in b_tag_summations.keys():
2721
root_histogram = get_histogram( histogram_path )
2822
if not is_valid_histogram( root_histogram, histogram_path, input_file ):
2923
return
@@ -52,7 +46,6 @@ def is_valid_histogram( histogram, histogram_name, file_name ):
5246
return False
5347
return True
5448

55-
5649
# Reads a single histogram from each given rootFile
5750
# and returns a dictionary with the same naming as 'files'
5851
def get_histogram_dictionary( histogram_path, files = {} ):
@@ -71,17 +64,10 @@ def get_histograms_from_files( histogram_paths = [], files = {}, verbose = False
7164
histograms[sample] = {}
7265

7366
for histogram_path in histogram_paths:
74-
current_btag = b_tag_bins_inclusive[0]
75-
found_btag = False
76-
77-
for b_tag in b_tag_bins_inclusive:
78-
if b_tag in histogram_path:
79-
current_btag = b_tag
80-
found_btag = True
81-
break
67+
current_btag, found_btag = find_btag(histogram_path)
8268

8369
root_histogram = None
84-
if not found_btag:
70+
if not found_btag or not current_btag in b_tag_summations.keys():
8571
root_histogram = get_histogram( histogram_path )
8672
if not is_valid_histogram( root_histogram, histogram_path, input_file ):
8773
return
@@ -107,13 +93,6 @@ def get_histograms_from_files( histogram_paths = [], files = {}, verbose = False
10793
root_file.Close()
10894
return histograms
10995

110-
def root_file_mkdir( root_file, directory ):
111-
pointer_to_directory = root_file.Get( directory )
112-
if not pointer_to_directory:
113-
root_file.mkdir( directory ) # if directory = a/b/c this will only return a, but make complete path
114-
pointer_to_directory = root_file.Get( directory )
115-
return pointer_to_directory
116-
11796
def get_histogram_info_tuple( histogram_in_path ):
11897
histogram_name = histogram_in_path.split( '/' )[-1]
11998
directory = ''.join( histogram_in_path.rsplit( histogram_name, 1 )[:-1] )
@@ -126,3 +105,50 @@ def set_root_defaults( set_batch = True, msg_ignore_level = 1001 ):
126105
gROOT.SetBatch( set_batch )
127106
# ignore warnings
128107
gROOT.ProcessLine( 'gErrorIgnoreLevel = %d;' % msg_ignore_level )
108+
109+
def root_mkdir(file_handle, path):
    '''
        Equivalent to mkdir -p but for ROOT files.
        Will create all the directories necessary to complete the given path.
        Directories that already exist (according to root_exists) are left
        untouched. Empty path components - from a leading, trailing or
        doubled '/' - are ignored, so an empty path creates nothing.
        @param file_handle: file handle to an open ROOT file with write access
        @param path: the path to be written to the ROOT file
    '''
    file_handle.cd()

    current_dir = ''
    # str.split returns [path] when there is no '/', so no special case needed
    for directory in path.split('/'):
        if not directory:
            # skip '' produced by '/a', 'a/' or 'a//b'
            continue
        if current_dir:
            current_dir = current_dir + '/' + directory
        else:
            current_dir = directory
        # create one level at a time so that every parent exists beforehand
        if not root_exists(file_handle, current_dir):
            file_handle.mkdir(current_dir)
133+
134+
def root_exists(file_handle, path):
    '''
        Check if a directory exists in an open ROOT file.
        @param file_handle: object providing GetDirectory(path)
        @param path: path of the directory inside the file
        @return: True if GetDirectory returns a usable (truthy) object,
                 False if the lookup raises an exception or returns None or
                 another falsy value
    '''
    try:
        pointer_to_directory = file_handle.GetDirectory( path )
    except Exception:
        # a failed lookup means "does not exist"; KeyboardInterrupt and
        # SystemExit are deliberately NOT swallowed here
        return False
    # truthiness instead of 'is None': a falsy result (e.g. a null pointer,
    # presumably what plain PyROOT returns for a missing directory - confirm)
    # must not be reported as an existing directory
    return bool( pointer_to_directory )
141+
142+
def find_btag( histogram_path ):
    '''
        Search the histogram path for a b-tag multiplicity identifier.
        The inclusive bins are tried first, then the exclusive ones (both
        imported from config.summations_common); the first identifier that
        is a substring of the path wins.
        Returns (found b-tag, True) or (first inclusive b-tag, False)
    '''
    for b_tag_bins in ( b_tag_bins_inclusive, b_tag_bins_exclusive ):
        for candidate in b_tag_bins:
            if candidate in histogram_path:
                return candidate, True
    # nothing matched: fall back to the default b-tag
    return b_tag_bins_inclusive[0], False

0 commit comments

Comments
 (0)