@@ -10,27 +10,159 @@ data_file:
1010mc_file:
1111 path to MC file, used for normalisation of data histograms
1212
13- output_file:
14- path to the output file
13+ subtract_other_samples:
14+ dictionary of (sample, file) to be removed from the template control region (data_file)
15+
16+ histogram_path:
17+ source of histograms. All sub-paths will be considered (recursive)
18+
19+ ignore_subpaths:
20+ sub-paths of histogram_path to be ignored
21+
22+ normalisation_keyword_in_path:
23+ part of the path that determines the normalisation (usually signal) region
24+
25+ shape_keyword_in_path:
26+ part of the path that determines the template (usually control/sideband) region
27+
28+ shape_btag:
29+ b-tag multiplicity to be used for the template region
30+
31+ shape_btag_for_exceptions:
32+ b-tag multiplicity to be used for the template region for exceptions (see shape_btag_exceptions)
1533
16- data_histograms :
17- paths to data histograms (from data file)
34+ shape_btag_exceptions :
35+ list of histogram names that have a different b-tag multiplicity for the template region
1836
19- mc_histograms:
20- paths to MC histograms (from MC file)
37+ remove_for_shape:
38+ part of the histogram name to be removed from the template histogram name.
39+ Useful for re-weighted histograms (mc-only).
40+
41+ output_file:
42+ path to the output file
2143
22- output_histograms:
23- paths of the output histograms
44+ Uses the data_file to extract the templates, removes other samples
45+ (subtract_other_samples) and normalises it according to mc_file.
2446
2547'''
48+ from ROOT import gROOT
49+ gcd = gROOT .cd
50+ from optparse import OptionParser
51+ from tools .file_utilities import write_data_to_JSON , read_data_from_JSON
52+ from tools .ROOT_utililities import root_mkdir , find_btag , get_histogram_dictionary
53+ from tools .hist_utilities import clean_control_region
54+ from rootpy .io import root_open
2655
def main():
    """Script entry point.

    Prints the welcome banner, obtains the option object and the input
    sets from parse_options() and calls create_qcd_file() once per set.
    When the test option is set, the parsed sets are replaced by the single
    set returned by setup_test_values(), labelled 'test.json'.
    """
    banner = (
        "Welcome to the QCD-from-data merging script",
        'Please take a seat while the code is being developed.',
        'Once finished you will be able to create a single file using shapes from data and normalisation from MC',
        'In the meantime have a look at the script usage',
        '',
    )
    for banner_line in banner:
        print(banner_line)

    options, input_values_sets, json_input_files = parse_options()
    if options.test:
        # Test mode overrides whatever was read from the command line.
        input_values_sets, json_input_files = [setup_test_values()], ['test.json']

    for json_file, input_values in zip(json_input_files, input_values_sets):
        print(' '.join(('Processing', json_file)))
        create_qcd_file(input_values)
70+
def parse_options():
    """Parse the command line and load the JSON configurations it names.

    The module docstring is used as the usage text. Every positional
    argument is passed to read_data_from_JSON(), unless the -t/--test flag
    is given, in which case positional arguments are not read at all.

    Returns a tuple (options, input_values_sets, json_input_files) where
    the two lists are index-aligned: input_values_sets[i] was loaded from
    json_input_files[i]. Both lists are empty in test mode.
    """
    option_parser = OptionParser(__doc__)
    option_parser.add_option("-t", "--test", dest="test", action="store_true",
                             help="Run with test values and write them to test.json")
    options, positional = option_parser.parse_args()

    if options.test:
        # Nothing is loaded here; the caller supplies the test values.
        return options, [], []

    json_input_files = list(positional)
    input_values_sets = [read_data_from_JSON(json_name)
                         for json_name in json_input_files]
    return options, input_values_sets, json_input_files
88+
def _is_ignored_path(path, ignore_subpaths):
    """Return True if any entry of ignore_subpaths occurs inside path."""
    return any(subpath in path for subpath in ignore_subpaths)


def _template_histogram_name(histogram, current_btag, remove_for_shape,
                             shape_btag, shape_btag_for_exceptions,
                             shape_btag_exceptions):
    """Translate a normalisation-region histogram name into its template name.

    Every fragment in remove_for_shape is stripped from the name, then
    current_btag is replaced by shape_btag_for_exceptions if the original
    name contains any entry of shape_btag_exceptions, else by shape_btag.
    """
    shape_histogram = histogram
    for fragment in remove_for_shape:
        shape_histogram = shape_histogram.replace(fragment, '')
    is_exception = any(var in histogram for var in shape_btag_exceptions)
    new_btag = shape_btag_for_exceptions if is_exception else shape_btag
    return shape_histogram.replace(current_btag, new_btag)


def _template_scale_factor(normalisation, n_entries_shape):
    """Return the factor that scales a template to the wanted normalisation.

    An empty (or negative) template is left untouched (factor 1). If the
    normalisation itself is 0 the template is scaled to unit area instead,
    so that a usable shape is kept.
    """
    if not n_entries_shape > 0:
        return 1
    if normalisation == 0:
        # Explicit float so Python 2 integer division cannot truncate to 0.
        return 1.0 / n_entries_shape
    return float(normalisation) / n_entries_shape


def create_qcd_file(input_values):
    """Create the output file with data-driven templates normalised to MC.

    input_values is a dictionary that must provide the keys 'data_file',
    'mc_file', 'histogram_path', 'shape_keyword_in_path', 'shape_btag',
    'shape_btag_for_exceptions', 'shape_btag_exceptions',
    'remove_for_shape', 'normalisation_keyword_in_path', 'ignore_subpaths',
    'subtract_other_samples' and 'output_file'.

    For every histogram found in mc_file below a path containing
    histogram_path (and none of ignore_subpaths), the matching template is
    read from data_file, cleaned with clean_control_region() using the
    histograms returned by get_histogram_dictionary(), scaled to the
    integral (including overflow) of the MC histogram and stored in
    output_file under the path and name of the MC histogram.
    """
    data_file = input_values['data_file']
    mc_file = input_values['mc_file']
    histogram_path = input_values['histogram_path']
    shape_keyword_in_path = input_values['shape_keyword_in_path']
    shape_btag = input_values['shape_btag']
    shape_btag_for_exceptions = input_values['shape_btag_for_exceptions']
    shape_btag_exceptions = input_values['shape_btag_exceptions']
    remove_for_shape = input_values['remove_for_shape']
    normalisation_keyword_in_path = input_values['normalisation_keyword_in_path']
    ignore_subpaths = input_values['ignore_subpaths']
    subtract_other_samples = input_values['subtract_other_samples']
    output_file = input_values['output_file']

    total_histograms = 0
    output = {}
    data_file_handle = root_open(data_file)
    try:
        get_shape_hist = data_file_handle.Get
        with root_open(mc_file) as f:
            for path, _, histograms in f.walk():
                if (histogram_path not in path or not histograms
                        or _is_ignored_path(path, ignore_subpaths)):
                    continue
                # The template directory only depends on the current path.
                shape_path = path.replace(normalisation_keyword_in_path,
                                          shape_keyword_in_path)
                for histogram in histograms:
                    hist = f.Get(path + '/' + histogram)
                    normalisation = hist.integral(overflow=True)
                    # now swap the b-tag
                    current_btag, _unused = find_btag(histogram)
                    shape_histogram = _template_histogram_name(
                        histogram, current_btag, remove_for_shape,
                        shape_btag, shape_btag_for_exceptions,
                        shape_btag_exceptions)
                    shape_location = shape_path + '/' + shape_histogram
                    # Change to gROOT before cloning, as the original did.
                    gcd()
                    output_hist = get_shape_hist(shape_location).clone()
                    other_samples = get_histogram_dictionary(
                        shape_location, subtract_other_samples)
                    # Snapshot the sample names before 'data' is added so
                    # that the data histogram is never subtracted from itself.
                    subtract_samples = list(other_samples)
                    other_samples['data'] = output_hist
                    output_hist = clean_control_region(
                        other_samples, subtract=subtract_samples)
                    # scale the histogram
                    n_entries_shape = output_hist.integral(overflow=True)
                    output_hist.Scale(
                        _template_scale_factor(normalisation, n_entries_shape))
                    output[path + '/' + histogram] = output_hist
                total_histograms += len(histograms)
    finally:
        # Release the data file even if a histogram could not be processed.
        data_file_handle.close()

    output_file_handle = root_open(output_file, 'recreate')
    try:
        # probably faster to use TFileCache within the loop above.
        for path_with_hist, histogram in output.items():
            # Split on the last '/' only: a plain str.replace would also
            # strip the histogram name out of matching directory names.
            path, histogram_name = path_with_hist.rsplit('/', 1)
            root_mkdir(output_file_handle, path)
            output_file_handle.cd(path)
            histogram.write(histogram_name)
        output_file_handle.cd()
    finally:
        output_file_handle.close()
    print('Processed %d histograms' % total_histograms)
34166
# Run the script only when executed directly, not when imported as a module.
if __name__ == '__main__':
    main()
0 commit comments