-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathextract_peaks.py
More file actions
227 lines (196 loc) · 9.2 KB
/
extract_peaks.py
File metadata and controls
227 lines (196 loc) · 9.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# Copyright © 2018 Liu Lab, Beijing Computational Science Research Center <http://liulab.csrc.ac.cn>
# Authors:
# 2017-2018 Xuanxuan Li <lxx2011011580@gmail.com>
# 2017-2018 Chufeng Li <chufengl@asu.edu>
# 2017 Richard Kirian <rkirian@asu.edu>
# 2017 Nadia Zatsepin <Nadia.Zatsepin@asu.edu>
# 2017 John Spence <spence@asu.edu>
# 2017 Haiguang Liu <hgliu@csrc.ac.cn>
# This file is part of SPIND.
# SPIND is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# SPIND is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with SPIND. If not, see <http://www.gnu.org/licenses/>.
"""
Usage:
extract_peaks.py -i cxi_file_or_dir -g geometry_file -d det_dist [options]
Options:
-h --help Show this screen.
-i cxi_file_or_dir CXI file or directory.
-g geometry_file Geometry file in cheetah, crystfel or psana format.
-d det_dist Detector distance in meters.
-p pixel_size Pixel size in meters [default: 110E-6].
-o output_dir Output directory [default: output].
--sort-by=<metric> Sort peaks by certain metric [default: SNR].
--res=<resolution> Resolution threshold for high priority peaks in angstrom [default: 4.5].
--output-format=output_format Ourput format, multiple plain txt files or single hdf5 files [default: h5].
--scale Special option for cxij6916 for filtered region scaling.
"""
from docopt import docopt
import os
import h5py
from tqdm import tqdm # show a progress bar for loops
import numpy as np
from math import sin, atan, sqrt, exp, cos
import glob
h = 4.135667662E-15 # Planck constant in eV*s
c = 2.99792458E8 # light speed in meters per sec
class Peak(object):
"""docstring for peak"""
def __init__(self, x, y, res, SNR, total_intensity, encoder_value, photon_energy):
"""Summary
Args:
x (double): x coordinate in assembled detector in pixels.
y (double): y coordinate in assembled detector in pixels.
res (double): resolution in angstrom.
SNR (double): peak signal to noise ratio.
total_intensity (double): peak total intensity.
encoder_value (double): camera offset.
photon_energy (double): photom energy in eV.
"""
super(Peak, self).__init__()
self.x = x
self.y = y
self.res = res
self.SNR = SNR
self.total_intensity = total_intensity
self.encoder_value = encoder_value
self.photon_energy = photon_energy
def parse_peak_list_filename(filename):
basename = os.path.splitext(os.path.basename(filename))[0]
experiment, run_id, class_id = basename.split('-')
class_id = class_id.split('e')[0] # remove event id
return experiment, run_id, class_id
def load_geom(filename):
print "load %s" % filename
ext = os.path.splitext(filename)[1]
if ext == '.h5':
f = h5py.File(filename, 'r')
return f['x'].value, f['y'].value, f['z'].value
elif ext == '.geom':
from psgeom import camera
cspad = camera.Cspad.from_crystfel_file(filename)
cspad.to_cheetah_file('.geom.h5')
f = h5py.File('.geom.h5', 'r')
return f['x'].value, f['y'].value, f['z'].value
elif ext == '.psana':
from psgeom import camera
cspad = camera.Cspad.from_psana_file(filename)
cspad.to_cheetah_file('.geom.h5')
f = h5py.File('.geom.h5', 'r')
return f['x'].value, f['y'].value, f['z'].value
else:
print('Wrong geometry: %s. You must provide Cheetah, CrystFEL or psana geometry file.')
return None
def extract_peaks(cxi_file, output_dir, sort_by='SNR',
res_threshold=4.5E-10, scale=False,
output_format='h5'):
data = h5py.File(cxi_file,'r')
basename, ext = os.path.splitext(os.path.basename(cxi_file))
# mkdir for output
if not os.path.isdir(output_dir):
os.makedirs('%s' % output_dir)
nb_events = data['entry_1/data_1/data'].shape[0]
print('Processing %s with geometry %s' % (cxi_file, geom_file))
peak_lists = []
dataset_names = []
for event_id in tqdm(range(nb_events)):
if output_format == 'txt':
output = output_dir + '/' + basename + '-e%04d.txt' % event_id
elif output_format == 'h5':
dataset_names.append(basename + '-e%04d' % event_id)
nb_peaks = data['/entry_1/result_1/nPeaks'][event_id]
peak_list = []
for peak_id in range(nb_peaks):
rawX = int(data['/entry_1/result_1/peakXPosRaw'][event_id][peak_id])
rawY = int(data['/entry_1/result_1/peakYPosRaw'][event_id][peak_id])
assX = geom_x[rawY][rawX] / pixel_size
assY = geom_y[rawY][rawX] / pixel_size
total_intensity = data['/entry_1/result_1/peakTotalIntensity'][event_id][peak_id]
SNR = data['/entry_1/result_1/peakSNR'][event_id][peak_id]
encoder_value = data['/LCLS/detector_1/EncoderValue'][event_id]
photon_energy = data['LCLS/photon_energy_eV'][event_id]
# calculate resolution
r = sqrt(assX ** 2. + assY ** 2.)
lam = h * c / photon_energy
res = lam / (sin(0.5 * atan(r * pixel_size / det_dist)) * 2)
if scale:
zn_thickness = 50E-6
absorption_length = 41.73E-6
theta = atan(r * pixel_size / det_dist) # scattering angle
scaling = exp(zn_thickness / absorption_length / cos(theta))
total_intensity *= scaling
peak = Peak(assX, assY, res, SNR, total_intensity, encoder_value, photon_energy)
peak_list.append(peak)
if sort_by == 'SNR':
peak_list.sort(key=lambda peak: peak.SNR, reverse=True)
elif sort_by == 'total_intensity':
peak_list.sort(key=lambda peak: peak.total_intensity, reverse=True)
elif sort_by == 'res':
peak_list.sort(key=lambda peak: peak.res, reverse=False)
else:
print('Unimplemented sort strategy: %s' % sort_by)
return None
HP_ids = [] # high priority peak indices
LP_ids = [] # low priority peak indices
for peak_id in range(len(peak_list)):
peak = peak_list[peak_id]
if peak.res > res_threshold:
HP_ids.append(peak_id)
else:
LP_ids.append(peak_id)
# rearange peaks order according to the priority
new_peak_list = []
for peak_id in HP_ids:
new_peak_list.append(peak_list[peak_id])
for peak_id in LP_ids:
new_peak_list.append(peak_list[peak_id])
peak_lists.append(new_peak_list)
# write to file
if output_format == 'txt':
f = open(output, 'w')
for peak in new_peak_list:
f.write('%.5e %.5e %.5e %.5e %.5e %.5e %5.2e\n' %
(peak.x, peak.y, peak.total_intensity,
peak.SNR, peak.photon_energy, peak.encoder_value,
peak.res))
if output_format == 'h5':
output = output_dir + '/' + basename + '.h5'
f = h5py.File(output)
for i, peak_list in enumerate(peak_lists):
peak_array = []
for peak in peak_list:
peak_array.append([peak.x, peak.y, peak.total_intensity,
peak.SNR, peak.photon_energy, peak.encoder_value,
peak.res])
f.create_dataset(dataset_names[i], data=np.asarray(peak_array))
f.close()
if __name__ == '__main__':
argv = docopt(__doc__)
cxi_file_or_dir = argv['-i']
geom_file = argv['-g']
sort_by = argv['--sort-by']
res_threshold = float(argv['--res']) * 1.E-10 # convert resolution in angstroms to meters.
output_dir = argv['-o']
det_dist = float(argv['-d'])
pixel_size = float(argv['-p'])
output_format = argv['--output-format']
scale = argv['--scale']
# load geometry
geom_x, geom_y, geom_z = load_geom(geom_file)
# load cxi file
if os.path.isdir(cxi_file_or_dir): # extract peaks from multiple cxi files
cxi_files = glob.glob(cxi_file_or_dir + '/' + '*.cxi')
else: # extract peaks from single cxi file
cxi_files = [cxi_file_or_dir, ]
for cxi_file in cxi_files:
experiment, run_id, class_id = parse_peak_list_filename(cxi_file)
extract_peaks(cxi_file, '%s/%s/%s' % (output_dir, run_id, class_id),
sort_by=sort_by, res_threshold=res_threshold,
scale=scale, output_format=output_format)