-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsmall_data.py
More file actions
114 lines (97 loc) · 4.55 KB
/
small_data.py
File metadata and controls
114 lines (97 loc) · 4.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
"""
Store scalar attributes of SPIData patterns, such as classification scores. The `paths` and `frames` fields are required. Any other customized field can be added, but each one must be a list.
## REQUIRED FIELDS ##
'paths'(list of str):
Path for each record. Must be SPIData format.
'frames'(list of int):
Frame of each record.
## CUSTOMIZED FIELDS ##
# example given below #
'scores'(list of float):
Score for each record, which may be the classification score.
"""
import h5py
import numpy as np
class SmallData(object):
    """Buffer per-record scalar fields in memory and write them to an HDF5 file.

    Every record consists of a path, a frame and one value for each customized
    field listed in ``smallDataNames``. Records are accumulated in Python lists
    by :meth:`addRecord` / :meth:`addRecords`; nothing is written to the file
    until :meth:`close` is called, which creates one dataset per field.
    """

    def __init__(self, output='small-data.h5', smallDataNames=None):
        """Validate the field names, create the output file and empty buffers.

        Args:
            output (str, optional): Output filename. Default is 'small-data.h5'.
            smallDataNames (list of str): Names of the customized fields, like
                ['scores', ]. Required even though the default is None.

        Raises:
            ValueError: If smallDataNames is None.
            TypeError: If smallDataNames is not a list.
                (Both are subclasses of Exception.)
        """
        super(SmallData, self).__init__()
        # Validate before touching the filesystem: initH5 opens the output in
        # 'w' mode, so bad arguments must not be allowed to create or truncate it.
        if smallDataNames is None:
            raise ValueError('Must provide one smallDataNames at least, like ["scores",].')
        if not isinstance(smallDataNames, list):
            raise TypeError('smallDataNames must be list of strings, like ["scores",].')
        self.output = output
        self.smallDataNames = smallDataNames
        self.h5File = self.initH5()
        self.paths = []
        self.frames = []
        self.smallDataDict = dict((dataname, []) for dataname in self.smallDataNames)

    def initH5(self):
        """Open ``self.output`` for writing and return the h5py file handle."""
        h5File = h5py.File(self.output, 'w')
        print('%s created' % self.output)
        return h5File

    def _checkKeys(self, kwargs):
        """Raise ValueError unless the kwargs keys equal the small data names."""
        if set(kwargs) != set(self.smallDataNames):
            # list(...) keeps the message readable on Python 3, where
            # dict.keys() is a view object rather than a list.
            raise ValueError('Record(%s) not match small data names: %s'
                             % (list(kwargs), self.smallDataNames))

    def addRecord(self, path, frame, **kwargs):
        """Append a single record to the in-memory buffers.

        Args:
            path (str): Corresponding filepath of this record.
            frame (int): Corresponding frame of this record in the file.
            **kwargs: One value per customized field. The keys must be exactly
                the names given in ``smallDataNames``.

        Returns:
            None

        Raises:
            ValueError: If the keys in kwargs do not match the small data
                names, or if ``frame`` cannot be converted to int.
        """
        self._checkKeys(kwargs)
        # Convert first so that a bad path/frame cannot leave the buffers
        # with different lengths.
        pathValue = str(path)
        frameValue = int(frame)
        for key, item in kwargs.items():
            self.smallDataDict[key].append(item)
        self.paths.append(pathValue)
        self.frames.append(frameValue)

    def addRecords(self, paths, frames, **kwargs):
        """Append several records to the in-memory buffers at once.

        Args:
            paths (list of str): Corresponding filepath of each record.
            frames (list of int): Corresponding frame of each record in its file.
            **kwargs: One sequence of values per customized field. The keys
                must be exactly the names given in ``smallDataNames`` and every
                sequence must have the same length as ``paths``.

        Returns:
            None

        Raises:
            ValueError: If the keys in kwargs do not match the small data
                names, or if the sequences differ in length. Nothing is
                appended in either case.
        """
        self._checkKeys(kwargs)
        # Materialize everything up front so lengths can be compared (and
        # one-shot iterables are not consumed twice) before any buffer changes.
        newPaths = list(paths)
        newFrames = list(frames)
        newColumns = dict((key, list(values)) for key, values in kwargs.items())
        count = len(newPaths)
        if len(newFrames) != count:
            raise ValueError('Got %d paths but %d frames' % (count, len(newFrames)))
        for key, values in newColumns.items():
            if len(values) != count:
                raise ValueError('Got %d paths but %d values for "%s"'
                                 % (count, len(values), key))
        for key, values in newColumns.items():
            self.smallDataDict[key].extend(values)
        self.paths.extend(newPaths)
        self.frames.extend(newFrames)

    def close(self):
        """Write all buffered records to the HDF5 file, close it and print a summary.

        Creates the datasets 'paths' (variable-length text), 'frames' and one
        dataset per customized field. The file handle is closed even if
        creating a dataset fails.
        """
        # `unicode` only exists on Python 2; on Python 3 the text type is `str`.
        try:
            textType = unicode
        except NameError:
            textType = str
        try:
            self.h5File.create_dataset('paths', data=self.paths,
                                       dtype=h5py.special_dtype(vlen=textType))
            self.h5File.create_dataset('frames', data=self.frames)
            for dataName in self.smallDataNames:
                self.h5File.create_dataset(dataName, data=self.smallDataDict[dataName])
        finally:
            self.h5File.close()
        print('%s closed' % self.output)
        print('====================SUMMARY====================')
        print('Num of records : %d' % len(self.paths))
if __name__ == '__main__':
    # Small demo: buffer three records with three customized fields and
    # write them to 'test.h5'.
    fieldNames = ['score', 'aa', 'bb']
    demo = SmallData(output='test.h5', smallDataNames=fieldNames)

    # One record: each keyword carries a single value for that field.
    demo.addRecord('path1', 1, score=1, aa=12, bb=1)

    # Two records at once: same keywords, each carrying a list of values.
    batchPaths = ['path1', 'path2']
    batchFrames = [1, 2]
    demo.addRecords(batchPaths, batchFrames, score=[3, 4], aa=[1, 2], bb=[3, 4])

    # Flush the buffered records to the h5 file and close it.
    demo.close()