1- '''
2- Name: eLCS.py
3- Authors: Robert Zhang in association with Ryan Urbanowicz
4- Contact: robertzh@wharton.upenn.edu
5- Description: This module creates a class that takes in data, and cleans it up to be used by another machine learning module
6- '''
1+
72
83import numpy as np
94import pandas as pd
@@ -106,11 +101,11 @@ def add_attribute_converter_map(self,headerName,map):
106101 def add_attribute_converter_random (self ,headerName ):
107102 if headerName in self .dataHeaders and not (headerName in self .map ):
108103 headerIndex = np .where (self .dataHeaders == headerName )[0 ][0 ]
109- uniqueItems = np . array ([])
104+ uniqueItems = []
110105 for instance in self .dataFeatures :
111106 if not (instance [headerIndex ] in uniqueItems ) and instance [headerIndex ] != "NA" :
112- uniqueItems = np .append (uniqueItems , instance [headerIndex ])
113- self .add_attribute_converter (headerName ,uniqueItems )
107+ uniqueItems .append (instance [headerIndex ])
108+ self .add_attribute_converter (headerName ,np . array ( uniqueItems ) )
114109
115110 def add_class_converter (self ,array ):
116111 if not (self .classLabel in self .map .keys ()):
@@ -121,11 +116,11 @@ def add_class_converter(self,array):
121116
122117 def add_class_converter_random (self ):
123118 if not (self .classLabel in self .map .keys ()):
124- uniqueItems = np . array ([])
119+ uniqueItems = []
125120 for instance in self .dataPhenotypes :
126121 if not (instance in uniqueItems ) and instance != "NA" :
127- uniqueItems = np .append (uniqueItems , instance )
128- self .add_class_converter (uniqueItems )
122+ uniqueItems .append (instance )
123+ self .add_class_converter (np . array ( uniqueItems ) )
129124
130125 def convert_all_attributes (self ):
131126 for attribute in self .dataHeaders :
@@ -144,56 +139,43 @@ def convert_all_attributes(self):
144139 def delete_attribute (self ,headerName ):
145140 if headerName in self .dataHeaders :
146141 i = np .where (headerName == self .dataHeaders )[0 ][0 ]
147- newFeatures = np .array ([[2 ,3 ]])
148142 self .dataHeaders = np .delete (self .dataHeaders ,i )
149143 if headerName in self .map .keys ():
150144 del self .map [headerName ]
151145
146+ newFeatures = []
152147 for instanceIndex in range (len (self .dataFeatures )):
153148 instance = np .delete (self .dataFeatures [instanceIndex ],i )
154- if (instanceIndex == 0 ):
155- newFeatures = np .array ([instance ])
156- else :
157- newFeatures = np .concatenate ((newFeatures ,[instance ]),axis = 0 )
158- self .dataFeatures = newFeatures
149+ newFeatures .append (instance )
150+ self .dataFeatures = np .array (newFeatures )
159151 else :
160152 raise Exception ("Header Doesn't Exist" )
161153
162154 def delete_all_instances_without_header_data (self ,headerName ):
163- newFeatures = np . array ([[ 2 , 3 ]])
164- newPhenotypes = np . array ([])
155+ newFeatures = []
156+ newPhenotypes = []
165157 attributeIndex = np .where (self .dataHeaders == headerName )[0 ][0 ]
166158
167- firstTime = True
168159 for instanceIndex in range (len (self .dataFeatures )):
169160 instance = self .dataFeatures [instanceIndex ]
170161 if instance [attributeIndex ] != "NA" :
171- if firstTime :
172- firstTime = False
173- newFeatures = np .array ([instance ])
174- else :
175- newFeatures = np .concatenate ((newFeatures ,[instance ]),axis = 0 )
176- newPhenotypes = np .append (newPhenotypes ,self .dataPhenotypes [instanceIndex ])
162+ newFeatures .append (instance )
163+ newPhenotypes .append (self .dataPhenotypes [instanceIndex ])
177164
178- self .dataFeatures = newFeatures
179- self .dataPhenotypes = newPhenotypes
165+ self .dataFeatures = np . array ( newFeatures )
166+ self .dataPhenotypes = np . array ( newPhenotypes )
180167
181168 def delete_all_instances_without_phenotype (self ):
182- newFeatures = np .array ([[2 ,3 ]])
183- newPhenotypes = np .array ([])
184- firstTime = True
169+ newFeatures = []
170+ newPhenotypes = []
185171 for instanceIndex in range (len (self .dataFeatures )):
186172 instance = self .dataPhenotypes [instanceIndex ]
187173 if instance != "NA" :
188- if firstTime :
189- firstTime = False
190- newFeatures = np .array ([self .dataFeatures [instanceIndex ]])
191- else :
192- newFeatures = np .concatenate ((newFeatures ,[self .dataFeatures [instanceIndex ]]),axis = 0 )
193- newPhenotypes = np .append (newPhenotypes ,instance )
174+ newFeatures .append (self .dataFeatures [instanceIndex ])
175+ newPhenotypes .append (instance )
194176
195- self .dataFeatures = newFeatures
196- self .dataPhenotypes = newPhenotypes
177+ self .dataFeatures = np . array ( newFeatures )
178+ self .dataPhenotypes = np . array ( newPhenotypes )
197179
198180 def print (self ):
199181 isFullNumber = self .check_is_full_numeric ()
@@ -247,26 +229,20 @@ def get_params(self):
247229 if not (self .check_is_full_numeric ()):
248230 raise Exception ("Features and Phenotypes must be fully numeric" )
249231
250- newFeatures = np .array ([[2 ,3 ]],dtype = float )
251- newPhenotypes = np .array ([],dtype = float )
252- firstTime = True
232+ newFeatures = []
233+ newPhenotypes = []
253234 for instanceIndex in range (len (self .dataFeatures )):
254- newInstance = np . array ([], dtype = float )
235+ newInstance = []
255236 for attribute in self .dataFeatures [instanceIndex ]:
256237 if attribute == "NA" :
257- newInstance = np .append (newInstance , np .nan )
238+ newInstance .append (np .nan )
258239 else :
259- newInstance = np .append (newInstance , float (attribute ))
260-
261- if firstTime :
262- firstTime = False
263- newFeatures = np .array ([newInstance ])
264- else :
265- newFeatures = np .concatenate ((newFeatures ,[newInstance ]),axis = 0 )
240+ newInstance .append (float (attribute ))
266241
242+ newFeatures .append (np .array (newInstance ,dtype = float ))
267243 if self .dataPhenotypes [instanceIndex ] == "NA" : #Should never happen. All NaN phenotypes should be removed automatically at init. Just a safety mechanism.
268- newPhenotypes = np .append (newPhenotypes , np .nan )
244+ newPhenotypes .append (np .nan )
269245 else :
270- newPhenotypes = np .append (newPhenotypes , float (self .dataPhenotypes [instanceIndex ]))
246+ newPhenotypes .append (float (self .dataPhenotypes [instanceIndex ]))
271247
272- return self .dataHeaders ,self .classLabel ,newFeatures ,newPhenotypes
248+ return self .dataHeaders ,self .classLabel ,np . array ( newFeatures ,dtype = float ), np . array ( newPhenotypes , dtype = float )
0 commit comments