Skip to content

Commit 9540603

Browse files
string enum
1 parent 4b79eda commit 9540603

2 files changed

Lines changed: 33 additions & 57 deletions

File tree

setup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,14 +6,14 @@
66
setup(
77
name = 'scikit-eLCS',
88
packages = ['skeLCS'],
9-
version = '1.2.1',
9+
version = '1.2.2',
1010
license='License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
1111
description = 'Educational Learning Classifier System',
1212
long_description_content_type="text/markdown",
1313
author = 'Robert Zhang, Ryan J. Urbanowicz',
1414
author_email = 'robertzh@seas.upenn.edu,ryanurb@upenn.edu',
1515
url = 'https://github.com/UrbsLab/scikit-eLCS',
16-
download_url = 'https://github.com/UrbsLab/scikit-eLCS/archive/v_1.2.1.tar.gz',
16+
download_url = 'https://github.com/UrbsLab/scikit-eLCS/archive/v_1.2.2.tar.gz',
1717
keywords = ['machine learning','data analysis','data science','learning classifier systems'],
1818
install_requires=['numpy','pandas','scikit-learn'],
1919
classifiers=[

skeLCS/DataCleanup.py

Lines changed: 31 additions & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,4 @@
1-
'''
2-
Name: eLCS.py
3-
Authors: Robert Zhang in association with Ryan Urbanowicz
4-
Contact: robertzh@wharton.upenn.edu
5-
Description: This module creates a class that takes in data, and cleans it up to be used by another machine learning module
6-
'''
1+
72

83
import numpy as np
94
import pandas as pd
@@ -106,11 +101,11 @@ def add_attribute_converter_map(self,headerName,map):
106101
def add_attribute_converter_random(self,headerName):
107102
if headerName in self.dataHeaders and not (headerName in self.map):
108103
headerIndex = np.where(self.dataHeaders == headerName)[0][0]
109-
uniqueItems = np.array([])
104+
uniqueItems = []
110105
for instance in self.dataFeatures:
111106
if not(instance[headerIndex] in uniqueItems) and instance[headerIndex] != "NA":
112-
uniqueItems = np.append(uniqueItems,instance[headerIndex])
113-
self.add_attribute_converter(headerName,uniqueItems)
107+
uniqueItems.append(instance[headerIndex])
108+
self.add_attribute_converter(headerName,np.array(uniqueItems))
114109

115110
def add_class_converter(self,array):
116111
if not (self.classLabel in self.map.keys()):
@@ -121,11 +116,11 @@ def add_class_converter(self,array):
121116

122117
def add_class_converter_random(self):
123118
if not (self.classLabel in self.map.keys()):
124-
uniqueItems = np.array([])
119+
uniqueItems = []
125120
for instance in self.dataPhenotypes:
126121
if not (instance in uniqueItems) and instance != "NA":
127-
uniqueItems = np.append(uniqueItems, instance)
128-
self.add_class_converter(uniqueItems)
122+
uniqueItems.append(instance)
123+
self.add_class_converter(np.array(uniqueItems))
129124

130125
def convert_all_attributes(self):
131126
for attribute in self.dataHeaders:
@@ -144,56 +139,43 @@ def convert_all_attributes(self):
144139
def delete_attribute(self,headerName):
145140
if headerName in self.dataHeaders:
146141
i = np.where(headerName == self.dataHeaders)[0][0]
147-
newFeatures = np.array([[2,3]])
148142
self.dataHeaders = np.delete(self.dataHeaders,i)
149143
if headerName in self.map.keys():
150144
del self.map[headerName]
151145

146+
newFeatures = []
152147
for instanceIndex in range(len(self.dataFeatures)):
153148
instance = np.delete(self.dataFeatures[instanceIndex],i)
154-
if (instanceIndex == 0):
155-
newFeatures = np.array([instance])
156-
else:
157-
newFeatures = np.concatenate((newFeatures,[instance]),axis=0)
158-
self.dataFeatures = newFeatures
149+
newFeatures.append(instance)
150+
self.dataFeatures = np.array(newFeatures)
159151
else:
160152
raise Exception("Header Doesn't Exist")
161153

162154
def delete_all_instances_without_header_data(self,headerName):
163-
newFeatures = np.array([[2,3]])
164-
newPhenotypes = np.array([])
155+
newFeatures = []
156+
newPhenotypes = []
165157
attributeIndex = np.where(self.dataHeaders == headerName)[0][0]
166158

167-
firstTime = True
168159
for instanceIndex in range(len(self.dataFeatures)):
169160
instance = self.dataFeatures[instanceIndex]
170161
if instance[attributeIndex] != "NA":
171-
if firstTime:
172-
firstTime = False
173-
newFeatures = np.array([instance])
174-
else:
175-
newFeatures = np.concatenate((newFeatures,[instance]),axis = 0)
176-
newPhenotypes = np.append(newPhenotypes,self.dataPhenotypes[instanceIndex])
162+
newFeatures.append(instance)
163+
newPhenotypes.append(self.dataPhenotypes[instanceIndex])
177164

178-
self.dataFeatures = newFeatures
179-
self.dataPhenotypes = newPhenotypes
165+
self.dataFeatures = np.array(newFeatures)
166+
self.dataPhenotypes = np.array(newPhenotypes)
180167

181168
def delete_all_instances_without_phenotype(self):
182-
newFeatures = np.array([[2,3]])
183-
newPhenotypes = np.array([])
184-
firstTime = True
169+
newFeatures = []
170+
newPhenotypes = []
185171
for instanceIndex in range(len(self.dataFeatures)):
186172
instance = self.dataPhenotypes[instanceIndex]
187173
if instance != "NA":
188-
if firstTime:
189-
firstTime = False
190-
newFeatures = np.array([self.dataFeatures[instanceIndex]])
191-
else:
192-
newFeatures = np.concatenate((newFeatures,[self.dataFeatures[instanceIndex]]),axis = 0)
193-
newPhenotypes = np.append(newPhenotypes,instance)
174+
newFeatures.append(self.dataFeatures[instanceIndex])
175+
newPhenotypes.append(instance)
194176

195-
self.dataFeatures = newFeatures
196-
self.dataPhenotypes = newPhenotypes
177+
self.dataFeatures = np.array(newFeatures)
178+
self.dataPhenotypes = np.array(newPhenotypes)
197179

198180
def print(self):
199181
isFullNumber = self.check_is_full_numeric()
@@ -247,26 +229,20 @@ def get_params(self):
247229
if not(self.check_is_full_numeric()):
248230
raise Exception("Features and Phenotypes must be fully numeric")
249231

250-
newFeatures = np.array([[2,3]],dtype=float)
251-
newPhenotypes = np.array([],dtype=float)
252-
firstTime = True
232+
newFeatures = []
233+
newPhenotypes = []
253234
for instanceIndex in range(len(self.dataFeatures)):
254-
newInstance = np.array([],dtype=float)
235+
newInstance = []
255236
for attribute in self.dataFeatures[instanceIndex]:
256237
if attribute == "NA":
257-
newInstance = np.append(newInstance, np.nan)
238+
newInstance.append(np.nan)
258239
else:
259-
newInstance = np.append(newInstance, float(attribute))
260-
261-
if firstTime:
262-
firstTime = False
263-
newFeatures = np.array([newInstance])
264-
else:
265-
newFeatures = np.concatenate((newFeatures,[newInstance]),axis = 0)
240+
newInstance.append(float(attribute))
266241

242+
newFeatures.append(np.array(newInstance,dtype=float))
267243
if self.dataPhenotypes[instanceIndex] == "NA": #Should never happen. All NaN phenotypes should be removed automatically at init. Just a safety mechanism.
268-
newPhenotypes = np.append(newPhenotypes, np.nan)
244+
newPhenotypes.append(np.nan)
269245
else:
270-
newPhenotypes = np.append(newPhenotypes, float(self.dataPhenotypes[instanceIndex]))
246+
newPhenotypes.append(float(self.dataPhenotypes[instanceIndex]))
271247

272-
return self.dataHeaders,self.classLabel,newFeatures,newPhenotypes
248+
return self.dataHeaders,self.classLabel,np.array(newFeatures,dtype=float),np.array(newPhenotypes,dtype=float)

0 commit comments

Comments
 (0)