-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun_analysis.R
More file actions
109 lines (103 loc) · 5.11 KB
/
run_analysis.R
File metadata and controls
109 lines (103 loc) · 5.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Source archive, local download target, and the directory the archive
# unpacks into.
FileURL <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
dataFile <- "data.zip"
mainDir <- "UCI HAR Dataset"

# Lookup tables stored at the top level of the unpacked directory.
featuresFile <- file.path(mainDir, "features.txt")
activityNamesFile <- file.path(mainDir, "activity_labels.txt")

# Training half: measurements, activity codes, subject ids.
xTrainFile <- file.path(mainDir, "train/X_train.txt")
activityTrainFile <- file.path(mainDir, "train/y_train.txt")
subjectTrainFile <- file.path(mainDir, "train/subject_train.txt")

# Test half: measurements, activity codes, subject ids.
xTestFile <- file.path(mainDir, "test/X_test.txt")
activityTestFile <- file.path(mainDir, "test/y_test.txt")
subjectTestFile <- file.path(mainDir, "test/subject_test.txt")
# Makes the raw data available on disk.
#
# Step 1: fetch FileURL into dataFile (binary mode), but only when dataFile
#         is not already there.
# Step 2: unzip dataFile, but only when mainDir is not already there.
#
# Takes no arguments; reads the globals FileURL, dataFile and mainDir.
extractFile <- function() {
  archivePresent <- file.exists(dataFile)
  if (!archivePresent) {
    download.file(FileURL, destfile = dataFile, mode = "wb")
  }

  unpackedPresent <- file.exists(mainDir)
  if (!unpackedPresent) {
    unzip(dataFile)
  }
}
# Reads the training and test parts of the data set and merges them into a
# single data frame.
#
# Calls extractFile() first so the files exist, then stacks the training
# rows on top of the test rows for the measurements, the activity codes and
# the subject ids (train first, test second, for all three, so rows line up).
#
# Takes no arguments; reads the *File path globals defined at the top of the
# script.
#
# Returns a data frame with, in order:
#   Subject   - subject id, converted to a factor
#   Activity  - activity name taken from the activity labels file
#   <feature> - one column per row of the features file, named after it
readData <- function() {
  extractFile()

  # Read every file in `paths` and stack the resulting tables in that order.
  readStacked <- function(paths) {
    do.call("rbind", lapply(paths, read.table))
  }

  xData <- readStacked(c(xTrainFile, xTestFile))
  # Activity codes are a single column; keep them as a plain vector.
  activityData <- readStacked(c(activityTrainFile, activityTestFile))[, 1]
  subjectData <- readStacked(c(subjectTrainFile, subjectTestFile))

  # colClasses = c("NULL", NA) skips the first column of each lookup file,
  # so the single remaining column holds the names.
  myFeatures <- read.table(featuresFile,
                           colClasses = c("NULL", NA),
                           stringsAsFactors = FALSE)[, 1]
  activityNames <- read.table(activityNamesFile,
                              colClasses = c("NULL", NA),
                              stringsAsFactors = FALSE)[, 1]

  # Name the measurement columns after the features.
  colnames(xData) <- myFeatures

  colnames(subjectData) <- "Subject"
  subjectData$Subject <- as.factor(subjectData$Subject)

  # Translate numeric activity codes into names with one vectorised lookup.
  # The code is used as a position in activityNames, i.e. this assumes row i
  # of the labels file describes code i.
  activity <- data.frame(Activity = activityNames[activityData])

  cbind(subjectData, activity, xData)
}
# Step 2: keeps the identifier columns plus the mean and standard deviation
# measurements, and renames what is kept.
#
# A column survives when its name contains "Subject" or "Activity", or ends
# in "mean()" / "std()" optionally followed by "-X", "-Y" or "-Z".
#
# df: data frame to filter by column name.
# Returns the selected columns of df, renamed via setGoodNames().
extractData <- function(df) {
  keepPattern <- "Subject|Activity|mean\\(\\)$|std\\(\\)$|mean\\(\\)-[XYZ]$|std\\(\\)-[XYZ]$"
  isWanted <- grepl(keepPattern, names(df))

  selected <- df[isWanted]
  names(selected) <- setGoodNames(selected)
  selected
}
# Builds descriptive column names from the column names of a data frame.
#
# df: data frame whose names() should be expanded.
# Returns a character vector of the rewritten names, in the same order as
# names(df). The data frame itself is not modified.
setGoodNames <- function(df) {
  # Ordered rewrite rules. Each one is applied with sub(), so only the first
  # match inside a name is replaced, and later rules see the output of
  # earlier ones.
  rules <- list(
    # Leading t / f mark the signal domain.
    list(pattern = "^t", replacement = "timeDomain"),
    list(pattern = "^f", replacement = "frequencyDomain"),
    # Spell out abbreviations. "Mag" is presumed to mean magnitude; the
    # original README does not say so directly.
    list(pattern = "Acc", replacement = "Acceleration"),
    list(pattern = "Gyro", replacement = "Gyroscope"),
    list(pattern = "Mag", replacement = "Magnitude"),
    # "BodyBody" looks like a typo in the source names.
    list(pattern = "BodyBody", replacement = "Body"),
    # Drop the "()" that follows the statistic name.
    list(pattern = "\\(\\)", replacement = ""),
    # Expand std, both in the middle of a name and at its end.
    list(pattern = "\\-std\\-", replacement = "-StandardDeviation-"),
    list(pattern = "std$", replacement = "StandardDeviation"),
    # A trailing axis letter becomes "in[X/Y/Z]Direction".
    list(pattern = "X$", replacement = "inXDirection"),
    list(pattern = "Y$", replacement = "inYDirection"),
    list(pattern = "Z$", replacement = "inZDirection")
  )

  cleaned <- names(df)
  for (rule in rules) {
    cleaned <- sub(rule$pattern, rule$replacement, cleaned)
  }
  cleaned
}
# Averages every measurement column for each Subject / Activity pair.
#
# df: data frame with a "Subject" column, an "Activity" column, and any
#     number of numeric measurement columns.
# Returns a data frame with one row per Subject / Activity combination
# present in df: columns "Subject" and "Activity" first, then the mean of
# each measurement column (NA values ignored) under its original name.
# Stops with an error if either id column is missing.
getMeanData <- function(df) {
  idCols <- c("Subject", "Activity")
  stopifnot(all(idCols %in% names(df)))

  # Select by logical mask with drop = FALSE: a negative which() index would
  # select nothing if no name matched, and without drop = FALSE a single
  # remaining measurement column would collapse to a bare vector and lose
  # its name in the aggregated result.
  measurements <- df[, !(names(df) %in% idCols), drop = FALSE]

  # Naming the grouping list labels the first two result columns directly.
  aggregate(measurements,
            by = list(Subject = df$Subject, Activity = df$Activity),
            FUN = mean, na.rm = TRUE)
}
# Run the pipeline.
# Make sure the raw data has been downloaded and unpacked.
extractFile()
# Merged train + test data with Subject and Activity columns in front.
myOriginalData <- readData()
# Only the mean / standard deviation columns, with descriptive names.
extractedData <- extractData(myOriginalData)
# Average of every kept measurement per Subject / Activity pair.
meanCalculationsData <- getMeanData(extractedData)