-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy path9pca.R
More file actions
88 lines (52 loc) · 2.1 KB
/
9pca.R
File metadata and controls
88 lines (52 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
library(magrittr)
# Load data ----
# Read the two source CSV files and cache each one under a relative file name.
# NOTE: the caches are written with saveRDS(), so despite the ".RData"
# extension they have to be restored with readRDS(), not load().
army <- readr::read_csv(
  file = "C:/Users/ps324/OneDrive - Cummins/documents/Explore/army.csv"
)
saveRDS(object = army, file = "army1.RData")

army_f <- readr::read_csv(
  file = "C:/Users/ps324/OneDrive - Cummins/documents/Explore/army_f.csv"
)
saveRDS(object = army_f, file = "army2.RData")
# Explore the raw data ----
# Every expression in this section is evaluated only for its printed or
# plotted output; nothing is assigned.

# Column names and the frequency of each Component value.
colnames(army)
table(army$Component)

# Number of missing values per column. vapply() pins the result to exactly one
# integer per column; sapply() would silently change its return type on
# unusual input (e.g. a list when there are no columns).
vapply(army, function(column) sum(is.na(column)), integer(1))

# Duplicate-row check.
janitor::get_dupes(army)

# Summary statistics and density plot for Weightlbs.
sjmisc::descr(army$Weightlbs)
ggplot2::ggplot(data = army, mapping = ggplot2::aes(x = Weightlbs)) +
  ggplot2::geom_density() +
  ggthemes::theme_gdocs()

# Weightlbs on its own, sorted ascending, to eyeball the smallest values.
army %>%
  dplyr::select(Weightlbs) %>%
  dplyr::arrange(Weightlbs)
# Clean, subset and standardise ----
# Keep only the rows whose Weightlbs is strictly positive.
armyClean <- army %>%
  dplyr::filter(Weightlbs > 0)

# Disabled: column-name cleaning.
# janitor::clean_names(army, case = "")

# Select columns 2 through 94 plus column 106, by position.
# NOTE(review): positional selection depends on the CSV column order - confirm
# these are the intended (numeric) columns.
army_subset <- armyClean[, c(2:94, 106)]

# NOTE(review): the original placeholder here read "train and test"; no
# train/test split is performed in this section.

# Correlation plot for the first ten selected columns.
DataExplorer::plot_correlation(army_subset[, 1:10])

# Apply scale() with its defaults to every selected column and convert the
# resulting matrix back to a data frame.
army_scale <- army_subset %>%
  scale() %>%
  data.frame()
# Principal components ----
# First pass: unrotated fit with the default number of components, used only
# to inspect the eigenvalues.
pca <- psych::principal(army_scale, rotate = "none")

# Scree plot, followed by the first ten eigenvalues.
plot(pca$values, xlab = "Component", ylab = "Eigenvalues", type = "b")
head(pca$values, n = 10)

# Six components are retained below; compare the sum of all eigenvalues with
# the sum of the first six.
sum(pca$values)
sum(pca$values[seq_len(6)])

# Second pass: unrotated fit with six components; print the fit.
pca_6 <- psych::principal(army_scale, nfactors = 6, rotate = "none")
pca_6

# Component scores, rounded to two decimals, as a data frame.
pca_scores <- pca_6$scores %>%
  round(digits = 2) %>%
  data.frame()
head(pca_scores)
# Regress weight on the component scores ----
# Attach Weightlbs from armyClean as the response column `weight`.
pca_scores$weight <- armyClean$Weightlbs
# Disabled: indicator for Component == "Regular Army".
# pca_scores$regulararmy <- ifelse(armyClean$Component == "Regular Army", 1, 0)

# Correlations between the component scores and the response.
DataExplorer::plot_correlation(pca_scores)

# Linear model of weight on every other column currently in pca_scores.
# This must run before the predicted/residuals columns are added below, or
# `.` in the formula would pick them up as predictors.
pca_lm <- lm(weight ~ ., data = pca_scores)
broom::tidy(pca_lm)
summary(pca_lm)

# Store fitted values and residuals, rounded to two decimals.
pca_scores$predicted <- round(pca_lm$fitted.values, digits = 2)
pca_scores$residuals <- round(pca_lm$residuals, digits = 2)

# Observed weight against fitted values, with a straight-line fit overlaid.
ggplot2::ggplot(
  data = pca_scores,
  mapping = ggplot2::aes(x = predicted, y = weight)
) +
  ggplot2::geom_point() +
  ggplot2::stat_smooth(method = "lm", se = FALSE) +
  ggthemes::theme_pander()

# Residuals against fitted values, with a straight-line fit overlaid.
ggplot2::ggplot(
  data = pca_scores,
  mapping = ggplot2::aes(x = predicted, y = residuals)
) +
  ggplot2::geom_point() +
  ggplot2::stat_smooth(method = "lm", se = FALSE) +
  ggthemes::theme_few()

# Base-R diagnostic plots for the fitted model.
plot(pca_lm)
# Refit without selected rows ----
# Drop three rows by position and remove the derived `predicted` and
# `residuals` columns so that `weight ~ .` only sees the component scores.
# The columns are dropped by name rather than by position (previously
# c(-8, -9)): a positional drop silently removes the wrong columns - and can
# leave `residuals` in as a predictor - as soon as any extra column is added
# to pca_scores ahead of them.
# NOTE(review): rows 2388, 3668 and 3918 are presumably the points flagged by
# the plot(pca_lm) diagnostics - confirm; the positions are only valid for
# this exact input data.
pca_trimmed <- pca_scores[
  -c(2388, 3668, 3918),
  !(names(pca_scores) %in% c("predicted", "residuals")),
  drop = FALSE
]

# Same model specification as before, fitted on the trimmed data.
pca_lm2 <- lm(weight ~ ., data = pca_trimmed)
summary(pca_lm2)
plot(pca_lm2)