forked from Sage-Bionetworks/mPower-sdata
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmPower-summaries.R
More file actions
137 lines (117 loc) · 4.63 KB
/
mPower-summaries.R
File metadata and controls
137 lines (117 loc) · 4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
require(ggplot2)
## The packages synapseClient and rGithubClient are not in standard CRAN,
## so get them from synapse and github respectively
if(!require(synapseClient)){
## The package synapseClient is not on CRAN. Get it from sagebase.
source("http://depot.sagebase.org/CRAN.R")
pkgInstall("synapseClient")
require(synapseClient)
}
if(!require(githubr)){
## The package githubr is not on CRAN. Get it from using the useful devtools.
install.packages("devtools")
devtools::install_github("brian-bot/githubr")
require(githubr)
}
## The following will prompt for a username and password.
synapseLogin()
firstDate <- as.Date("2015-03-09")
lastDate <- as.Date("2015-09-09")
## QUERY THE mPower PROJECT FOR ALL OF THE TABLES
q <- synQuery('SELECT id, name FROM table WHERE parentId=="syn4993293"')
## READ IN ALL OF THE TABLES
allDat <- lapply(as.list(q$table.id), function(x){
synTableQuery(paste0('SELECT * FROM ', x))@values
})
names(allDat) <- q$table.name
## UNIQUE PARTICIPANTS AND TASKS
allParticipants <- lapply(lapply(allDat, "[[", "healthCode"), unique)
length(unique(unlist(allParticipants)))
## PARKINSON DIAGNOSIS
table(allDat$`Demographics Survey`$`professional-diagnosis`, useNA = 'always')
## PER TABLE METRICS
tableSummaries <- data.frame(uniqueParticipants = sapply(allParticipants, length),
uniqueTasks = sapply(allDat, nrow), stringsAsFactors = FALSE)
rownames(tableSummaries) <- names(allDat)
tableSummaries
#####
## CREATE SOME PLOTS OF PARTICIPATION
#####
partDat <- lapply(allDat, function(x){
x$date <- as.Date(x$createdOn)
res1 <- x[, c("healthCode", "date")]
res2 <- res1[!duplicated(x$healthCode), ]
res1$Count <- "tasks"
res2$Count <- "participants"
return(rbind(res1, res2))
})
theseTasks <- c("Memory Activity", "Walking Activity", "Voice Activity", "Tapping Activity")
facetDat <- lapply(as.list(theseTasks), function(x){
tmp <- partDat[[x]]
tmpTab <- as.data.frame(table(tmp$date, tmp$Count))
names(tmpTab) <- c("date", "Count", "freq")
tmpTab$freqCum[ tmpTab$Count=="participants" ] <- cumsum(tmpTab$freq[ tmpTab$Count=="participants" ])
tmpTab$freqCum[ tmpTab$Count=="tasks" ] <- cumsum(tmpTab$freq[ tmpTab$Count=="tasks" ])
tmpTab$taskName <- x
return(tmpTab)
})
facetDat <- do.call(rbind, facetDat)
facetDat$date <- as.Date(facetDat$date)
facetDat$taskName <- factor(facetDat$taskName, levels = theseTasks)
pPlot <- ggplot(data=facetDat, aes(date, freqCum, fill=Count)) +
facet_grid(taskName ~ .) +
geom_bar(alpha=.5, position="identity", stat="identity") +
xlim(firstDate, lastDate) +
scale_fill_grey(start=0) +
theme_bw() +
labs(x="Date", y="Cumulative Count") +
theme(legend.position="bottom")
fName2 <- file.path(tempdir(), "figure2-participantActivities.pdf")
pdf(fName2, width = 6, height = 8)
show(pPlot)
dev.off()
## PLOT USER PARTICIPATION OVER TIME
daysDat <- lapply(allDat, function(x){
x$date <- as.Date(x$createdOn)
res <- x[, c("healthCode", "date")]
return(res)
})
daysDat <- do.call(rbind, daysDat)
daysDat <- daysDat[!duplicated(daysDat), ]
dd <- as.data.frame(table(daysDat$healthCode))
names(dd) <- c("healthCode", "days")
dd$park <- NA
tmpDiag <- allDat$`Demographics Survey`
rownames(tmpDiag) <- tmpDiag$healthCode
dd$park <- tmpDiag[dd$healthCode, "professional-diagnosis"]
## REMOVE THOSE WHO DO NOT HAVE DIAGNOSIS INFORMATION
dd <- dd[!is.na(dd$park), ]
dd$diagnosis <- "control"
dd$diagnosis[ dd$park ] <- "parkinson"
dd$log10Days <- log10(dd$days)
dd <- dd[ dd$days >= 5, ]
dd$diagnosis[ dd$diagnosis=="control" ] <- paste0("control (n=", sum(dd$diagnosis=="control"), ")")
dd$diagnosis[ dd$diagnosis=="parkinson" ] <- paste0("parkinson (n=", sum(dd$diagnosis=="parkinson"), ")")
dPlot <- ggplot(dd, aes(x=days, fill=diagnosis)) +
geom_histogram(aes(y=..density..*5), alpha=0.75, binwidth=5) +
facet_wrap(~diagnosis, nrow=2) +
labs(x="Days on app", y="Density") +
guides(fill=FALSE)
fName3 <- file.path(tempdir(), "figure3-participantDays.pdf")
pdf(fName3, width = 8, height = 6)
show(dPlot)
dev.off()
#####
## STORE THE PLOTS IN SYNAPSE
#####
## GET THIS CODE STORED IN GITHUB
mpowerRepo <- getRepo('brian-bot/mPower-sdata')
thisCode <- getPermlink(mpowerRepo, 'mPower-summaries.R')
## CREATE THE PROVENANCE STEP (ACTIVITY) THAT GENERATES THESE PLOTS
act <- Activity(name="Figure Generation",
used=lapply(as.list(q$table.id), function(x){list(entity=x)}),
executed=list(list(url=thisCode, name=basename(thisCode))))
act <- synStore(act)
## STORE THE PLOTS IN SYNAPSE
fig2Output <- synStore(File(path=fName2, parentId="syn5480005"), activity=act)
fig3Output <- synStore(File(path=fName3, parentId="syn5480005"), activity=act)