Skip to content

Commit 75ea020

Browse files
author
algorithmica-repository
committed
uploading solutions to assignments and datasets
1 parent 3496cd8 commit 75ea020

File tree

6 files changed

+1631
-0
lines changed

6 files changed

+1631
-0
lines changed

assignments/sol1.R

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
# load required libraries
library(dplyr)  # provides %>% and the filter/summarise/join verbs used below
#Problem 1
2+
#Q1
3+
rain.df = read.table("E:/data analytics/datasets/rainfall.dat")
4+
#Q2
5+
nrow(rain.df)  # number of rows
6+
#Q3
7+
ncol(rain.df)  # number of columns
8+
#Q4
9+
colnames(rain.df)
10+
#Q5
11+
rain.df[2,4]
12+
#Q6
13+
rain.df[2, ]
14+
#Q7
15+
names(rain.df) = c("year", "month", "day", seq(0,23))
16+
str(rain.df)
17+
#Q8
18+
# Daily total: row-wise sum of columns 4 to 27 (the 24 columns named 0..23).
# rowSums() is the vectorised form of apply(..., MARGIN = 1, FUN = sum).
rain.df$daily <- rowSums(rain.df[, 4:27])
19+
head(rain.df)
20+
tail(rain.df)
21+
22+
#rain.df = rain.df %>% rowwise() %>% mutate(daily=sum(0,1))
23+
#rain.df = rain.df %>% group_by(1:n()) %>% mutate(daily=sum(0:23))
24+
#rain.df[20,]
25+
26+
#Problem 2
27+
28+
# Q1
29+
DeepSea <- read.table(file ="E:/data analytics/datasets/ISIT.txt", header = TRUE)
30+
31+
# Q2
32+
names(DeepSea)
33+
str(DeepSea)
34+
dim(DeepSea)
35+
head(DeepSea)
36+
37+
# Q3
38+
unique(DeepSea$Station)
39+
40+
# Q4
41+
DeepSea.sta1 = DeepSea[DeepSea$Station==1,]
42+
DeepSea.sta1
43+
44+
DeepSea.sta1 = DeepSea %>% filter(Station==1)
45+
DeepSea.sta1
46+
47+
# Q5
48+
nrow(DeepSea.sta1)
49+
min(DeepSea.sta1$SampleDepth)
50+
mean(DeepSea.sta1$SampleDepth)
51+
max(DeepSea.sta1$SampleDepth)
52+
53+
DeepSea.sta1 %>% summarise(total=n(),min_depth=min(SampleDepth), mean_depth=mean(SampleDepth), max_depth=max(SampleDepth))
54+
55+
# Q6
56+
nrow(DeepSea[DeepSea$Station==1,])
57+
nrow(DeepSea[DeepSea$Station==2,])
58+
nrow(DeepSea[DeepSea$Station==3,])
59+
nrow(DeepSea[DeepSea$Station==4,])
60+
nrow(DeepSea[DeepSea$Station==5,])
61+
62+
# dplyr alternative: row counts for stations 1 to 5 in one grouped summary.
DeepSea %>%
  filter(Station %in% 1:5) %>%
  group_by(Station) %>%
  summarise(count = n())
63+
64+
# Q7
65+
DeepSea %>% group_by(Station) %>% summarize(count=n()) %>% filter(min_rank(count)>2)
66+
67+
68+
# Q8
69+
DeepSea.fall = DeepSea[DeepSea$Month==8 | DeepSea$Month==9 | DeepSea$Month==10, ]
70+
nrow(DeepSea.fall)
71+
72+
DeepSea %>% filter(Month==8 | Month==9 | Month==10) %>% summarise(count=n())
73+
74+
# Q9
75+
DeepSea.dep2000 = DeepSea[DeepSea$SampleDepth>2000, ]
76+
nrow(DeepSea.dep2000)
77+
78+
# dplyr alternative: count the rows with SampleDepth > 2000. The result is
# printed rather than assigned, so DeepSea.dep2000 keeps the row subset built
# by the base-R version instead of being overwritten with a 1x1 summary.
DeepSea %>%
  filter(SampleDepth > 2000) %>%
  summarise(count = n())
79+
80+
# Q10
81+
DeepSea.dep2000.fall2001 <- DeepSea[DeepSea$SampleDepth>2000 & (DeepSea$Month==8 | DeepSea$Month==9 | DeepSea$Month==10) & DeepSea$Year==2001, ]
82+
nrow(DeepSea.dep2000.fall2001)
83+
84+
DeepSea %>% filter(SampleDepth>2000 & (Month==8 | Month==9 | Month==10) & Year==2001) %>% summarise(count=n())
85+
86+
87+
#Problem 3
88+
89+
# Step 1
90+
DeepSea1 = read.table(file ="E:/data analytics/datasets/DeepSea1.txt", header = TRUE)
91+
92+
# Step 2
93+
DeepSea2 = read.table(file ="E:/data analytics/datasets/DeepSea2.txt", header = TRUE)
94+
95+
# Step 3
96+
head(DeepSea1)
97+
head(DeepSea2)
98+
99+
# Step 4
100+
DeepSea <- merge(DeepSea1, DeepSea2, by.x = 'ID', by.y = 'SampleID')
101+
nrow(DeepSea)
102+
nrow(DeepSea1)
103+
nrow(DeepSea2)
104+
105+
DeepSea=inner_join(DeepSea1, DeepSea2, by=c('ID'='SampleID'))
106+
# Row count of the inner join; nrow() needs the data frame as its argument.
nrow(DeepSea)
107+
108+
# Step 5
109+
DeepSea.full = merge(DeepSea1, DeepSea2, by.x = 'ID', by.y = 'SampleID', all = TRUE)
110+
nrow(DeepSea.full)
111+
head(DeepSea.full)
112+
113+
# dplyr equivalent of merge(..., all = TRUE): full_join() keeps unmatched rows
# from both tables, whereas right_join() drops unmatched DeepSea1 rows.
nrow(full_join(DeepSea1, DeepSea2, by = c("ID" = "SampleID")))
114+
115+
# Step 6
116+
DeepSea[,c('Year','Month','Station','SampleDepth')]
117+
118+
DeepSea %>% select(Year,Month,Station,SampleDepth)
119+
120+
# Step 7
121+
DeepSea[order(DeepSea$SampleDepth), c('Year','Month','Station','SampleDepth')]
122+
123+
DeepSea %>% select(Year,Month,Station,SampleDepth) %>% arrange(SampleDepth)
124+
125+
# Step 8
126+
DeepSea[order(DeepSea$Station,-DeepSea$SampleDepth), c('Year','Month','Station','SampleDepth')]
127+
128+
DeepSea %>% select(Year,Month,Station,SampleDepth) %>% arrange(Station,desc(SampleDepth))
129+
130+
# Step 9
131+
DeepSea$fYear <- factor(DeepSea$Year)
132+
DeepSea$fMonth <- factor(DeepSea$Month)
133+
134+
DeepSea = DeepSea %>% mutate(fYear=factor(Year), fMonth=factor(Month))
135+
str(DeepSea)
136+
137+
# Step 10
138+
levels(DeepSea$fYear)
139+
levels(DeepSea$fMonth)
140+
141+
# Step 11
142+
DeepSea$fMonthName <- factor(DeepSea$Month, levels=c(3,4,8,10), labels = c('March','April','August','October'))
143+
144+
# dplyr alternative: refer to Month through mutate()'s data mask rather than
# reaching back into the global DeepSea object with `$`.
DeepSea <- DeepSea %>%
  mutate(fMonthName = factor(Month,
                             levels = c(3, 4, 8, 10),
                             labels = c("March", "April", "August", "October")))
145+
str(DeepSea)
146+
147+
148+
# Step 12
149+
write.table(DeepSea[,c('ID','Year','Month','Station','SampleDepth','fYear','fMonth','fMonthName')], file = "DeepSea.txt", sep="\t", quote = TRUE, append = FALSE, na = "NA", row.names = FALSE)
150+
151+
res=DeepSea %>% select(ID, Year, Month, Station, SampleDepth, fYear, fMonth, fMonthName)
152+
write.table(res, file = "DeepSea.txt", sep="\t", quote = TRUE, append = FALSE, na = "NA", row.names = FALSE)
153+
154+
155+
#Problem 4
156+
# Step 1
157+
setwd('E:/data analytics/datasets')
158+
Temp = read.table(file = "Temperature.txt", header = TRUE)
159+
160+
# Step 2
161+
names(Temp)
162+
str(Temp)
163+
dim(Temp)
164+
head(Temp)
165+
166+
# Step 3
167+
tapply(Temp$Temperature, INDEX = Temp$Month, FUN = mean, na.rm = TRUE)
168+
tapply(Temp$Temperature, INDEX = Temp$Month, FUN = sd, na.rm = TRUE)
169+
170+
Temp %>% group_by(Month) %>% summarise(temp_mean=mean(Temperature, na.rm=TRUE), temp_sd=sd(Temperature,na.rm=TRUE))
171+
172+
# Step 4
173+
tapply(Temp$Temperature, INDEX = list(Temp$Month,Temp$Station), FUN = mean, na.rm = TRUE)
174+
tapply(Temp$Temperature, INDEX = list(Temp$Month,Temp$Station), FUN = sd, na.rm = TRUE)
175+
176+
Temp %>% group_by(Month,Station) %>% summarise(temp_mean=mean(Temperature, na.rm=TRUE), temp_sd=sd(Temperature,na.rm=TRUE))
177+
178+
179+
# Step 5
180+
Temp.1990 <- Temp[Temp$Year == 1990 , ]
181+
tapply(Temp.1990$Temperature, INDEX = Temp.1990$Month, FUN=mean, na.rm = TRUE)
182+
tapply(Temp.1990$Temperature, INDEX = Temp.1990$Month, FUN=sd, na.rm = TRUE)
183+
184+
Temp %>% filter(Year==1990) %>% group_by(Month) %>% summarise(temp_mean=mean(Temperature, na.rm=TRUE), temp_sd=sd(Temperature,na.rm=TRUE))
185+
186+
187+
# Step 6
188+
sapply(Temp[, c('Salinity','Temperature','CHLFa')], FUN = mean, na.rm = TRUE)
189+
sapply(Temp[, c('Salinity','Temperature','CHLFa')], FUN = sd, na.rm = TRUE)
190+
191+
# Mean and standard deviation of Salinity; output columns are named after the
# variable being summarised rather than reusing the temp_* labels.
Temp %>%
  summarise(sal_mean = mean(Salinity, na.rm = TRUE),
            sal_sd = sd(Salinity, na.rm = TRUE))
192+
Temp %>% summarise(temp_mean=mean(Temperature, na.rm=TRUE), temp_sd=sd(Temperature,na.rm=TRUE))
193+
# Mean and standard deviation of CHLFa; output columns are named after the
# variable being summarised rather than reusing the temp_* labels.
Temp %>%
  summarise(chl_mean = mean(CHLFa, na.rm = TRUE),
            chl_sd = sd(CHLFa, na.rm = TRUE))
194+
195+
# Mean and sd of all three variables in one call. summarise_each() and funs()
# are deprecated; across() (dplyr >= 1.0.0) is their replacement and names the
# outputs <column>_mean / <column>_sd.
Temp %>%
  select(Salinity, Temperature, CHLFa) %>%
  summarise(across(everything(),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        sd = ~ sd(.x, na.rm = TRUE))))
196+
197+
# Step 7
198+
Temp.dant <- Temp[Temp$Station == 'DANT' , ]
199+
sapply(Temp.dant[, c('Salinity','Temperature','CHLFa')], FUN = mean, na.rm = TRUE)
200+
sapply(Temp.dant[, c('Salinity','Temperature','CHLFa')], FUN = sd, na.rm = TRUE)
201+
202+
# Mean and sd of the three variables for station DANT only. summarise_each()
# and funs() are deprecated; across() (dplyr >= 1.0.0) is their replacement and
# names the outputs <column>_mean / <column>_sd.
Temp %>%
  filter(Station == "DANT") %>%
  select(Salinity, Temperature, CHLFa) %>%
  summarise(across(everything(),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        sd = ~ sd(.x, na.rm = TRUE))))
203+
204+
205+
# Step 8
206+
summary(Temp[, c('Salinity','Temperature','CHLFa')])
207+
208+
Temp %>% select(Salinity,Temperature,CHLFa) %>% summary()
209+
210+
# Step 9
211+
table(Temp$Station)
212+
table(Temp$Year)
213+
table(Temp$Station, Temp$Year)
214+
215+
216+

assignments/sol2.R

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# load required libraries
2+
library(dplyr)
3+
4+
# loading dataset
5+
Forest = read.csv("E:/data analytics/datasets/forestfires.csv", header = TRUE, sep = ',')
6+
Forest
7+
8+
# Task 1
9+
10+
# How many observations are there in the dataset?
11+
nrow(Forest)
12+
13+
Forest %>% summarize(count=n())
14+
15+
# How many observations are there with a fire (i.e. area>0)?
16+
nrow(Forest[Forest$area>0, ])
17+
18+
nrow(subset(Forest, area>0))
19+
20+
Forest %>% filter(area>0) %>% summarise(count=n())
21+
22+
23+
# How many observations are there with rain (i.e. rain>0)?
24+
nrow(Forest[Forest$rain>0, ])
25+
26+
Forest %>% filter(rain>0) %>% summarise(count=n())
27+
28+
# How many observations are there with both a fire and rain?
29+
nrow(Forest[Forest$area>0 & Forest$rain>0, ])
30+
31+
Forest %>% filter(area>0 & rain>0) %>% summarise(count=n())
32+
33+
34+
# Task 2
35+
36+
# Show the columns month, day, area of all the observations.
37+
Forest[, c('month', 'day', 'area')]
38+
39+
Forest %>% select(month, day, area)
40+
41+
# Show the columns month, day, area of the observations with a fire.
42+
Forest[Forest$area>0, c('month', 'day', 'area')]
43+
44+
Forest %>% filter(area>0) %>% select(month, day, area)
45+
46+
47+
# Task 3
48+
49+
# How large are the five largest fires (i.e. having largest area)?
50+
# Name the ranking column explicitly: top_n(5) with no `wt` ranks by whatever
# column happens to be last in the data frame.
Forest %>%
  arrange(desc(area)) %>%
  top_n(5, area) %>%
  select(area)
51+
52+
# What are the corresponding month, temp, RH, wind, rain, area?
53+
# Name the ranking column explicitly: top_n(5) with no `wt` ranks by whatever
# column happens to be last in the data frame.
Forest %>%
  arrange(desc(area)) %>%
  top_n(5, area) %>%
  select(month, temp, RH, wind, rain, area)
54+
55+
56+
# Task 4
57+
58+
# Reorder factor levels of month to be from Jan to Dec.
59+
levels(Forest$month)
60+
Forest$month = factor(Forest$month, levels = c('jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'))
61+
levels(Forest$month)
62+
63+
# Add one column to the data indicating whether a fire occurred for each observation ('TRUE' for area>0 and 'FALSE' for area==0).
64+
Forest$fire = factor(Forest$area>0)
65+
66+
Forest = Forest %>% mutate(fire=area>0)
67+
str(Forest)
68+
69+
Forest = Forest %>% mutate(fire=area>0)
70+
Forest$fire = factor(Forest$fire)
71+
str(Forest)
72+
73+
74+
# Task 5
75+
76+
# What is the mean area/wind/temp/RH per month?
77+
tapply(Forest$area, Forest$month, mean) # group by month values and apply mean on area column of each group
78+
tapply(Forest$wind, Forest$month, mean)
79+
tapply(Forest$temp, Forest$month, mean)
80+
tapply(Forest$RH, Forest$month, mean)
81+
82+
Forest %>% group_by(month) %>% summarise(mean_area=mean(area), mean_wind=mean(wind), mean_temp=mean(temp), mean_rh=mean(RH))
83+
84+
# How many observations are there in each month?
85+
table(Forest$month)
86+
87+
Forest %>% group_by(month) %>% summarise(count=n())
88+
89+
# How many observations are there with a fire in each month?
90+
table(Forest[Forest$area>0, ]$month)
91+
92+
Forest %>% filter(area>0) %>% group_by(month) %>% summarise(count=n())
93+
94+
# What is the probability of a fire in each month?
95+
table(Forest[Forest$area>0, ]$month) / table(Forest$month)
96+
97+
# Share of observations with area > 0 within each month.
Forest %>%
  group_by(month) %>%
  summarise(prob = mean(area > 0))

datasets/20news-bydate.tar.gz

13.8 MB
Binary file not shown.

0 commit comments

Comments
 (0)