-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathretail_analysis.sql
More file actions
250 lines (184 loc) · 8.62 KB
/
retail_analysis.sql
File metadata and controls
250 lines (184 loc) · 8.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
CREATE DATABASE RETAIL_DATA;
USE RETAIL_DATA;
SELECT*FROM Customer;
SELECT*FROM TRANSACTIONS;
SELECT*FROM PROD_CAT_INFO;
-- DATA PREPARATION
--- Q1 What is the total number of rows in each of the 3 tables in the database?
SELECT 'CUSTOMER' AS TABLE_NAME, COUNT(*) AS TOTAL_RECORD FROM Customer
UNION ALL
SELECT 'PROD_CAT_INFO' AS TABLE_NAME,COUNT(*) AS TOTAL_RECORD FROM PROD_CAT_INFO
UNION ALL
SELECT 'TRANSACTIONS' AS TABLE_NAME,COUNT (*) AS TOTAL_RECORD FROM TRANSACTIONS
UNION ALL
SELECT 'GRAND TOTAL' AS TABLE_NAME, SUM(TOTAL_RECORD) FROM
(SELECT 'CUSTOMER' AS TABLE_NAME, COUNT(*) AS TOTAL_RECORD FROM Customer
UNION ALL
SELECT 'PROD_CAT_INFO' AS TABLE_NAME,COUNT(*) AS TOTAL_RECORD FROM PROD_CAT_INFO
UNION ALL
SELECT 'TRANSACTIONS' AS TABLE_NAME,COUNT (*) AS TOTAL_RECORD FROM TRANSACTIONS) AS T1;
---Q2 Whatis the total number of transactions that have a return?
SELECT 'RETURN' AS [TRANSACTION], COUNT(CAST(Qty AS FLOAT)) AS TOTAL_RETURN_TRANSACTION FROM( SELECT
QTY
FROM TRANSACTIONS
WHERE QTY < 0) AS T1;
---Q3 As you would have noticed, the dates provided across the datasets are not in a correct format. As first steps, pls convert the date variables into valid date formats before proceeding ahead.
SELECT *,
CONVERT(DATE, DOB, 105) AS NEW_FORMAT_DOB
FROM Customer;
SELECT *,
CONVERT(DATE, tran_date, 105) AS NEW_FORMAT_TRAN_DATE
FROM TRANSACTIONS;
---Q4 What is the time range of the transaction data available for analysis? Show the output in number of days, months and years simultaneously in different columns.
SELECT
MIN(CONVERT(DATE, tran_date, 105)) AS BEGIN_TRANSACTION_DATE,
MAX(CONVERT(DATE, tran_date, 105)) AS END_TRANSACTION_DATE,
DATEDIFF(DAY, MIN(CONVERT(DATE, tran_date, 105)), MAX(CONVERT(DATE, tran_date, 105))) AS NUMBER_OF_DAYS,
DATEDIFF(MONTH, MIN(CONVERT(DATE, tran_date, 105)), MAX(CONVERT(DATE, tran_date, 105))) AS NUMBER_OF_MONTHS,
DATEDIFF(YEAR, MIN(CONVERT(DATE, tran_date, 105)), MAX(CONVERT(DATE, tran_date, 105))) AS NUMBER_OF_YEAR
FROM TRANSACTIONS;
---Q5 Which product category does the sub-category “DIY” belong to?
SELECT PROD_CAT FROM
PROD_CAT_INFO
WHERE PROD_SUBCAT = 'DIY';
-- DATA ANALYSIS
---Q1 Which channel is most frequently used for transactions?
SELECT TOP 1 STORE_TYPE AS CHANNELS, COUNT(STORE_TYPE) AS TOTAL_TRANSACTIONS
FROM TRANSACTIONS
GROUP BY STORE_TYPE
ORDER BY TOTAL_TRANSACTIONS DESC;
---Q2 What is the count of Male and Female customers in the database?
SELECT 'MALE' AS GENDER, COUNT(GENDER) AS TOTAL_COUNT
FROM Customer
WHERE Gender='M'
UNION ALL
SELECT 'FEMALE' AS GENDER, COUNT(GENDER) AS TOTAL_COUNT
FROM Customer
WHERE Gender='F';
---Q3 From which city do we have the maximum number of customers and how many?
SELECT TOP 1
city_code, COUNT(CITY_CODE) AS MAX_CUSTOMER
FROM Customer
GROUP BY city_code
ORDER BY MAX_CUSTOMER DESC;
---Q4 How many sub-categories are there under the Books category?
SELECT 'BOOKS' AS CATEGORY, COUNT(PROD_SUBCAT) AS COUNT_OF_SUB_CAT_OF_BOOK
FROM
PROD_CAT_INFO
WHERE PROD_CAT LIKE 'BOO%';
---Q5 What is the maximum quantity of products ever ordered?
SELECT TOP 1
TA.prod_cat_code AS PRODUCT_CATEGORY_CODE, prod_cat AS PRODUCT_CATEGORY,
COUNT(cast(Qty as float)) AS MAX_QUANTITY
FROM TRANSACTIONS AS TA
LEFT JOIN PROD_CAT_INFO AS PCI ON TA.PROD_CAT_CODE=PCI.PROD_CAT_CODE AND
TA.prod_subcat_code=PCI.prod_sub_cat_code
GROUP BY TA.prod_cat_code,prod_cat
ORDER BY MAX_QUANTITY DESC;
---Q6 What is the net total revenue generated in categories Electronics and Books?
SELECT PCI.prod_cat AS PRODUCT_CATEGORY, SUM(CAST(TOTAL_AMT AS FLOAT)) AS TOTAL_REVENUE FROM
TRANSACTIONS AS TR
INNER JOIN prod_cat_info AS PCI
ON TR.prod_cat_code=PCI.prod_cat_code AND TR.prod_subcat_code=PCI.prod_sub_cat_code
WHERE PCI.prod_cat = 'BOOKS' OR PCI.prod_cat= 'ELECTRONICS'
GROUP BY PCI.prod_cat;
---Q7 How many customers have >10 transactions with us, excluding returns?
SELECT CUST_ID AS CUSTOMER_ID, COUNT(total_amt) AS TOTAL_NUMBER_OF_TRANSACTIONS
FROM
Transactions
WHERE QTY>0
GROUP BY CUST_ID
HAVING COUNT(total_amt)>10;
---Q8 What is the combined revenue earned from the “Electronics” & “Clothing” categories, from “Flagship stores”?
SELECT
prod_cat AS PRODUCT_CATEGORY, SUM(CAST(TOTAL_AMT AS FLOAT)) AS TOTAL_AMT
FROM
Transactions AS TR
LEFT JOIN prod_cat_info AS PCI
ON TR.prod_cat_code=PCI.prod_cat_code AND TR.prod_subcat_code=PCI.prod_sub_cat_code
WHERE Store_type = 'FLAGSHIP STORE' AND PROD_CAT IN ('CLOTHING', 'ELECTRONICS')
GROUP BY prod_cat
UNION ALL
SELECT 'GRAND TOTAL', SUM (TOTAL_AMT) FROM (SELECT
prod_cat, SUM(CAST(TOTAL_AMT AS FLOAT)) AS TOTAL_AMT
FROM
Transactions AS TR
LEFT JOIN prod_cat_info AS PCI
ON TR.prod_cat_code=PCI.prod_cat_code AND TR.prod_subcat_code=PCI.prod_sub_cat_code
WHERE Store_type = 'FLAGSHIP STORE' AND PROD_CAT IN ('CLOTHING', 'ELECTRONICS')
GROUP BY prod_cat) AS T1 ;
---Q9 What is the total revenue generated from “Male” customers in “Electronics” category? Output should display total revenue by prod sub-cat.
SELECT GENDER, PROD_CAT, PROD_SUBCAT,SUM(CAST(TOTAL_AMT AS FLOAT))
AS TOTAL_REVENUE FROM Customer AS C
LEFT JOIN Transactions AS TR
ON C.customer_Id=TR.cust_id
INNER JOIN prod_cat_info AS PCI
ON TR.prod_cat_code=PCI.prod_cat_code AND TR.prod_subcat_code=PCI.prod_sub_cat_code
WHERE GENDER = 'M' AND PROD_CAT='ELECTRONICS'
GROUP BY GENDER, PROD_CAT, PROD_SUBCAT;
---Q10 What is percentage of sales and returns by product sub category; display only top 5 sub categories in terms of sales?
SELECT TOP 5 prod_subcat AS PRODUCT_SUB_CATEGORY, SUM(CAST(TOTAL_AMT AS FLOAT)) AS TOTAL_SALES,
SUM(CAST(TOTAL_AMT AS FLOAT))/(SELECT SUM(CAST(TOTAL_AMT AS FLOAT)) FROM Transactions)
AS TOTAL_SALES_PERCENTAGE,
SUM(CAST(CASE WHEN QTY<0 THEN Qty END AS FLOAT)) / (SELECT SUM(CAST(Qty AS FLOAT)) FROM Transactions WHERE Qty<0)
AS TOTAL_RETURNS_PERCENTAGE
FROM
Transactions AS TR
LEFT JOIN prod_cat_info AS PCI
ON TR.prod_cat_code=PCI.prod_cat_code AND TR.prod_subcat_code=PCI.prod_sub_cat_code
GROUP BY prod_subcat
ORDER BY TOTAL_SALES DESC;
---Q11 For all customers aged between 25 to 35 years find what is the net total revenue generated by these consumers in last 30 days of transactions from max transaction date available in the data?
SELECT CUSTOMER_ID, DATEDIFF(YEAR, CONVERT(DATE, DOB, 105), GETDATE()) AS CUST_AGE,
CONVERT(DATE, TRAN_DATE, 105) AS TRANSACTION_DATE,
SUM(CAST(TOTAL_AMT AS FLOAT)) AS TOTAL_SALES
FROM Customer AS C
LEFT JOIN Transactions AS TR
ON C.customer_Id=TR.cust_id
WHERE DATEDIFF(YEAR, CONVERT(DATE, DOB, 105), GETDATE()) BETWEEN 25 AND 35
AND
CONVERT(DATE, TRAN_DATE, 105) BETWEEN CONVERT(DATE, '1-11-2013', 105) AND
CONVERT(DATE, '30-11-2013', 105)
GROUP BY CUSTOMER_ID, DATEDIFF(YEAR, CONVERT(DATE, DOB, 105), GETDATE()),
CONVERT(DATE, TRAN_DATE, 105);
---Q12 Which product category has seen the max value of returns in the last 3 months of transactions?
SELECT TOP 1 PCI.prod_cat AS PRODUCT_CATEGORY,
SUM(CAST(TOTAL_AMT AS FLOAT)) AS TOTAL_VALUE_OF_RETURN
FROM
Transactions AS TR
INNER JOIN prod_cat_info AS PCI
ON TR.prod_cat_code=PCI.prod_cat_code AND TR.prod_subcat_code=PCI.prod_sub_cat_code
WHERE Qty<0 AND CONVERT(DATE, TR.tran_date, 105) BETWEEN CONVERT(DATE, '1-09-2013', 105) AND
CONVERT(DATE, '30-11-2013', 105)
GROUP BY PCI.prod_cat
ORDER BY TOTAL_VALUE_OF_RETURN;
---Q13 Which store-type sells the maximum products; by value of sales amount and by quantity sold?
SELECT TOP 1 Store_type, SUM(CAST(TOTAL_AMT AS FLOAT)) AS TOTAL_SALE_AMT,
COUNT(prod_cat) AS QUANTITY_OF_SALE
FROM
Transactions AS TR
INNER JOIN prod_cat_info AS PCI
ON TR.prod_cat_code=PCI.prod_cat_code AND TR.prod_subcat_code=PCI.prod_sub_cat_code
GROUP BY Store_type
ORDER BY TOTAL_SALE_AMT DESC, QUANTITY_OF_SALE DESC;
---Q14 What are the categories for which average revenue is above the overall average.
SELECT
prod_cat AS PRODUCT_CATEGORY, AVG(CAST(TOTAL_AMT AS FLOAT)) AS SALES_MORE_THAN_AVG
FROM
Transactions AS TR
INNER JOIN prod_cat_info AS PCI
ON TR.prod_cat_code=PCI.prod_cat_code AND TR.prod_subcat_code=PCI.prod_sub_cat_code
GROUP BY prod_cat
HAVING AVG(CAST(TOTAL_AMT AS FLOAT)) >
(SELECT AVG(CAST(TOTAL_AMT AS FLOAT)) FROM Transactions);
---Q15 ind the average and total revenue by each subcategory for the categories which are among top 5 categories in terms of quantity sold.
SELECT * FROM (SELECT ROW_NUMBER() OVER(ORDER BY COUNT (prod_cat) DESC ) AS RNUM, prod_cat,
prod_subcat,
AVG(CAST(TOTAL_AMT AS FLOAT)) AS AVG_SALE,
SUM(CAST(TOTAL_AMT AS FLOAT)) AS TOTAL_SALE,
COUNT (prod_cat) AS TOTAL_QUANTITY
FROM
Transactions AS TR
INNER JOIN prod_cat_info AS PCI
ON TR.prod_cat_code=PCI.prod_cat_code AND TR.prod_subcat_code=PCI.prod_sub_cat_code
GROUP BY prod_cat, prod_subcat) AS T1 WHERE RNUM BETWEEN 1 AND 5;