-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path04_rfm_analysis.sql
More file actions
133 lines (104 loc) · 3.68 KB
/
04_rfm_analysis.sql
File metadata and controls
133 lines (104 loc) · 3.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
--
-- CANNOT FORGET THAT RECENCY HAS A INVERTED SCALE
CREATE OR REPLACE VIEW rfm_scores as
WIth rfm_base as (
Select
customerkey,
recency_days,
order_count,
lifetime_revenue
from customer_summary
),
rfm_ranks as (
select
rb.*,
percent_rank() over (order by recency_days ASC) as recency_pct,
percent_rank() over (order by order_count DESC) as frequency_pct,
percent_rank() over (order by lifetime_revenue DESC) as monetary_pct
from rfm_base as rb
),
rfm_scored as (
select
customerkey,
recency_days,
order_count,
lifetime_revenue,
CASE
when recency_pct < 0.20 then 5
when recency_pct < 0.40 then 4
when recency_pct < 0.60 then 3
when recency_pct < 0.80 then 2
else 1
end as recency_score,
CASE
WHEN order_count = 1 THEN 1
WHEN order_count = 2 THEN 2
WHEN order_count = 3 THEN 3
WHEN order_count = 4 THEN 4
ELSE 5
END AS frequency_score,
CASE
WHEN monetary_pct < 0.20 THEN 5
WHEN monetary_pct < 0.40 THEN 4
WHEN monetary_pct < 0.60 THEN 3
WHEN monetary_pct < 0.80 THEN 2
ELSE 1
END AS monetary_score
FROM rfm_ranks
)
select
customerkey,
recency_days,
order_count,
lifetime_revenue,
recency_score,
frequency_score,
monetary_score,
concat(recency_score,frequency_score,monetary_score) as rfm_score,
(recency_score+frequency_score+monetary_score) as rfm_total_score
from rfm_scored
order by customerkey
---------
-- get the metric boundaries:
SELECT
-- recency boundaries (ascending, so low value = score 5)
PERCENTILE_CONT(0.20) WITHIN GROUP (ORDER BY recency_days ASC) AS recency_p20,
PERCENTILE_CONT(0.40) WITHIN GROUP (ORDER BY recency_days ASC) AS recency_p40,
PERCENTILE_CONT(0.60) WITHIN GROUP (ORDER BY recency_days ASC) AS recency_p60,
PERCENTILE_CONT(0.80) WITHIN GROUP (ORDER BY recency_days ASC) AS recency_p80,
-- frequency boundaries (descending, so high value = score 5)
PERCENTILE_CONT(0.20) WITHIN GROUP (ORDER BY order_count DESC) AS frequency_p20,
PERCENTILE_CONT(0.40) WITHIN GROUP (ORDER BY order_count DESC) AS frequency_p40,
PERCENTILE_CONT(0.60) WITHIN GROUP (ORDER BY order_count DESC) AS frequency_p60,
PERCENTILE_CONT(0.80) WITHIN GROUP (ORDER BY order_count DESC) AS frequency_p80,
-- monetary boundaries (descending, so high value = score 5)
PERCENTILE_CONT(0.20) WITHIN GROUP (ORDER BY lifetime_revenue DESC) AS monetary_p20,
PERCENTILE_CONT(0.40) WITHIN GROUP (ORDER BY lifetime_revenue DESC) AS monetary_p40,
PERCENTILE_CONT(0.60) WITHIN GROUP (ORDER BY lifetime_revenue DESC) AS monetary_p60,
PERCENTILE_CONT(0.80) WITHIN GROUP (ORDER BY lifetime_revenue DESC) AS monetary_p80
FROM customer_summary;
/* results:
"recency_p20","recency_p40","recency_p60","recency_p80","frequency_p20","frequency_p40","frequency_p60","frequency_p80","monetary_p20","monetary_p40","monetary_p60","monetary_p80"
268.2000000000007,542,858,1810,2,2,1,1,6652.996374928259,3415.654995870336,1701.258304,606.9617839999996
*/
-- number of customers by order count
SELECT
order_count,
COUNT(*) AS num_customers,
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS pct_customers
FROM customer_summary
GROUP BY order_count
ORDER BY order_count;
/* Results:
"order_count","num_customers","pct_customers"
"1","27541","55.65"
"2","13914","28.12"
"3","5400","10.91"
"4","1873","3.78"
"5","559","1.13"
"6","145","0.29"
"7","39","0.08"
"8","14","0.03"
"9","1","0.00"
"10","1","0.00"
*/