01. Revenue
Revenue : 매출
# 7월동안 매출
select sum(price) as revenue
from tbl_puchase
where puchased_at between '2020-07-01' and '2020-07-31';
02. MAU(Monthly Active Users)
# 7월동안 방문한 고객의 수
select count(distinct customer_id)
from tbl.visit
where puchased_at between '2020-07-01' and '2020-07-31';
03. Paying Rate
구매유저 수 / 전체 활성유저
# 7월동안 구매한 고객의 비율
# 구매유저
select count(distinct customer_id)
from tbl_purchase
where puchased_at between '2020-07-01' and '2020-07-31';
# 활성유저
select count(distinct customer_id)
from tbl_visit
where visited_at between '2020-07-01' and '2020-07-31';
# 나온 결과로 그냥 나눈 값 출력
select round(11174 / 16414 * 100, 2)
04. ARPPU
한명당 월 평균 구매액
# 7월에 구매 유저의 월 평균 구매액
# 고객 당 구매금액 평균
selct avg(revenue) as ARPPU
from (select customer_id, avg(price) as revenue
from tbl_purchase
where puchased_at between '2020-07-01' and '2020-07-31'
group by customer_id ) a;
05. 고과금 유저
# 7월에 가장 많이 구매한 top3 고객과 top10~15 고객
select customer_id, avg(price) as revenue
from tbl_purchase
where puchased_at between '2020-07-01' and '2020-07-31'
group by 1
order by 2 desc
limit 3;
# top10~15
~ limit 9,6 # 9개를 넘기고 그 다음 6개를 출력
~ limit 3 offset 9 # 9개를 넘기고 그 다음 3개 출력
+ 날짜·시간별 함수
- select now()
- select current_date();
- select extract(month from '2025-02-12'); # 날짜의 month만 뽑아서 추출
- select day('2025-02-12'); # 날짜의 day만 뽑아서 추출
- select date_add('2025-02-12', intrval 7day); # 날짜에 7일 더하기
- select date_sub('2025-02-12', intrval 7day); # 날짜에 7일 빼기
- select datediff('2025-02-12', '2025-02-21'); # 두 날짜의 차이
- select timediff('2025-02-12 12:00:00', '2025-02-21 12:10:00'); # 두 날짜·시간의 차이
- select date_format(now(), '%Y-%m-%d %W');
06. DAU(Daily Active Users)
하루 활성 유저
# 7월의 평균 DAU, DAU가 증가하는 추세인가요 ?
select round(avg(users),2) as DAU
from ( select date_format(visited_at - interval 9 hour, '%Y-%m-%d') as date_at,
# 표준시간의 오차로 interval 9 해주기
count(distinct customer_id) as users
from tbl_visit
where visited_at between '2020-07-01' and '2020-07-31'
group by 1
order by 2 ) a;
07. WAU(Weekly Active Users)
# 7월의 WAU
select avg(users) as WAU
form ( select date_format(visited_at - interval 9 hour, '%Y-%m-%U') as date_at
count(distinct customer_id) as users
from tbl_visit
where visited_at between '2020-07-05' and '2020-07-25'
group by 1
order by 2) a;
08. Daily·Weekly Revenue
위의 코드에서 count를 sum(price) as revenue로 서브쿼리에서 구하고
avg(revenue)로 결과 도출하기
09. 요일별 Revenue
위의 코드에서 '%W', sum(price) as revenue로 서브쿼리에서 구하고
avg(revenue)로 결과 도출하기
%w : %W = 0 : Sunday ~ 7 : Saturday
10. 시간대별 Revenue
date_format 을 '%H'로 설정
11. 요일·시간대별 Revenue
select dayofweek_at, hour_at, avg(revenue)
from (
select date_format(purchased_at - interval 9 hour, '%Y-%m-%d') as date_at,
date_format(purchased_at - interval 9 hour, '%M') as dayofweek_at,
date_format(purchased_at - interval 9 hour, '%W') as hour_at,
sum(price) as revenue
from tbl_purchase
where visited_at between '2020-07-01' and '2020-07-31'
group by 1, 2, 3) a
group by 1,2
order by 3 desc
12. 그룹핑하여 집계
select case when length(gender) < 1 then 'Others'
else gender end as gender,
count(*)
from tbl_customer
group by 1;
13. 세그먼트별 분포
고객분류 - group by로 성별, 나이, 등의 여러 기준으로 고객 나누기
14. 일별 매출 증감률
# with 으로 select한 구문을 table처럼 사용 가능
with tbl_revenue as (
select date_format(purchased_at - interval 9 hour, '%Y-%m-%d') as d_date,
sum(price) as revenue
from tbl_purchase
where purchased_at between '2020-07-01' and '2020-07-31'
group by 1)
# revenue 를 d_date 기준으로 한칸씩 밀어쓰기
select lag(revenue) over (order by d_date asc)
from tbl_revenue;
# 어제 revenue에 오늘 revenue 빼기
# 어제의 revenue 대비 오늘의 revenue 차이의 비율 --> 증감률
select revenue - lag(revenue) over (order by d_date asc) as diff_revenue,
round(( revenue - lag(revenue) over (order by d_date asc)) /
lag(revenue) over (order by d_date asc)*100,2) as chg_revenue
from tbl_revenue;
17. 일별 고과금 유저
rank() over (partition by c1, c2 order by price desc)
select *
from (
select date_format(purchased_at - interval 9 hour, '%Y-%m-%d') as d_date,
customer_id, sum(price) as revenue,
dense_rank() over (partition by date_format(purchased_at - interval 9 hour, '%Y-%m-%d')
order by sum(price) desc) as rank_rev
from tbl_purchase
where purchased_at between '2020-07-01' and '2020-07-31'
group by 1, 2 ) a
where rank_rev < 4
18. 신규유저 Paying Conversion
# 7월이 신규유저가 하루 만에 결제로 넘어가는 비율이 어떻게 되나요?
# 결제까지 보통 몇 분 정도가 소요되는지 알고 싶어요.
with rt_tbl as (
select c.customer_id,
p.customer_id as paying_user,
time_to_sec(timediff(p.purchased_at, c.created_at)) / 3600 as diff_time
from tbl_customer c
left join (select customer_id, min(purchased_at) as purchased_at
from tbl_purchase group by 1) p
on c.customer_id = p.customer_id
and p.purchased_at < c.created_at + interval 1 day
where c.created_at between '2020-07-01' and '2020-07-31'
)
# 결과
select (select round(count(paying_user)/count(customer_id)*100,2) from rt_tbl) as purchase_with_1day,
(select avg(diff_time) from rt_tbl) as avg_purchase_time
from dual;
19. Retention
Retention : 시간이 지날수록 얼마나 많은 유저가 제품이나 서비스로 다시 돌아오는 지
# 우리 서비스는 유저의 재방문율이 높은 서비스인가요?
# 7월 기준 Day1 Retention이 어떤지 구하고, 추세를 보기 위해 Daily로 추출해주세요.
select date_format(a.visited_at - interval 9 hour, '%Y-%m-%d') as d_date,
count(distinct a.customer_id) as active_user,
count(distinct b.customer_id) as retained_user,
count(distinct b.customer_id)/count(distinct a.customer_id) as retention
from tbl_visit a left join tbl_visit b on a.customer_id = b.customer_id
and date_format(a.visited_at-interval 9 hour,'%Y-%m-%d')=
date_format(b.visited_at-interval 9 hour - interval 1 day,'%Y-%m-%d') # b는 어제 데이터가 담기게
where a.visited_at between '2020-07-01' and '2020-07-31'
group by 1;
20. 가입기간별 DAU
# 우리 서비스는 신규유저가 많나요? 기존유저가 많나요?
# 가입기간별로 고객 분포가 어떤지 알려주세요, DAU 기준으로 부탁합니다
with tbl_visit_by_joined as (
select date_format(visited_at - interval 9 hour, '%Y-%m-%d') as d_date,
a.customer_id,
b.created_at as d_joined,
max(a.visited_at) as last_visit,
datediff(max(a.visited_at), b.created_at) as date_diff
from tbl_visit a
left join tbl_customer b on a.customer_id = b.customer_id
where a.visited_at between '2020-07-01' and '2020-07-31'
group by 1, 2, 3
)
select a.d_date,
case when a.date_diff >= 730 then '2년 이상'
when a.date_diff >= 365 then '1년 이상'
when a.date_diff >= 183 then '6개월 이상'
when a.date_diff >= 91 then '3개월 이상'
when a.date_diff >= 30 then '1개월 이상'
else '1개월 미만'
end as segment,
b.all_users,
count(a.customer_id) as users,
round(count(a.customer_id)/b.all_users*100,2) as ratio
from tbl_visit_by_joined a
left join (select d_date, count(customer_id) as all_users
from tbl_visit_by_joined group by 1) b
on a.d_date=b.d_date
group by 1,2,3
order by 1,2;
+ 가운데 점 삽입 : alt+0183
'패스트캠퍼스' 카테고리의 다른 글
[패스트캠퍼스] 데이터 분석 부트캠프 17기 - 12주차 (1) Tableau - 측정값 (0) | 2025.03.07 |
---|---|
[패스트캠퍼스] 데이터 분석 부트캠프 17기 - 9주차 (4) MySQL hakerrank (0) | 2025.02.14 |
[패스트캠퍼스] 데이터 분석 부트캠프 17기 - 9주차 (2) MySQL 서브쿼리 (0) | 2025.02.07 |
[패스트캠퍼스] 데이터 분석 부트캠프 17기 - 9주차 (1) MySQL JOIN (0) | 2025.02.04 |
[패스트캠퍼스] 데이터 분석 부트캠프 17기 - 8주차 (3) MySQL 조건,고급 (0) | 2025.02.04 |