CREATE DATABASE test;
use test;
CREATE TABLE user_log(
imp_date int,
userid varchar(255)
);
INSERT INTO user_log(imp_date,userid) VALUES
(20220121,'0001'),
(20220121,'0001'),
(20220121,'0002'),
(20220121,'0002'),
(20220121,'0003'),
(20220121,'0004'),
(20220122,'0001'),
(20220122,'0003'),
(20220127,'0002'),
(20220128,'0001'),
(20220226,'0001');
WITH
daily_user AS (
SELECT distinct imp_date,userid,
STR_TO_DATE(CAST(imp_date AS CHAR), '%Y%m%d') AS dt
FROM user_log
)
select t1.imp_date as base_date,
count(distinct t1.userid) as total_users,
count(distinct t2.userid) as retained_users,
round(
if(count(distinct t1.userid)>0,
count(distinct t2.userid)/count(distinct t1.userid),
0), 4
) as retained_ratio
from daily_user t1
left join daily_user t2
on t1.userid = t2.userid
and t2.dt BETWEEN DATE_ADD(t1.dt, INTERVAL 1 DAY) AND DATE_ADD(t1.dt, INTERVAL 7 DAY)
group by t1.imp_date
order by t1.imp_date;
with
user_visit as (
select STR_TO_DATE(CAST(imp_date as CHAR), '%Y%m%d') as dte, userid
from user_log
group by imp_date,userid
),
user_activity as (
select t1.dte, t1.userid, count(t2.dte) as active_days
from user_visit t1
left join user_visit t2
on t1.userid = t2.userid and t2.dte BETWEEN date_sub(t1.dte, interval 29 day) and t1.dte
group by t1.dte, t1.userid
),
daily_activity AS (
SELECT
dte,
COUNT(DISTINCT userid) AS dau,
COUNT(DISTINCT CASE WHEN active_days > 10 THEN userid END) AS high_active,
COUNT(DISTINCT CASE WHEN active_days BETWEEN 5 AND 10 THEN userid END) AS medium_active,
COUNT(DISTINCT CASE WHEN active_days < 5 and active_days > 0 THEN userid END) AS low_active
FROM user_activity
GROUP BY dte
)
SELECT
dte,
dau,
high_active,
medium_active,
low_active,
ROUND(high_active * 100.0 / dau, 2) AS high_active_pct,
ROUND(medium_active * 100.0 / dau, 2) AS medium_active_pct,
ROUND(low_active * 100.0 / dau, 2) AS low_active_pct
FROM daily_activity
ORDER BY dte;