编辑代码

CREATE DATABASE test;
use test;
CREATE TABLE user_log(
	imp_date int,
	userid varchar(255)
);

INSERT INTO user_log(imp_date,userid) VALUES
(20220121,'0001'),
(20220121,'0001'),
(20220121,'0002'),
(20220121,'0002'),
(20220121,'0003'),
(20220121,'0004'),
(20220122,'0001'),
(20220122,'0003'),
(20220127,'0002'),
(20220128,'0001'),
(20220226,'0001');


/*三、app 的用户访问表 user_log,表中有两个字段,分别是:
日期 imp_date、用户 ID userid,记录了用户当天的访问记录,多次访问会有多次记录。

1、请统计一个月内,每天的七日内留存率
说明:
某日的七日内留存率 = 该日访问用户在后续 7 天(不含当日)内继续访问的用户数 ÷ 该日访问用户数。*/

WITH 
daily_user AS (
    SELECT distinct imp_date,userid,
    STR_TO_DATE(CAST(imp_date AS CHAR), '%Y%m%d') AS dt
    FROM user_log
)


select t1.imp_date as base_date,
count(distinct t1.userid) as total_users,
count(distinct t2.userid) as retained_users,
round(
    if(count(distinct t1.userid)>0,
    count(distinct t2.userid)/count(distinct t1.userid),
    0), 4
    ) as retained_ratio
from daily_user t1
left join daily_user t2
on t1.userid = t2.userid
and t2.dt BETWEEN DATE_ADD(t1.dt, INTERVAL 1 DAY) AND DATE_ADD(t1.dt, INTERVAL 7 DAY)
group by t1.imp_date
order by t1.imp_date;

/*2、请统计一个月内,每天 dau 用户里的高中低活用户比例
说明:统计日的近 30 天里,用户访问 10 天以上的叫做高活,访问 5-10 天的叫做中活,访问 1-5 天的叫做低活*/

/*转换日期格式并且去重*/
with
user_visit as (
    select STR_TO_DATE(CAST(imp_date as CHAR), '%Y%m%d') as dte, userid
    from user_log
    group by imp_date,userid
),

/*计算每个用户在从有记录的天数起30天内的访问天数*/
user_activity as (
    select t1.dte, t1.userid, count(t2.dte) as active_days
    from user_visit t1
    left join user_visit t2
    on t1.userid = t2.userid and t2.dte BETWEEN date_sub(t1.dte, interval 29 day) and t1.dte
    group by t1.dte, t1.userid
),

daily_activity AS (
    SELECT 
        dte,
        COUNT(DISTINCT userid) AS dau,
        COUNT(DISTINCT CASE WHEN active_days > 10 THEN userid END) AS high_active,
        COUNT(DISTINCT CASE WHEN active_days BETWEEN 5 AND 10 THEN userid END) AS medium_active,
        COUNT(DISTINCT CASE WHEN active_days < 5 and active_days > 0 THEN userid END) AS low_active
    FROM user_activity
    GROUP BY dte
)

SELECT 
    dte,
    dau,
    high_active,
    medium_active,
    low_active,
    ROUND(high_active * 100.0 / dau, 2) AS high_active_pct,
    ROUND(medium_active * 100.0 / dau, 2) AS medium_active_pct,
    ROUND(low_active * 100.0 / dau, 2) AS low_active_pct
FROM daily_activity
ORDER BY dte;