编辑代码

-- 1️⃣ 创建男性用户的家庭网格信息表,仅限 2020 年深圳市
DROP TABLE IF EXISTS male_home_grid_2020;

CREATE TABLE male_home_grid_2020 AS
SELECT uid, age, province, city, date, home_grid_id
FROM (
    SELECT 
        ua.uid, 
        ua.age, 
        ua.province, 
        ua.city, 
        CAST(SUBSTRING(ua.date, 1, 6) AS INT) AS date,  -- 统一 `date` 格式为 `yyyyMM`
        sp.final_grid_id AS home_grid_id,
        ROW_NUMBER() OVER (PARTITION BY ua.uid, ua.date ORDER BY sp.final_grid_id) AS row_num
    FROM user_attribute ua
    JOIN stay_poi sp 
    ON ua.uid = sp.uid
    WHERE ua.gender = '01'  -- 只提取男性
    AND sp.ptype = 1  -- 只保留居住地
    AND ua.province = '广东省'  -- 只保留广东省
    AND ua.city IN ('深圳', '深圳市')  -- 兼容 `深圳` 或 `深圳市`
    AND CAST(SUBSTRING(ua.date, 1, 6) AS INT) BETWEEN 202001 AND 202012  -- 只保留 2020 年数据
) t
WHERE row_num = 1;  -- 只保留每个 `uid + date` 的第一条数据

-- 2️⃣ 确保 `male_home_grid_2020` 有数据
SELECT COUNT(*) FROM male_home_grid_2020;

-- 3️⃣ 计算深圳市 2020 年相邻月份的迁居信息
DROP TABLE IF EXISTS male_user_relocation_2020;

CREATE TABLE male_user_relocation_2020 AS
SELECT 
    a.uid,  
    a.age, 
    a.province, 
    a.city,
    a.date AS current_month, 
    a.home_grid_id AS current_grid,
    b.date AS next_month, 
    b.home_grid_id AS next_grid
FROM male_home_grid_2020 a
JOIN male_home_grid_2020 b
ON a.uid = b.uid  
AND a.province = b.province
AND a.city = b.city
AND b.date = a.date + 1  -- 确保 `date` 计算正确
WHERE a.home_grid_id <> b.home_grid_id;  -- 只保留发生迁居的用户

-- 4️⃣ 确保 `male_user_relocation_2020` 有数据
SELECT COUNT(*) FROM male_user_relocation_2020;

-- 5️⃣ 统计深圳市 2020 年的迁居情况,按网格聚合
DROP TABLE IF EXISTS shenzhen_grid_migration_2020;

CREATE TABLE shenzhen_grid_migration_2020 AS
SELECT 
    current_grid, 
    next_grid, 
    COUNT(DISTINCT uid) AS num_migrated_users  -- 统计唯一用户数
FROM male_user_relocation_2020
GROUP BY current_grid, next_grid;

-- 6️⃣ 确保 `shenzhen_grid_migration_2020` 有数据
SELECT COUNT(*) FROM shenzhen_grid_migration_2020;

-- 7️⃣ 查询深圳市 2020 年的总迁居人数
SELECT SUM(num_migrated_users) AS total_migrations FROM shenzhen_grid_migration_2020;


-- 1️⃣ 创建男性用户的家庭网格信息表,仅限 2020 年深圳市
DROP TABLE IF EXISTS male_home_grid_2020;

CREATE TABLE male_home_grid_2020 AS
SELECT uid, age, province, city, date, home_grid_id
FROM (
    SELECT 
        ua.uid, 
        ua.age, 
        ua.province, 
        ua.city, 
        CAST(SUBSTRING(ua.date, 1, 6) AS INT) AS date,  -- 统一 `date` 格式为 `yyyyMM`
        sp.final_grid_id AS home_grid_id,
        ROW_NUMBER() OVER (PARTITION BY ua.uid, ua.date ORDER BY sp.final_grid_id) AS row_num
    FROM user_attribute ua
    JOIN stay_poi sp 
    ON ua.uid = sp.uid
    WHERE ua.gender IN ('01', '1')  -- 兼容不同格式的性别字段
    AND sp.ptype = 1  -- 只保留居住地
    AND ua.province = '广东省'  -- 只保留广东省
    AND ua.city IN ('深圳', '深圳市')  -- 兼容 `深圳` 或 `深圳市`
    AND CAST(SUBSTRING(ua.date, 1, 6) AS INT) BETWEEN 202001 AND 202012  -- 只保留 2020 年数据
) t
WHERE row_num = 1;  -- 只保留每个 `uid + date` 的第一条数据

-- 2️⃣ 确保 `male_home_grid_2020` 有数据
SELECT COUNT(*) FROM male_home_grid_2020;

-- 3️⃣ 计算深圳市 2020 年相邻月份的迁居信息
DROP TABLE IF EXISTS male_user_relocation_2020;

CREATE TABLE male_user_relocation_2020 AS
SELECT 
    a.uid,  
    a.age, 
    a.province, 
    a.city,
    a.date AS current_month, 
    a.home_grid_id AS current_grid,
    b.date AS next_month, 
    b.home_grid_id AS next_grid
FROM male_home_grid_2020 a
JOIN male_home_grid_2020 b
ON a.uid = b.uid  
AND a.province = b.province
AND a.city = b.city
AND CAST(b.date AS INT) = CAST(a.date AS INT) + 1  -- 确保 `date` 计算正确
WHERE a.home_grid_id <> b.home_grid_id;  -- 只保留发生迁居的用户

-- 4️⃣ 确保 `male_user_relocation_2020` 有数据
SELECT COUNT(*) FROM male_user_relocation_2020;

-- 5️⃣ 统计深圳市 2020 年的迁居情况,按网格聚合
DROP TABLE IF EXISTS shenzhen_grid_migration_2020;

CREATE TABLE shenzhen_grid_migration_2020 AS
SELECT 
    current_grid, 
    next_grid, 
    COUNT(DISTINCT uid) AS num_migrated_users  -- 统计唯一用户数
FROM male_user_relocation_2020
GROUP BY current_grid, next_grid;

-- 6️⃣ 确保 `shenzhen_grid_migration_2020` 有数据
SELECT COUNT(*) FROM shenzhen_grid_migration_2020;

-- 7️⃣ 查询深圳市 2020 年的总迁居人数
SELECT SUM(num_migrated_users) AS total_migrations FROM shenzhen_grid_migration_2020;