编辑代码

DROP TABLE IF EXISTS sz_user_home_location_2020;
CREATE TABLE sz_user_home_location_2020 AS
SELECT
    ua.uid,
    ua.gender,
    ua.age,
    ua.province,
    ua.city,
    DATE_FORMAT(FROM_UNIXTIME(UNIX_TIMESTAMP(CAST(sp.date AS STRING), 'yyyyMMdd')), 'yyyyMM') AS month,
    sp.weighted_centroid_lat AS home_lat,
    sp.weighted_centroid_lon AS home_lon
FROM user_attribute ua
JOIN stay_poi sp
ON ua.uid = sp.uid
WHERE ua.city = 'V0440300' 
AND sp.city = 'V0440300'
AND sp.ptype = 1  
AND sp.date BETWEEN 20200101 AND 20201231;
DROP TABLE IF EXISTS sz_user_relocation_2020;
CREATE TABLE sz_user_relocation_2020 AS
SELECT 
    a.uid, a.gender, a.age, a.province, a.city,
    a.month AS current_month, a.home_lat AS current_lat, a.home_lon AS current_lon,
    b.month AS next_month, b.home_lat AS next_lat, b.home_lon AS next_lon,
    SQRT(POW((COALESCE(a.home_lat, 0) - COALESCE(b.home_lat, 0)) * 111, 2) + 
         POW((COALESCE(a.home_lon, 0) - COALESCE(b.home_lon, 0)) * 111, 2)) AS move_distance_km
FROM sz_user_home_location_2020 a
JOIN sz_user_home_location_2020 b
ON a.uid = b.uid 
AND (CAST(a.month AS INT) DIV 100 * 100 + (CAST(a.month AS INT) % 100 + 1)) = CAST(b.month AS INT)
WHERE 
    (a.home_lat <> b.home_lat OR a.home_lon <> b.home_lon)
    AND SQRT(POW((a.home_lat - b.home_lat) * 111, 2) + POW((a.home_lon - b.home_lon) * 111, 2)) > 1;
SELECT gender, age, province, city, current_month, next_month, move_distance_km 
FROM sz_user_relocation_2020 
LIMIT 10;
SELECT COUNT(*) AS total_records, COUNT(DISTINCT uid) AS unique_users
FROM sz_user_relocation_2020;