DROP TABLE IF EXISTS sz_user_home_location_2020;
CREATE TABLE sz_user_home_location_2020 AS
SELECT
ua.uid,
ua.gender,
ua.age,
ua.province,
ua.city,
DATE_FORMAT(FROM_UNIXTIME(UNIX_TIMESTAMP(CAST(sp.date AS STRING), 'yyyyMMdd')), 'yyyyMM') AS month,
sp.weighted_centroid_lat AS home_lat,
sp.weighted_centroid_lon AS home_lon
FROM user_attribute ua
JOIN stay_poi sp
ON ua.uid = sp.uid
WHERE ua.city = 'V0440300'
AND sp.city = 'V0440300'
AND sp.ptype = 1
AND sp.date BETWEEN 20200101 AND 20201231;
DROP TABLE IF EXISTS sz_user_relocation_2020;
CREATE TABLE sz_user_relocation_2020 AS
SELECT
a.uid, a.gender, a.age, a.province, a.city,
a.month AS current_month, a.home_lat AS current_lat, a.home_lon AS current_lon,
b.month AS next_month, b.home_lat AS next_lat, b.home_lon AS next_lon,
SQRT(POW((COALESCE(a.home_lat, 0) - COALESCE(b.home_lat, 0)) * 111, 2) +
POW((COALESCE(a.home_lon, 0) - COALESCE(b.home_lon, 0)) * 111, 2)) AS move_distance_km
FROM sz_user_home_location_2020 a
JOIN sz_user_home_location_2020 b
ON a.uid = b.uid
AND (CAST(a.month AS INT) DIV 100 * 100 + (CAST(a.month AS INT) % 100 + 1)) = CAST(b.month AS INT)
WHERE
(a.home_lat <> b.home_lat OR a.home_lon <> b.home_lon)
AND SQRT(POW((a.home_lat - b.home_lat) * 111, 2) + POW((a.home_lon - b.home_lon) * 111, 2)) > 1;
SELECT gender, age, province, city, current_month, next_month, move_distance_km
FROM sz_user_relocation_2020
LIMIT 10;
SELECT COUNT(*) AS total_records, COUNT(DISTINCT uid) AS unique_users
FROM sz_user_relocation_2020;