안녕하세요
다시 sql퍼포먼스 관련 고견을 구합니다.
다음과 같은 데이타구조에서 전후 레코드를 비교하여 최대 체재시간이 30분 이상인 경우에 플래그를 설정하는 sql입니다만
대상 테이블이 4억건 이상 있는 테이블입니다. sql만 놓고 보았을 때 성능향상을 위한 개선의 여지가 있는지 봐주시면 감사하겠습니다.
입력이미지
user_id | location_datetime | geocode |
A1 | 2017/4/1 01:14:38 | 12345678 |
A1 | 2017/4/1 01:15:06 | 12345678 |
A1 | 2017/4/1 01:31:39 | 12345678 |
A1 | 2017/4/1 11:19:24 | 12345678 |
A2 | 2017/4/1 13:28:15 | 23456789 |
A2 | 2017/4/1 13:35:10 | 23456789 |
A2 | 2017/4/1 19:56:18 | 34567890 |
A2 | 2017/4/1 19:56:46 | 34567890 |
A3 | 2017/4/1 07:17:21 | 12345678 |
A3 | 2017/4/1 07:22:14 | 12345678 |
A3 | 2017/4/1 08:42:18 | 12345678 |
A3 | 2017/4/1 08:47:20 | 12345678 |
A3 | 2017/4/1 08:52:20 | 12345678 |
A3 | 2017/4/1 08:57:20 | 12345678 |
A3 | 2017/4/1 09:07:21 | 12345678 |
출력이미지
popinfo_id | location_datetime_kr | geocode | stay_time_max | flag_stay_prot |
A1 | 2017/4/1 10:14:38 | 12345678 | 10:04:46 | 1 |
A1 | 2017/4/1 10:15:06 | 12345678 | 10:04:46 | 1 |
A1 | 2017/4/1 10:31:39 | 12345678 | 10:04:46 | 1 |
A1 | 2017/4/1 20:19:24 | 12345678 | 10:04:46 | 1 |
A2 | 2017/4/1 22:28:15 | 23456789 | 00:06:55 | 0 |
A2 | 2017/4/1 22:35:10 | 23456789 | 00:06:55 | 0 |
A2 | 2017/4/2 04:56:18 | 34567890 | 00:00:28 | 0 |
A2 | 2017/4/2 04:56:46 | 34567890 | 00:00:28 | 0 |
A3 | 2017/4/1 16:17:21 | 12345678 | 01:50:00 | 1 |
A3 | 2017/4/1 16:22:14 | 12345678 | 01:50:00 | 1 |
A3 | 2017/4/1 17:42:18 | 12345678 | 01:50:00 | 1 |
A3 | 2017/4/1 17:47:20 | 12345678 | 01:50:00 | 1 |
A3 | 2017/4/1 17:52:20 | 12345678 | 01:50:00 | 1 |
A3 | 2017/4/1 17:57:20 | 12345678 | 01:50:00 | 1 |
A3 | 2017/4/1 18:07:21 | 12345678 | 01:50:00 | 1 |
그리고 실행SQL은 다음과 같습니다.
WITH t1 AS
(
SELECT
(location_datetime + interval '9 hours') AS location_datetime_kr
,user_id as popinfo_id
--위도/경도에서 구한 1Km범위의단위코드
,geocode
FROM
locations_info
WHERE
(location_datetime + interval '9 hours') BETWEEN '2016-01-01 00:00:00' and '2016-12-31 23:59:59'
)
, t2 AS
(
SELECT
popinfo_id
, location_datetime_kr
, geocode
--앞레코드 비교
, CASE
-- 앞레코드와 같은 user에 같은 지역(같은 geocode)의 경우
WHEN(
popinfo_id = LAG(popinfo_id, 1, '') OVER (ORDER BY popinfo_id, location_datetime_kr)
AND geocode = LAG(geocode, 1, '') OVER (ORDER BY popinfo_id, location_datetime_kr)
)
THEN LAG(location_datetime_kr, 1, null) OVER (ORDER BY popinfo_id, location_datetime_kr)
ELSE null
END AS w_lag
-- (현재레코드 location_datetime_kr)-(앞레코드 location_datetime_kr)
, CASE
-- 앞레코드와 같은 user에 같은 지역(같은 geocode)의 경우
WHEN(
popinfo_id = LAG(popinfo_id, 1, '') OVER (ORDER BY popinfo_id, location_datetime_kr)
AND geocode = LAG(geocode, 1, '') OVER (ORDER BY popinfo_id, location_datetime_kr)
)
THEN location_datetime_kr - LAG(location_datetime_kr, 1, null) OVER (ORDER BY popinfo_id, location_datetime_kr)
ELSE null
END AS w_time_sub
-- user와 geocode 단위로 선두에 flg설정
, CASE
-- 앞레코드와 user 또는 지역(geocode)이 다른 경우
WHEN(
popinfo_id <> LAG(popinfo_id, 1, '') OVER (ORDER BY popinfo_id, location_datetime_kr)
OR geocode <> LAG(geocode, 1, '') OVER (ORDER BY popinfo_id, location_datetime_kr)
)
THEN 1
ELSE 0
END AS w_num1
FROM
t1
ORDER BY
popinfo_id, location_datetime_kr
)
, t3 AS
(
SELECT
popinfo_id
, location_datetime_kr
, geocode
, w_num1
, w_time_sub
-- user와 geocode 단위로 넘버링
, sum(w_num1) OVER (ORDER BY popinfo_id, location_datetime_kr) AS w_num2
FROM
t2
)
, t4 AS
(
SELECT
popinfo_id
, location_datetime_kr
, geocode
, w_num1
, w_num2
-- user와 geocode단위로 레코드간 체재시간을 표시
, SUM(w_time_sub)
OVER (PARTITION BY w_num2
ORDER BY popinfo_id, location_datetime_kr
) AS stay_time_accum
FROM
t3
)
SELECT
popinfo_id
, location_datetime_kr
, geocode
-- 최대 체재시간
, MAX(stay_time_accum)
OVER(PARTITION BY w_num2) AS stay_time_max
-- 최대 체재시간이 30분이상인 경우
, CASE
WHEN(
MAX(stay_time_accum) OVER(PARTITION BY w_num2) >= '00:30:00'
)
THEN 1
ELSE 0
END AS flg_stay_prot
FROM
t4
컬럼을 가공하여 조건값과 비교하지 마세요.
컬럼은 그대로 두고 조건값을 가공하여 비교하세요.
WITH pop_locations AS ( SELECT 'A1' user_id, '2017/4/1 01:14:38'::timestamp location_datetime, 12345678 geocode UNION ALL SELECT 'A1', '2017/4/1 01:15:06', 12345678 UNION ALL SELECT 'A1', '2017/4/1 01:31:39', 12345678 UNION ALL SELECT 'A1', '2017/4/1 11:19:24', 12345678 UNION ALL SELECT 'A2', '2017/4/1 13:28:15', 23456789 UNION ALL SELECT 'A2', '2017/4/1 13:35:10', 23456789 UNION ALL SELECT 'A2', '2017/4/1 19:56:18', 34567890 UNION ALL SELECT 'A2', '2017/4/1 19:56:46', 34567890 UNION ALL SELECT 'A3', '2017/4/1 07:17:21', 12345678 UNION ALL SELECT 'A3', '2017/4/1 07:22:14', 12345678 UNION ALL SELECT 'A3', '2017/4/1 08:42:18', 12345678 UNION ALL SELECT 'A3', '2017/4/1 08:47:20', 12345678 UNION ALL SELECT 'A3', '2017/4/1 08:52:20', 12345678 UNION ALL SELECT 'A3', '2017/4/1 08:57:20', 12345678 UNION ALL SELECT 'A3', '2017/4/1 09:07:21', 12345678 ) SELECT user_id , location_datetime_kr , geocode , stay_time_max , CASE WHEN stay_time_max >= '00:30:00' THEN 1 ELSE 0 END flg_stay_prot FROM (SELECT user_id , location_datetime_kr , geocode , MAX(location_datetime_kr) OVER(PARTITION BY user_id, gb) - MIN(location_datetime_kr) OVER(PARTITION BY user_id, gb) stay_time_max FROM (SELECT user_id , location_datetime + INTERVAL '9 hours' location_datetime_kr , geocode , ROW_NUMBER() OVER(PARTITION BY user_id ORDER BY location_datetime) - ROW_NUMBER() OVER(PARTITION BY user_id, geocode ORDER BY location_datetime) gb FROM pop_locations WHERE location_datetime BETWEEN '2017-01-01 00:00:00'::timestamp - INTERVAL '9 hours' AND '2017-12-31 23:59:59'::timestamp - INTERVAL '9 hours' ) a ) a ;