- 골치아픈 Case
- I. Skewed Data
- I.1. 테스트 데이터 생성
- I.2. 통계정보 확인
- I.3. 테스트
골치아픈 Case
구분 | 설명 |
---|
Skewed Data | - Oracle은 값에 따른 Data 분포의 편차가 큰 경우 잘 해석하지 못함.
- Skew 문제를 해결할 수 있는 유일한 방법은 Histogram
- 하지만 Histogram으로도 100% 해결할 수 없는 Case가 있음
|
Correlated Columns | - Oracle이 Cardinality를 계산할 때 사용하는 공식들은 Column들이 서로 독립적인것을 전제로 함.
- Oracle은 이 가정에 위배되는 Data가 존재하는 경우 잘 해석하지 못함.
|
Join Cardinality | - Skewed Data를 잘 해석하지 못하는 것처럼 Join시에도 Join Column들의 Data 편차가 있을 경우
잘 해석하지 못함.
|
Partition Key | - Partition Table에 대한 Cardinality 예측 또한 예상치 못한 결과를 보임
- Partition Elimination이 가능한 경우와 불가능한 경우 많은 차이를 보이며, 가능하더라도 단일 Partition
을 Access 하는 경우와 다중 Partiton을 Access 하는 경우에 다른 Patten의 Cardanality 계산이 이루어짐
|
I. Skewed Data
I.1. 테스트 데이터 생성
-- create object
DROP TABLE T1 PURGE;
CREATE TABLE T1(C1 VARCHAR2(1), C2 INT);
CREATE INDEX T1_N1 ON T1(C1);
-- create data
INSERT INTO T1
SELECT 'A',
LEVEL
FROM DUAL
CONNECT BY LEVEL <= 10000
UNION ALL
SELECT 'B', LEVEL
FROM DUAL
CONNECT BY LEVEL <= 10
;
COMMIT;
-- gather stats "without" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T1', METHOD_OPT => 'FOR ALL COLUMNS SIZE 1', NO_INVALIDATE => FALSE);
I.2. 통계정보 확인
-- Table
SELECT TABLE_NAME,
NUM_ROWS,
BLOCKS,
SAMPLE_SIZE,
TO_CHAR(LAST_ANALYZED, 'YYYY/MM/DD HH24:MI:SS') AS LAST_ANAL
FROM USER_TAB_STATISTICS
WHERE TABLE_NAME = UPPER('T1')
;
TABLE_NAME | NUM_ROWS | BLOCKS | SAMPLE_SIZE | LAST_ANAL |
---|
T1 | 10010 | 28 | 10010 | 2009-03-01 1:43:32 PM |
-- Column
SELECT S.TABLE_NAME,
S.COLUMN_NAME,
S.NUM_DISTINCT,
S.NUM_NULLS,
S.DENSITY,
S.LOW_VALUE,
S.HIGH_VALUE,
S.HISTOGRAM
FROM USER_TAB_COLS S
WHERE S.TABLE_NAME = UPPER('T1')
;
TABLE_NAME | COLUMN_NAME | NUM_DISTINCT | NUM_NULLS | DENSITY | LOW_VALUE | HIGH_VALUE | HISTOGRAM | T1 | C2 | 10000 | 0 | 0.0001 | C102 | C302 | NONE |
---|
T1 | C1 | 2 | 0 | 0.5 | 41 | 42 | NONE |
-- Histogram
SELECT TABLE_NAME,
COLUMN_NAME,
ENDPOINT_NUMBER,
ENDPOINT_VALUE || '(' || ENDPOINT_ACTUAL_VALUE || ')' AS ENDPOINT_VALUE
FROM USER_TAB_HISTOGRAMS
WHERE TABLE_NAME = UPPER('T1')
ORDER BY COLUMN_NAME,
ENDPOINT_NUMBER
;
TABLE_NAME | COLUMN_NAME | ENDPOINT_NUMBER | ENDPOINT_VALUE |
---|
T1 | C1 | 0 | 337499295804764000000000000000000000() |
T1 | C1 | 1 | 342691592663299000000000000000000000() |
T1 | C2 | 0 | 1() |
T1 | C2 | 1 | 10000() |
I.3. 테스트
1) C1 Columns 'A' Cardanality
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1
WHERE C1 = 'A'
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
--------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
--------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 31 |
|* 2 | TABLE ACCESS FULL| T1 | 1 | 10000 | 9 (0)| 10000 |00:00:00.01 | 31 |
--------------------------------------------------------------------------------------------------
2) C1 Columns 'B' Cardanality
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1
WHERE C1 = 'B'
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
--------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
--------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 31 |
|* 2 | TABLE ACCESS FULL| T1 | 1 | 5005 | 9 (0)| 10 |00:00:00.01 | 31 |
--------------------------------------------------------------------------------------------------
3) gather stats "with" histogram(SKEWONLY : 데이터 분산도에 따라 생성 결정)
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T1', METHOD_OPT => 'FOR ALL COLUMNS SIZE SKEWONLY', NO_INVALIDATE => FALSE);
-- Histogram
SELECT S.TABLE_NAME,
S.COLUMN_NAME,
S.NUM_DISTINCT,
S.NUM_NULLS,
S.DENSITY,
S.LOW_VALUE,
S.HIGH_VALUE,
S.HISTOGRAM
FROM USER_TAB_COLS S
WHERE S.TABLE_NAME = UPPER('T1')
;
TABLE_NAME | COLUMN_NAME | NUM_DISTINCT | NUM_NULLS | DENSITY | LOW_VALUE | HIGH_VALUE | HISTOGRAM | T1 | C2 | 10000 | 0 | 0.0001 | C102 | C302 | NONE |
---|
T1 | C1 | 2 | 0 | 00004995004995005 | 41 | 42 | FREQUENCY |
4) C1 Columns 'A' Cardanality with histogram
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1
WHERE C1 = 'A'
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
--------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
--------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 31 |
|* 2 | TABLE ACCESS FULL| T1 | 1 | 10000 | 9 (0)| 10000 |00:00:00.01 | 31 |
--------------------------------------------------------------------------------------------------
;
5) C1 Columns 'B' Cardanality with histogram
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1
WHERE C1 = 'B'
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
--------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
--------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 2 |
|* 2 | INDEX RANGE SCAN| T1_N1 | 1 | 10 | 1 (0)| 10 |00:00:00.01 | 2 |
--------------------------------------------------------------------------------------------------
;