목차
I. 통계정보 조작
II. CARDINALITY Hint
III. OPT_ESTIMATE Hint
IV. Dynamic Sampling
I. 통계정보 조작
I.1. 개념 설명
- 통계정보를 조작하여 Cardinality를 제어하는 기법
- Cardinality는 Selectivity에 의해 결정되고, Selectivity는 주로 Density에 의해 결정됨
- 따라서, Density를 조작할 수 있으면 Cardinality를 조작할 수 있으며 Histogram 또한 마찬가지임
I.2. 테스트 Object 생성
DROP TABLE T1 PURGE;
DROP TABLE T2 PURGE;
CREATE TABLE T1(ID1 INT, STATUS1 CHAR(5));
CREATE TABLE T2(ID2 INT);
CREATE INDEX T1_IDX ON T1(ID1);
CREATE INDEX T2_IDX ON T2(ID2);
INSERT INTO T1
SELECT -- column id1 is uniform
MOD(R, 1000),
-- column status1 is skewed!!!
CASE WHEN R BETWEEN 1 AND 9000 THEN '-1'
ELSE TO_CHAR(MOD(R, 1000)) END
FROM (SELECT LEVEL AS R
FROM DUAL
CONNECT BY LEVEL <= 10000)
ORDER BY DBMS_RANDOM.RANDOM
;
INSERT INTO T2
SELECT MOD(R, 100)
FROM (SELECT LEVEL AS R
FROM DUAL
CONNECT BY LEVEL <= 10000)
;
BEGIN
DBMS_STATS.GATHER_TABLE_STATS(USER,
'T1',
CASCADE => TRUE,
ESTIMATE_PERCENT => 100,
METHOD_OPT => 'FOR ALL COLUMNS SIZE 1',
NO_INVALIDATE => FALSE
);
END;
/
BEGIN
DBMS_STATS.GATHER_TABLE_STATS(USER,
'T2',
CASCADE => TRUE,
ESTIMATE_PERCENT => 100,
METHOD_OPT => 'FOR ALL COLUMNS SIZE 1',
NO_INVALIDATE => FALSE
);
END;
/
I.3. 통계정보 확인
-- T1 테이블의 컬럼 통계정보
SELECT S.TABLE_NAME,
S.COLUMN_NAME,
S.NUM_DISTINCT,
S.NUM_NULLS,
S.DENSITY,
S.LOW_VALUE,
S.HIGH_VALUE,
S.HISTOGRAM
FROM USER_TAB_COLS S
WHERE S.TABLE_NAME = UPPER('T1')
;
TABLE_NAME | COLUMN_NAME | NUM_DISTINCT | NUM_NULLS | DENSITY | LOW_VALUE | HIGH_VALUE | HISTOGRAM |
---|
T1 | STATUS1 | 1001 | 0 | .000999001 | 2D31202020 | 3939392020 | NONE |
T1 | ID1 | 1001 | 0 | .001 | 80 | C20A64 | NONE |
I.4. 테스트
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1 T1,
T2 T2
WHERE T1.ID1 = T2.ID2
AND T1.ID1 BETWEEN 1
AND 100
AND T1.STATUS1 = '-1'
;
COUNT(*)
----------
89100
1 row selected.
Elapsed: 00:00:00.12
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR(NULL, NULL, 'IOSTATS COST LAST'));
-----------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
-----------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.10 | 1221 |
| 2 | NESTED LOOPS | | 1 | 101 | 10 (0)| 89100 |00:00:00.18 | 1221 |
|* 3 | TABLE ACCESS FULL| T1 | 1 | 1 | 9 (0)| 900 |00:00:00.01 | 31 |
|* 4 | INDEX RANGE SCAN | T2_IDX | 900 | 100 | 1 (0)| 89100 |00:00:00.10 | 1190 |
-----------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - filter(("T1"."ID1"<=100 AND "T1"."STATUS1"='-1' AND "T1"."ID1">=1))
4 - access("T1"."ID1"="T2"."ID2")
filter(("T2"."ID2">=1 AND "T2"."ID2"<=100))
- 'T1' 테이블에 -1 값이 9000로우가 있음에도 불구하고 통계정보에서는 이를 모르기 때문에 E-Rows를 1로 계산을 했다.
만약 정확히 9000로우를 알고 있었다면 Hash Join을 사용했을 것이다.
이를 해결하기 위해서는 히스토그램 생성과 통계정보 조작 2가지 방법이 있다.
I.5. 히스토그램 생성
1) 히스토그램 생성 전
SELECT TABLE_NAME,
COLUMN_NAME,
ENDPOINT_NUMBER,
ENDPOINT_VALUE || '(' || ENDPOINT_ACTUAL_VALUE || ')' AS ENDPOINT_VALUE
FROM USER_TAB_HISTOGRAMS
WHERE TABLE_NAME = UPPER('T1')
ORDER BY COLUMN_NAME,
ENDPOINT_NUMBER
;
TABLE_NAME | COLUMN_NAME | ENDPOINT_NUMBER | ENDPOINT_VALUE |
---|
T1 | ID1 | 0 | 0() |
T1 | ID1 | 1 | 999() |
T1 | STATUS1 | 0 | 234649741948204000000000000000000000() |
T1 | STATUS1 | 1 | 297121544231514000000000000000000000() |
2) 히스토그램 생성
BEGIN
DBMS_STATS.GATHER_TABLE_STATS(USER,
'T1',
ESTIMATE_PERCENT => 100,
CASCADE => TRUE,
METHOD_OPT => 'FOR COLUMNS STATUS1 SIZE SKEWONLY',
NO_INVALIDATE => FALSE);
END;
/
3) 히스토그램 생성 후
SELECT TABLE_NAME,
COLUMN_NAME,
ENDPOINT_NUMBER,
ENDPOINT_VALUE || '(' || ENDPOINT_ACTUAL_VALUE || ')' AS ENDPOINT_VALUE
FROM USER_TAB_HISTOGRAMS
WHERE TABLE_NAME = UPPER('T1')
ORDER BY COLUMN_NAME,
ENDPOINT_NUMBER
;
TABLE_NAME | COLUMN_NAME | ENDPOINT_NUMBER | ENDPOINT_VALUE |
---|
T1 | ID1 | 0 | 0() |
T1 | ID1 | 1 | 999() |
T1 | STATUS1 | 228 | 234649741948204000000000000000000000() |
T1 | STATUS1 | 229 | 255400627676803000000000000000000000() |
T1 | STATUS1 | 230 | 255481361174405000000000000000000000() |
T1 | STATUS1 | 231 | 255562173900169000000000000000000000() |
T1 | STATUS1 | 232 | 260612890032291000000000000000000000() |
T1 | STATUS1 | 233 | 260693702758056000000000000000000000() |
T1 | STATUS1 | 234 | 260755025355842000000000000000000000() |
T1 | STATUS1 | 235 | 265825231615942000000000000000000000() |
T1 | STATUS1 | 236 | 265886554213728000000000000000000000() |
T1 | STATUS1 | 237 | 265967366939493000000000000000000000() |
T1 | STATUS1 | 238 | 271018083071615000000000000000000000() |
T1 | STATUS1 | 239 | 271098895797379000000000000000000000() |
T1 | STATUS1 | 240 | 276167834406879000000000000000000000() |
T1 | STATUS1 | 241 | 276230424655266000000000000000000000() |
T1 | STATUS1 | 242 | 276311158152868000000000000000000000() |
T1 | STATUS1 | 243 | 281361953513152000000000000000000000() |
T1 | STATUS1 | 244 | 281442687010754000000000000000000000() |
T1 | STATUS1 | 245 | 281504088836703000000000000000000000() |
T1 | STATUS1 | 246 | 286574215868640000000000000000000000() |
T1 | STATUS1 | 247 | 286653840171967000000000000000000000() |
T1 | STATUS1 | 248 | 286716351192191000000000000000000000() |
T1 | STATUS1 | 249 | 291786557452291000000000000000000000() |
T1 | STATUS1 | 250 | 291847880050077000000000000000000000() |
T1 | STATUS1 | 251 | 291928692775842000000000000000000000() |
T1 | STATUS1 | 252 | 296979408907964000000000000000000000() |
T1 | STATUS1 | 253 | 297060221633728000000000000000000000() |
T1 | STATUS1 | 254 | 297121544231514000000000000000000000() |
4) 플랜 다시 확인
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1 T1,
T2 T2
WHERE T1.ID1 = T2.ID2
AND T1.ID1 BETWEEN 1
AND 100
AND T1.STATUS1 = '-1'
;
COUNT(*)
----------
89100
1 row selected.
Elapsed: 00:00:00.07
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR(NULL, NULL, 'IOSTATS COST LAST'));
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.04 | 62 |
|* 2 | HASH JOIN | | 1 | 14779 | 19 (6)| 89100 |00:00:00.01 | 62 |
|* 3 | TABLE ACCESS FULL| T1 | 1 | 908 | 9 (0)| 900 |00:00:00.01 | 31 |
|* 4 | TABLE ACCESS FULL| T2 | 1 | 9999 | 9 (0)| 9900 |00:00:00.01 | 31 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("T1"."ID1"="T2"."ID2")
3 - filter(("T1"."ID1"<=100 AND "T1"."STATUS1"='-1' AND "T1"."ID1">=1))
4 - filter(("T2"."ID2">=1 AND "T2"."ID2"<=100))
I.6. 통계정보 변경
1) Density 조작 전
SELECT S.TABLE_NAME,
S.COLUMN_NAME,
S.NUM_DISTINCT,
S.NUM_NULLS,
S.DENSITY,
S.LOW_VALUE,
S.HIGH_VALUE,
S.HISTOGRAM
FROM USER_TAB_COLS S
WHERE S.TABLE_NAME = UPPER('T1')
;
TABLE_NAME | COLUMN_NAME | NUM_DISTINCT | NUM_NULLS | DENSITY | LOW_VALUE | HIGH_VALUE | HISTOGRAM |
---|
T1 | STATUS1 | 1001 | 0 | .000999001 | 2D31202020 | 3939392020 | NONE |
T1 | ID1 | 1001 | 0 | .001 | 80 | C20A64 | NONE |
2) Density 조작
BEGIN
DBMS_STATS.SET_COLUMN_STATS(OWNNAME => USER,
TABNAME => 'T1',
COLNAME => 'STATUS1',
DENSITY => 1
);
END;
/
3) Density 조작 후
SELECT S.TABLE_NAME,
S.COLUMN_NAME,
S.NUM_DISTINCT,
S.NUM_NULLS,
S.DENSITY,
S.LOW_VALUE,
S.HIGH_VALUE,
S.HISTOGRAM
FROM USER_TAB_COLS S
WHERE S.TABLE_NAME = UPPER('T1')
;
TABLE_NAME | COLUMN_NAME | NUM_DISTINCT | NUM_NULLS | DENSITY | LOW_VALUE | HIGH_VALUE | HISTOGRAM |
---|
T1 | STATUS1 | 1001 | 0 | 1 | 2D31202020 | 3939392020 | NONE |
T1 | ID1 | 1001 | 0 | .001 | 80 | C20A64 | NONE |
4) 플랜 다시 확인
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1 T1,
T2 T2
WHERE T1.ID1 = T2.ID2
AND T1.ID1 BETWEEN 1
AND 100
AND T1.STATUS1 = '-1'
;
COUNT(*)
----------
89100
1 row selected.
Elapsed: 00:00:00.07
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY_CURSOR(NULL, NULL, 'IOSTATS COST LAST'));
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.04 | 62 |
|* 2 | HASH JOIN | | 1 | 15410 | 19 (6)| 89100 |00:00:00.01 | 62 |
|* 3 | TABLE ACCESS FULL| T1 | 1 | 1011 | 9 (0)| 900 |00:00:00.01 | 31 |
|* 4 | TABLE ACCESS FULL| T2 | 1 | 9999 | 9 (0)| 9900 |00:00:00.01 | 31 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("T1"."ID1"="T2"."ID2")
3 - filter(("T1"."ID1"<=100 AND "T1"."ID1">=1 AND "T1"."STATUS1"='-1'))
4 - filter(("T2"."ID2">=1 AND "T2"."ID2"<=100))
;
I.7. 통계정보 조작 시 주의할 점
- 변화가 전역적이다. 통계 정보를 참조하는 Query가 여러 개 존재할 수 있으며,
통계정보의 변경은 이들 Query 모두에 대해 영향을 미칠 수 있다. - 통계 정보에 대한 지식이 필수적이다.
- Oracle 10g 부터는 통계 정보를 변경해도 그 결과가 Query에 즉시 반영되지 않는다.
DBMS_STATS.GATHER_XXX_STTS Procedure의 NO_INVALIDATE Parameter 때문이다.
Oracle 9i까지는 FALSE, 즉 통계 정보 변경시 Query가 모두 Invalidate 된다.
하지만 Oracle 10g 부터는 AUTO로 변경되었다. 이 경우 Oracle은 통계 정보 변경 후
5시간(_OPTIMIZER_INVALIDATION_PERIOD Parameter)에 걸쳐 서서히 Query들이 Invalidation 되게끔 제어한다.
만일 즉시 반영을 원한다면 NO_INVALIDATE Parameter의 값을 FALSE로 변경해야 한다.
II. CARDINALITY Hint
II.1. 개념설명
- CARDINALITY Hint를 사용하여 Cardinality를 직접 제어할 수 있다.
II.2. 테스트 Object 생성
DROP TABLE T1 PURGE;
DROP TABLE T2 PURGE;
CREATE TABLE T1(C1 INT,
C2 VARCHAR2(10),
C3 VARCHAR2(10));
CREATE TABLE T2(C1 INT,
C2 INT);
CREATE INDEX T1_N1 ON T1(C1);
CREATE INDEX T1_N2 ON T1(C2);
CREATE INDEX T2_N1 ON T2(C1);
INSERT INTO T1
SELECT LEVEL,
'A',
'a'
FROM DUAL
CONNECT BY LEVEL <= 10000
;
INSERT INTO T1
SELECT LEVEL + 10000,
'B',
'b'
FROM DUAL
CONNECT BY LEVEL <= 1000
;
INSERT INTO T2
SELECT LEVEL,
LEVEL
FROM DUAL
CONNECT BY LEVEL <= 11000
;
COMMIT;
BEGIN
DBMS_STATS.GATHER_TABLE_STATS(USER,
'T1',
CASCADE => TRUE,
ESTIMATE_PERCENT => 100,
METHOD_OPT => 'FOR ALL COLUMNS SIZE 1',
NO_INVALIDATE => FALSE
);
END;
/
BEGIN
DBMS_STATS.GATHER_TABLE_STATS(USER,
'T2',
CASCADE => TRUE,
ESTIMATE_PERCENT => 100,
METHOD_OPT => 'FOR ALL COLUMNS SIZE 1',
NO_INVALIDATE => FALSE
);
END;
/
II.3. 테스트
EXPLAIN PLAN FOR
SELECT *
FROM T2,
(SELECT C1, C2
FROM T1
WHERE T1.C2 = 'A'
AND T1.C3 = 'b') V
WHERE T2.C1 = V.C1
;
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);
---------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 2750 | 44000 | 19 (6)| 00:00:01 |
|* 1 | HASH JOIN | | 2750 | 44000 | 19 (6)| 00:00:01 |
|* 2 | TABLE ACCESS FULL| T1 | 2750 | 22000 | 9 (0)| 00:00:01 |
| 3 | TABLE ACCESS FULL| T2 | 11000 | 88000 | 9 (0)| 00:00:01 |
---------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("T2"."C1"="C1")
2 - filter("T1"."C2"='A' AND "T1"."C3"='b')
;
- T1.C2 = 'A' AND T1.C3 = 'b' 인 로우는 0건이지만 예상로우는 2750이 나왔다.
이렇게 나온 이유는 앞에서 살펴본것처럼 Correlated Columns에 대한 통계정보가 없기 때문
- 이를 해결하기 위해서 CARDINALITY Hint를 사용함.
II.4. CARDINALITY Hint 사용
EXPLAIN PLAN FOR
SELECT *
FROM T2,
(SELECT /*+ CARDINALITY(T1 1) */
C1, C2
FROM T1
WHERE T1.C2 = 'A'
AND T1.C3 = 'b') V
WHERE T2.C1 = V.C1
;
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);
-------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 16 | 11 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| T2 | 1 | 8 | 2 (0)| 00:00:01 |
| 2 | NESTED LOOPS | | 1 | 16 | 11 (0)| 00:00:01 |
|* 3 | TABLE ACCESS FULL | T1 | 1 | 8 | 9 (0)| 00:00:01 |
|* 4 | INDEX RANGE SCAN | T2_N1 | 1 | | 1 (0)| 00:00:01 |
-------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - filter("T1"."C2"='A' AND "T1"."C3"='b')
4 - access("T2"."C1"="C1")
III. OPT_ESTIMATE Hint
III.1. 개념설명
- OPT_ESTIMATE Hint는 CARDINALITY Hint의 10g버전
- OPT_ESTIMATE Hint는 (CARDINALITY * SCALE_ROWS(사용자 지정값))이 CARDINALITY가 되도록 하는 방법
- OPT_ESTIMATE Hint는 Undocumented Hint로 SQL Profile을 구현하기 위해 추가됨
III.2. 테스트 Object 생성
DROP TABLE T1 PURGE;
DROP TABLE T2 PURGE;
CREATE TABLE T1(C1 INT,
C2 VARCHAR2(10),
C3 VARCHAR2(10));
CREATE TABLE T2(C1 INT,
C2 INT);
CREATE INDEX T1_N1 ON T1(C1);
CREATE INDEX T1_N2 ON T1(C2);
CREATE INDEX T2_N1 ON T2(C1);
INSERT INTO T1
SELECT LEVEL,
'A',
'a'
FROM DUAL
CONNECT BY LEVEL <= 10000
;
INSERT INTO T1
SELECT LEVEL + 10000,
'B',
'b'
FROM DUAL
CONNECT BY LEVEL <= 1000
;
INSERT INTO T2
SELECT LEVEL,
LEVEL
FROM DUAL
CONNECT BY LEVEL <= 11000
;
COMMIT;
BEGIN
DBMS_STATS.GATHER_TABLE_STATS(USER,
'T1',
CASCADE => TRUE,
ESTIMATE_PERCENT => 100,
METHOD_OPT => 'FOR ALL COLUMNS SIZE 1',
NO_INVALIDATE => FALSE
);
END;
/
BEGIN
DBMS_STATS.GATHER_TABLE_STATS(USER,
'T2',
CASCADE => TRUE,
ESTIMATE_PERCENT => 100,
METHOD_OPT => 'FOR ALL COLUMNS SIZE 1',
NO_INVALIDATE => FALSE
);
END;
/
III.3. 테스트
EXPLAIN PLAN FOR
SELECT *
FROM T2,
(SELECT /*+ OPT_ESTIMATE(TABLE T1 SCALE_ROWS=0.000363636) */
C1, C2
FROM T1
WHERE T1.C2 = 'A'
AND T1.C3 = 'b') V
WHERE T2.C1 = V.C1
;
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);
-------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 16 | 11 (0)| 00:00:01 |
| 1 | TABLE ACCESS BY INDEX ROWID| T2 | 1 | 8 | 2 (0)| 00:00:01 |
| 2 | NESTED LOOPS | | 1 | 16 | 11 (0)| 00:00:01 |
|* 3 | TABLE ACCESS FULL | T1 | 1 | 8 | 9 (0)| 00:00:01 |
|* 4 | INDEX RANGE SCAN | T2_N1 | 1 | | 1 (0)| 00:00:01 |
-------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - filter("T1"."C2"='A' AND "T1"."C3"='b')
4 - access("T2"."C1"="C1")
IV. Dynamic Sampling
IV.1. 개념설명
IV.2. 테스트 Object 생성