Join Cardinality
- Skewed Data를 잘 해석하지 못하는 것처럼 Join시에도 Join Column들의 Data 편차가 있을 경우 잘 해석하지 못함.
-- create object
DROP TABLE T1 PURGE;
DROP TABLE T2 PURGE;
CREATE TABLE T1(C1 INT,
C2 INT,
C3 INT,
C4 INT);
CREATE TABLE T2(C1 INT,
C2 INT,
C3 INT,
C4 VARCHAR2(1));
-- create data
INSERT INTO T1
SELECT 1,
LEVEL,
MOD(LEVEL, 100) + 1,
'1'
FROM DUAL
CONNECT BY LEVEL <= 1000
;
INSERT INTO T2
SELECT 2,
LEVEL,
CASE WHEN LEVEL <= 99 THEN LEVEL
ELSE 0
END,
'2'
FROM DUAL
CONNECT BY LEVEL <= 1000
;
COMMIT;
-- gather stats "without" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T1', METHOD_OPT => 'FOR ALL COLUMNS SIZE 1', NO_INVALIDATE => FALSE);
-- gather stats "without" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T2', METHOD_OPT => 'FOR ALL COLUMNS SIZE 1', NO_INVALIDATE => FALSE);
-- Table
SELECT TABLE_NAME,
NUM_ROWS,
BLOCKS,
SAMPLE_SIZE,
TO_CHAR(LAST_ANALYZED, 'YYYY/MM/DD HH24:MI:SS') AS LAST_ANAL
FROM USER_TAB_STATISTICS
WHERE TABLE_NAME IN ('T1', 'T2')
;
TABLE_NAME NUM_ROWS BLOCKS SAMPLE_SIZE LAST_ANAL
T1 1000 42 1000 2011/05/12 12:15:40
T2 1000 42 1000 2011/05/12 12:15:43
-- Column
SELECT S.TABLE_NAME,
S.COLUMN_NAME,
S.NUM_DISTINCT,
S.NUM_NULLS,
S.DENSITY,
S.LOW_VALUE,
S.HIGH_VALUE,
S.HISTOGRAM
FROM USER_TAB_COLS S
WHERE S.TABLE_NAME IN ('T1', 'T2')
;
TABLE_NAME COLUMN_NAME NUM_DISTINCT NUM_NULLS DENSITY LOW_VALUE HIGH_VALUE HISTOGRAM
T2 C4 1 0 1 32 32 NONE
T2 C3 100 0 0.01 80 C164 NONE
T2 C2 1000 0 0.001 C102 C20B NONE
T2 C1 1 0 1 C103 C103 NONE
T1 C4 1 0 1 C102 C102 NONE
T1 C3 100 0 0.01 C102 C202 NONE
T1 C2 1000 0 0.001 C102 C20B NONE
T1 C1 1 0 1 C102 C102 NONE
-- Histogram
SELECT TABLE_NAME,
COLUMN_NAME,
ENDPOINT_NUMBER,
ENDPOINT_VALUE || '(' || ENDPOINT_ACTUAL_VALUE || ')' AS ENDPOINT_VALUE
FROM USER_TAB_HISTOGRAMS
WHERE TABLE_NAME IN ('T1', 'T2')
ORDER BY COLUMN_NAME,
ENDPOINT_NUMBER
;
TABLE_NAME COLUMN_NAME ENDPOINT_NUMBER ENDPOINT_VALUE
T1 C1 0 1()
T2 C1 0 2()
T2 C1 1 2()
T1 C1 1 1()
T2 C2 0 1()
T1 C2 0 1()
T2 C2 1 1000()
T1 C2 1 1000()
T1 C3 0 1()
T2 C3 0 0()
T1 C3 1 100()
T2 C3 1 99()
T2 C4 0 259614842926741000000000000000000000()
T1 C4 0 1()
T2 C4 1 259614842926741000000000000000000000()
T1 C4 1 1()
- 두 컬럼 사이에는 정확하게 일치하는 값이 없다. 정확하게 계산되었다.
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1,
T2
WHERE T1.C1 = T2.C1
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
PLAN_TABLE_OUTPUT
Plan hash value: 446739472
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 74 |
|* 2 | HASH JOIN | | 1 | 1 | 23 (5)| 0 |00:00:00.01 | 74 |
| 3 | TABLE ACCESS FULL| T1 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
| 4 | TABLE ACCESS FULL| T2 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("T1"."C1"="T2"."C1")
- 다음과 같이 계산되었다.
= Cardinality(T1.C2 = T2.C2)
= Cardinality(T1.C2) * Cardinality(T2.C2) * MIN(Selectivity(T1.C2), Selectivity(T2.C2))
= 1000 * 1000 * MIN(0.001, 0.001)
= 1000
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1,
T2
WHERE T1.C2 = T2.C2
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
PLAN_TABLE_OUTPUT
Plan hash value: 446739472
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.06 | 74 |
|* 2 | HASH JOIN | | 1 | 1000 | 23 (5)| 1000 |00:00:00.04 | 74 |
| 3 | TABLE ACCESS FULL| T1 | 1 | 1000 | 11 (0)| 1000 |00:00:00.02 | 37 |
| 4 | TABLE ACCESS FULL| T2 | 1 | 1000 | 11 (0)| 1000 |00:00:00.02 | 37 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("T1"."C2"="T2"."C2")
- 다음과 같이 계산되었다.
= Cardinality(T1.C3 = T2.C3)
= Cardinality(T1.C3) * Cardinality(T2.C3) * MIN(Selectivity(T1.C3), Selectivity(T2.C3))
= 1000 * 1000 * MIN(0.01, 0.01)
= 10000
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1,
T2
WHERE T1.C3 = T2.C3
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
PLAN_TABLE_OUTPUT
Plan hash value: 446739472
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 74 |
|* 2 | HASH JOIN | | 1 | 10000 | 23 (5)| 990 |00:00:00.01 | 74 |
| 3 | TABLE ACCESS FULL| T1 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
| 4 | TABLE ACCESS FULL| T2 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("T1"."C3"="T2"."C3")
-- gather stats "with" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T1', METHOD_OPT => 'FOR COLUMNS C3 SIZE SKEWONLY', NO_INVALIDATE => FALSE);
-- gather stats "with" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T2', METHOD_OPT => 'FOR COLUMNS C3 SIZE SKEWONLY', NO_INVALIDATE => FALSE);
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1,
T2
WHERE T1.C3 = T2.C3
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
PLAN_TABLE_OUTPUT
Plan hash value: 446739472
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 74 |
|* 2 | HASH JOIN | | 1 | 496 | 23 (5)| 990 |00:00:00.01 | 74 |
| 3 | TABLE ACCESS FULL| T1 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
| 4 | TABLE ACCESS FULL| T2 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("T1"."C3"="T2"."C3")
- 데이터형이 다를 경우 Predict 변형이 발생하고 Cardinality에 심각한 오류가 발생한다.
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1,
T2
WHERE T1.C4 = T2.C4
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
PLAN_TABLE_OUTPUT
Plan hash value: 446739472
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 74 |
|* 2 | HASH JOIN | | 1 | 1000K| 27 (19)| 0 |00:00:00.01 | 74 |
| 3 | TABLE ACCESS FULL| T1 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
| 4 | TABLE ACCESS FULL| T2 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("T1"."C4"=TO_NUMBER("T2"."C4"))
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1
WHERE C1 IN (SELECT C1
FROM T2)
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
PLAN_TABLE_OUTPUT
Plan hash value: 3595184350
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 74 |
|* 2 | HASH JOIN SEMI | | 1 | 1000 | 23 (5)| 0 |00:00:00.01 | 74 |
| 3 | TABLE ACCESS FULL| T1 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
| 4 | TABLE ACCESS FULL| T2 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("C1"="C1")
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1
WHERE C3 IN (SELECT C3
FROM T2)
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
PLAN_TABLE_OUTPUT
Plan hash value: 3595184350
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 74 |
|* 2 | HASH JOIN SEMI | | 1 | 990 | 23 (5)| 990 |00:00:00.01 | 74 |
| 3 | TABLE ACCESS FULL| T1 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
| 4 | TABLE ACCESS FULL| T2 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("C3"="C3")
- 아래의 사례들을 보면 오라클은 단순하고 비현실적인 공식들로는 복잡한 경우를 처리할 수 없음을 확인할 수 있다.
- Filter Operation에 의한 Cardinality는 Bind 변수에 대한 Cardinality 계산과 동일한 원리를 따른다.
- Basic Rules에서 소개한 공식들이 사용된다.
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1
WHERE C1 NOT IN (SELECT C1
FROM T2
WHERE C1 IS NOT NULL)
AND C1 IS NOT NULL
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
PLAN_TABLE_OUTPUT
Plan hash value: 688549579
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 74 |
|* 2 | HASH JOIN ANTI | | 1 | 1 | 23 (5)| 1000 |00:00:00.01 | 74 |
|* 3 | TABLE ACCESS FULL| T1 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
|* 4 | TABLE ACCESS FULL| T2 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("C1"="C1")
3 - filter("C1" IS NOT NULL)
4 - filter("C1" IS NOT NULL)
SELECT /*+ GATHER_PLAN_STATISTICS */
COUNT(*)
FROM T1
WHERE C1 NOT IN (SELECT C1
FROM T2
WHERE C1 IS NOT NULL)
AND C1 IS NOT NULL
;
select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
PLAN_TABLE_OUTPUT
Plan hash value: 688549579
---------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | Cost (%CPU)| A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------------------
| 1 | SORT AGGREGATE | | 1 | 1 | | 1 |00:00:00.01 | 74 |
|* 2 | HASH JOIN ANTI | | 1 | 1 | 23 (5)| 1000 |00:00:00.01 | 74 |
|* 3 | TABLE ACCESS FULL| T1 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
|* 4 | TABLE ACCESS FULL| T2 | 1 | 1000 | 11 (0)| 1000 |00:00:00.01 | 37 |
---------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("C1"="C1")
3 - filter("C1" IS NOT NULL)
4 - filter("C1" IS NOT NULL)