Optimizing Oracle Optimizer (2011년)
Join Cardinality 0 0 2,476

by 구루비스터디 Cardinility [2018.07.14]


Join Cardinality

  • Skewed Data를 잘 해석하지 못하는 것처럼 Join시에도 Join Column들의 Data 편차가 있을 경우 잘 해석하지 못함.

-- create object
DROP TABLE T1 PURGE;

DROP TABLE T2 PURGE;

CREATE TABLE T1(C1 INT,
                C2 INT,
                C3 INT,
                C4 INT);

CREATE TABLE T2(C1 INT,
                C2 INT,
                C3 INT,
                C4 VARCHAR2(1));

-- create data
INSERT INTO T1
SELECT 1,
       LEVEL,
       MOD(LEVEL, 100) + 1,
       '1'
FROM   DUAL
CONNECT BY LEVEL <= 1000
;

INSERT INTO T2
SELECT 2,
       LEVEL,
       CASE WHEN LEVEL <= 99 THEN LEVEL
            ELSE 0
            END,
       '2'            
FROM   DUAL
CONNECT BY LEVEL <= 1000
;

COMMIT;

-- gather stats "without" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T1', METHOD_OPT => 'FOR ALL COLUMNS SIZE 1', NO_INVALIDATE => FALSE);

-- gather stats "without" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T2', METHOD_OPT => 'FOR ALL COLUMNS SIZE 1', NO_INVALIDATE => FALSE);


  • 통계정보를 확인한다.

-- Table
SELECT TABLE_NAME,
       NUM_ROWS,
       BLOCKS,
       SAMPLE_SIZE,
       TO_CHAR(LAST_ANALYZED, 'YYYY/MM/DD HH24:MI:SS') AS LAST_ANAL
FROM   USER_TAB_STATISTICS
WHERE  TABLE_NAME IN ('T1', 'T2')
;

TABLE_NAME	NUM_ROWS	BLOCKS	SAMPLE_SIZE	LAST_ANAL

T1	1000	42	1000	2011/05/12 12:15:40
T2	1000	42	1000	2011/05/12 12:15:43




-- Column
SELECT S.TABLE_NAME,
       S.COLUMN_NAME,
       S.NUM_DISTINCT,
       S.NUM_NULLS,
       S.DENSITY,
       S.LOW_VALUE,
       S.HIGH_VALUE,
       S.HISTOGRAM
FROM   USER_TAB_COLS S
WHERE  S.TABLE_NAME IN ('T1', 'T2')
;

TABLE_NAME	COLUMN_NAME	NUM_DISTINCT	NUM_NULLS	DENSITY	LOW_VALUE	HIGH_VALUE	HISTOGRAM

T2	C4	1	0	1	32	32	NONE
T2	C3	100	0	0.01	80	C164	NONE
T2	C2	1000	0	0.001	C102	C20B	NONE
T2	C1	1	0	1	C103	C103	NONE
T1	C4	1	0	1	C102	C102	NONE
T1	C3	100	0	0.01	C102	C202	NONE
T1	C2	1000	0	0.001	C102	C20B	NONE
T1	C1	1	0	1	C102	C102	NONE




-- Histogram
SELECT TABLE_NAME,
       COLUMN_NAME,
       ENDPOINT_NUMBER,
       ENDPOINT_VALUE || '(' || ENDPOINT_ACTUAL_VALUE || ')' AS ENDPOINT_VALUE
FROM   USER_TAB_HISTOGRAMS
WHERE  TABLE_NAME IN ('T1', 'T2')
ORDER  BY COLUMN_NAME,
          ENDPOINT_NUMBER
;

TABLE_NAME	COLUMN_NAME	ENDPOINT_NUMBER	ENDPOINT_VALUE

T1	C1	0	1()
T2	C1	0	2()
T2	C1	1	2()
T1	C1	1	1()
T2	C2	0	1()
T1	C2	0	1()
T2	C2	1	1000()
T1	C2	1	1000()
T1	C3	0	1()
T2	C3	0	0()
T1	C3	1	100()
T2	C3	1	99()
T2	C4	0	259614842926741000000000000000000000()
T1	C4	0	1()
T2	C4	1	259614842926741000000000000000000000()
T1	C4	1	1()



  • 두 컬럼 사이에는 정확하게 일치하는 값이 없다. 정확하게 계산되었다.

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1, 
       T2
WHERE  T1.C1 = T2.C1
;              

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

PLAN_TABLE_OUTPUT

Plan hash value: 446739472
 
---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      74 |
|*  2 |   HASH JOIN         |      |      1 |      1 |    23   (5)|      0 |00:00:00.01 |      74 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
---------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - access("T1"."C1"="T2"."C1")
 



* 다음과 같이 계산되었다.
= Cardinality(T1.C2 = T2.C2) 
= Cardinality(T1.C2) * Cardinality(T2.C2) * MIN(Selectivity(T1.C2), Selectivity(T2.C2))
= 1000 * 1000 * MIN(0.001, 0.001)
= 1000



SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1, 
       T2
WHERE  T1.C2 = T2.C2
;              

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

PLAN_TABLE_OUTPUT

Plan hash value: 446739472
 
---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.06 |      74 |
|*  2 |   HASH JOIN         |      |      1 |   1000 |    23   (5)|   1000 |00:00:00.04 |      74 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |    11   (0)|   1000 |00:00:00.02 |      37 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |    11   (0)|   1000 |00:00:00.02 |      37 |
---------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - access("T1"."C2"="T2"."C2")
 



* 다음과 같이 계산되었다.
= Cardinality(T1.C3 = T2.C3) 
= Cardinality(T1.C3) * Cardinality(T2.C3) * MIN(Selectivity(T1.C3), Selectivity(T2.C3))
= 1000 * 1000 * MIN(0.01, 0.01)
= 10000



SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1, 
       T2
WHERE  T1.C3 = T2.C3
;              

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

PLAN_TABLE_OUTPUT

Plan hash value: 446739472
 
---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      74 |
|*  2 |   HASH JOIN         |      |      1 |  10000 |    23   (5)|    990 |00:00:00.01 |      74 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
---------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - access("T1"."C3"="T2"."C3")
 



  • Histogram을 생성한다.

-- gather stats "with" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T1', METHOD_OPT => 'FOR COLUMNS C3 SIZE SKEWONLY', NO_INVALIDATE => FALSE);

-- gather stats "with" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T2', METHOD_OPT => 'FOR COLUMNS C3 SIZE SKEWONLY', NO_INVALIDATE => FALSE);


  • 다음과 같이 계산된다.

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1, 
       T2
WHERE  T1.C3 = T2.C3
;              

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

PLAN_TABLE_OUTPUT

Plan hash value: 446739472
 
---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      74 |
|*  2 |   HASH JOIN         |      |      1 |    496 |    23   (5)|    990 |00:00:00.01 |      74 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
---------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - access("T1"."C3"="T2"."C3")
 


  • 데이터형이 다를 경우 Predict 변형이 발생하고 Cardinality에 심각한 오류가 발생한다.

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1, 
       T2
WHERE  T1.C4 = T2.C4
;              

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

PLAN_TABLE_OUTPUT

Plan hash value: 446739472
 
---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      74 |
|*  2 |   HASH JOIN         |      |      1 |   1000K|    27  (19)|      0 |00:00:00.01 |      74 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
---------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - access("T1"."C4"=TO_NUMBER("T2"."C4"))
 


  • Cardinality가 매우 부정확하다.

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1
WHERE  C1 IN (SELECT C1
              FROM   T2)
;              

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

PLAN_TABLE_OUTPUT

Plan hash value: 3595184350
 
---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      74 |
|*  2 |   HASH JOIN SEMI    |      |      1 |   1000 |    23   (5)|      0 |00:00:00.01 |      74 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
---------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - access("C1"="C1")


  • Cardinality가 정확하다.

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1
WHERE  C3 IN (SELECT C3
              FROM   T2)
;              

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

PLAN_TABLE_OUTPUT

Plan hash value: 3595184350
 
---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      74 |
|*  2 |   HASH JOIN SEMI    |      |      1 |    990 |    23   (5)|    990 |00:00:00.01 |      74 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
---------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - access("C3"="C3")


  • 아래의 사례들을 보면 오라클은 단순하고 비현실적인 공식들로는 복잡한 경우를 처리할 수 없음을 확인할 수 있다.
  • Filter Operation에 의한 Cardinality는 Bind 변수에 대한 Cardinality 계산과 동일한 원리를 따른다.
  • Basic Rules에서 소개한 공식들이 사용된다.

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1
WHERE  C1 NOT IN (SELECT C1 
                  FROM   T2
                  WHERE  C1 IS NOT NULL)
AND    C1 IS NOT NULL
;                         

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

PLAN_TABLE_OUTPUT

Plan hash value: 688549579
 
---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      74 |
|*  2 |   HASH JOIN ANTI    |      |      1 |      1 |    23   (5)|   1000 |00:00:00.01 |      74 |
|*  3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
|*  4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
---------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - access("C1"="C1")
   3 - filter("C1" IS NOT NULL)
   4 - filter("C1" IS NOT NULL)



SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1
WHERE  C1 NOT IN (SELECT C1 
                  FROM   T2
                  WHERE  C1 IS NOT NULL)
AND    C1 IS NOT NULL
;                         

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));
PLAN_TABLE_OUTPUT

Plan hash value: 688549579
 
---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      74 |
|*  2 |   HASH JOIN ANTI    |      |      1 |      1 |    23   (5)|   1000 |00:00:00.01 |      74 |
|*  3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
|*  4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |    11   (0)|   1000 |00:00:00.01 |      37 |
---------------------------------------------------------------------------------------------------
 
Predicate Information (identified by operation id):
---------------------------------------------------
 
   2 - access("C1"="C1")
   3 - filter("C1" IS NOT NULL)
   4 - filter("C1" IS NOT NULL)

"데이터베이스 스터디모임" 에서 2009년에 "OPTIMIZING ORACLE OPTIMIZER " 도서를 스터디하면서 정리한 내용 입니다.

- 강좌 URL : http://www.gurubee.net/lecture/3947

- 구루비 강좌는 개인의 학습용으로만 사용 할 수 있으며, 다른 웹 페이지에 게재할 경우에는 출처를 꼭 밝혀 주시면 고맙겠습니다.~^^

- 구루비 강좌는 서비스 제공을 위한 목적이나, 학원 홍보, 수익을 얻기 위한 용도로 사용 할 수 없습니다.

댓글등록
SQL문을 포맷에 맞게(깔끔하게) 등록하려면 code() 버튼을 클릭하여 작성 하시면 됩니다.
로그인 사용자만 댓글을 작성 할 수 있습니다. 로그인, 회원가입