Optimizing Oracle Optimizer (2009년)
Join Cardinality 0 0 58,193

by 구루비스터디 Join Cardinality cardinality [2023.09.25]


  1. III. Join Cardinality
    1. III.1. 테스트 데이터 생성
    2. III.2. 통계정보 확인
    3. III.3. 테스트


III. Join Cardinality

III.1. 테스트 데이터 생성


-- create object
DROP TABLE T1 PURGE;

DROP TABLE T2 PURGE;

CREATE TABLE T1(C1 INT,
                C2 INT,
                C3 INT,
                C4 INT);

CREATE TABLE T2(C1 INT,
                C2 INT,
                C3 INT,
                C4 VARCHAR2(1));

-- create data
INSERT INTO T1
SELECT 1,
       LEVEL,
       MOD(LEVEL, 100) + 1,
       '1'
FROM   DUAL
CONNECT BY LEVEL <= 1000
;

INSERT INTO T2
SELECT 2,
       LEVEL,
       CASE WHEN LEVEL <= 99 THEN LEVEL
            ELSE 0
            END,
       '2'
FROM   DUAL
CONNECT BY LEVEL <= 1000
;

COMMIT;

-- gather stats "without" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T1', METHOD_OPT => 'FOR ALL COLUMNS SIZE 1', NO_INVALIDATE => FALSE);

-- gather stats "without" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T2', METHOD_OPT => 'FOR ALL COLUMNS SIZE 1', NO_INVALIDATE => FALSE);


III.2. 통계정보 확인


-- Table
SELECT TABLE_NAME,
       NUM_ROWS,
       BLOCKS,
       SAMPLE_SIZE,
       TO_CHAR(LAST_ANALYZED, 'YYYY/MM/DD HH24:MI:SS') AS LAST_ANAL
FROM   USER_TAB_STATISTICS
WHERE  TABLE_NAME IN ('T1', 'T2')
;


TABLE_NAMENUM_ROWSBLOCKSSAMPLE_SIZELAST_ANAL
T110001310002009-03-15 11:51:32 AM
T210001310002009-03-15 11:51:33 AM



-- Column
SELECT S.TABLE_NAME,
       S.COLUMN_NAME,
       S.NUM_DISTINCT,
       S.NUM_NULLS,
       S.DENSITY,
       S.LOW_VALUE,
       S.HIGH_VALUE,
       S.HISTOGRAM
FROM   USER_TAB_COLS S
WHERE  S.TABLE_NAME IN ('T1', 'T2')
;


TABLE_NAMECOLUMN_NAMENUM_DISTINCTNUM_NULLSDENSITYLOW_VALUEHIGH_VALUEHISTOGRAM
T1C4101C102C102NONE
T1C310000.01C102C202NONE
T1C2100000.001C102C20BNONE
T1C1101C102C102NONE
T2C41013232NONE
T2C310000.0180C164NONE
T2C2100000.001C102C20BNONE
T2C1101C103C103NONE



-- Histogram
SELECT TABLE_NAME,
       COLUMN_NAME,
       ENDPOINT_NUMBER,
       ENDPOINT_VALUE || '(' || ENDPOINT_ACTUAL_VALUE || ')' AS ENDPOINT_VALUE
FROM   USER_TAB_HISTOGRAMS
WHERE  TABLE_NAME IN ('T1', 'T2')
ORDER  BY COLUMN_NAME,
          ENDPOINT_NUMBER
;


TABLE_NAMECOLUMN_NAMEENDPOINT_NUMBERENDPOINT_VALUE
T1C101()
T1C111()
T1C201()
T1C211000()
T1C301()
T1C31100()
T1C401()
T1C411()
T2C102()
T2C112()
T2C201()
T2C211000()
T2C300()
T2C3199()
T2C40259614842926741000000000000000000000()
T2C41259614842926741000000000000000000000()


III.3. 테스트

1) Join Column 'C1' Cardinality without Histogram

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1,
       T2
WHERE  T1.C1 = T2.C1
;

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      30 |
|*  2 |   HASH JOIN         |      |      1 |      1 |    11  (10)|      0 |00:00:00.01 |      30 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
---------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("T1"."C1"="T2"."C1")
;


2) Join Column 'C2' Cardinality without Histogram
  • = Cardinality(T1.C2 = T2.C2)
  • = Cardinality(T1.C2) * Cardinality(T2.C2) * MIN(Selectivity(T1.C2), Selectivity(T2.C2))
  • = 1000 * 1000 * MIN(0.001, 0.001)
  • = 1000



SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1,
       T2
WHERE  T1.C2 = T2.C2
;

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.02 |      30 |
|*  2 |   HASH JOIN         |      |      1 |   1000 |    11  (10)|   1000 |00:00:00.02 |      30 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
---------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("T1"."C2"="T2"."C2")
;


3) Join Column 'C3' Cardinality without Histogram
  • = Cardinality(T1.C3 = T2.C3)
  • = Cardinality(T1.C3) * Cardinality(T2.C3) * MIN(Selectivity(T1.C3), Selectivity(T2.C3))
  • = 1000 * 1000 * MIN(0.01, 0.01)
  • = 10000



SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1,
       T2
WHERE  T1.C3 = T2.C3
;

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.02 |      30 |
|*  2 |   HASH JOIN         |      |      1 |  10000 |    11  (10)|    990 |00:00:00.02 |      30 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
---------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("T1"."C3"="T2"."C3")
;


  • T1.C3, T2.C3 컬럼의 데이터들은 Skew되어 있기 때문에 정확한 Cardinality를 구할 수 없으며 이를 해결하기 위해서는 Histogram을 생성해야 한다.


4) Create 'C3' Column Histogram

-- gather stats "with" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T1', METHOD_OPT => 'FOR COLUMNS C3 SIZE SKEWONLY', NO_INVALIDATE => FALSE);

-- gather stats "with" histogram
EXEC DBMS_STATS.GATHER_TABLE_STATS(USER, 'T2', METHOD_OPT => 'FOR COLUMNS C3 SIZE SKEWONLY', NO_INVALIDATE => FALSE);

5) onemore Test for Join Column 'C3' Cardinality with Histogram

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1,
       T2
WHERE  T1.C3 = T2.C3
;

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.02 |      30 |
|*  2 |   HASH JOIN         |      |      1 |    496 |    11  (10)|    990 |00:00:00.02 |      30 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
---------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("T1"."C3"="T2"."C3")
;


6) mismatch datatype 'C4' Column Cardinality without Histogram

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1,
       T2
WHERE  T1.C4 = T2.C4
;

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      30 |
|*  2 |   HASH JOIN         |      |      1 |   1000K|    20  (50)|      0 |00:00:00.01 |      30 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
---------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("T1"."C4"=TO_NUMBER("T2"."C4"))
;


7) Semi Join 'C1' Column Cardinality without Histogram

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1
WHERE  C1 IN (SELECT C1
              FROM   T2)
;

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |      30 |
|*  2 |   HASH JOIN SEMI    |      |      1 |   1000 |    11  (10)|      0 |00:00:00.01 |      30 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
---------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("C1"="C1")
;


8) Semi Join 'C3' Column Cardinality with Histogram

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1
WHERE  C3 IN (SELECT C3
              FROM   T2)
;

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.02 |      30 |
|*  2 |   HASH JOIN SEMI    |      |      1 |    990 |    11  (10)|    990 |00:00:00.02 |      30 |
|   3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
|   4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
---------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("C3"="C3")
;


9) Filter Operation Cardinality

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   (SELECT /*+ NO_MERGE */
               T1.C1,
               T1.C2
        FROM   T1
        WHERE  C3 IN (SELECT /*+ NO_UNNEST */
                             C3
                      FROM   T2)
       ) V,
       T2
WHERE  V.C1 = T2.C1
;

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

-----------------------------------------------------------------------------------------------------
| Id  | Operation             | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
-----------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE       |      |      1 |      1 |            |      1 |00:00:00.02 |    1557 |
|*  2 |   HASH JOIN           |      |      1 |      1 |   514   (1)|      0 |00:00:00.02 |    1557 |
|   3 |    VIEW               |      |      1 |     10 |   508   (1)|    990 |00:00:00.02 |    1542 |
|*  4 |     FILTER            |      |      1 |        |            |    990 |00:00:00.02 |    1542 |
|   5 |      TABLE ACCESS FULL| T1   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
|*  6 |      TABLE ACCESS FULL| T2   |    190 |     10 |     5   (0)|    189 |00:00:00.01 |    1527 |
|   7 |    TABLE ACCESS FULL  | T2   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
-----------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("V"."C1"="T2"."C1")
   4 - filter( IS NOT NULL)
   6 - filter("C3"=:B1)
;


10) Not In Operation 'C1' Column Cardinality

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1
WHERE  C1 NOT IN (SELECT C1
                  FROM   T2
                  WHERE  C1 IS NOT NULL)
AND    C1 IS NOT NULL
;

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.02 |      30 |
|*  2 |   HASH JOIN ANTI    |      |      1 |      1 |    11  (10)|   1000 |00:00:00.02 |      30 |
|*  3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
|*  4 |    TABLE ACCESS FULL| T2   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
---------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("C1"="C1")
   3 - filter("C1" IS NOT NULL)
   4 - filter("C1" IS NOT NULL)
;


11) Not In Operation 'C3' Column Cardinality

SELECT /*+ GATHER_PLAN_STATISTICS */
       COUNT(*)
FROM   T1
WHERE  C3 NOT IN (SELECT C3
                  FROM   T2
                  WHERE  C1 IS NOT NULL)
AND    C3 IS NOT NULL
;

select * from table(dbms_xplan.display_cursor(null, null, 'iostats cost last'));

---------------------------------------------------------------------------------------------------
| Id  | Operation           | Name | Starts | E-Rows | Cost (%CPU)| A-Rows |   A-Time   | Buffers |
---------------------------------------------------------------------------------------------------
|   1 |  SORT AGGREGATE     |      |      1 |      1 |            |      1 |00:00:00.01 |    1542 |
|*  2 |   FILTER            |      |      1 |        |            |     10 |00:00:00.01 |    1542 |
|*  3 |    TABLE ACCESS FULL| T1   |      1 |   1000 |     5   (0)|   1000 |00:00:00.01 |      15 |
|*  4 |    TABLE ACCESS FULL| T2   |    190 |     10 |     5   (0)|    189 |00:00:00.01 |    1527 |
---------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - filter( IS NULL)
   3 - filter("C3" IS NOT NULL)
   4 - filter(("C1" IS NOT NULL AND LNNVL("C3"<>:B1)))
;

"데이터베이스 스터디모임" 에서 2009년에 "OPTIMIZING ORACLE OPTIMIZER " 도서를 스터디하면서 정리한 내용 입니다.

- 강좌 URL : http://www.gurubee.net/lecture/4426

- 구루비 강좌는 개인의 학습용으로만 사용 할 수 있으며, 다른 웹 페이지에 게재할 경우에는 출처를 꼭 밝혀 주시면 고맙겠습니다.~^^

- 구루비 강좌는 서비스 제공을 위한 목적이나, 학원 홍보, 수익을 얻기 위한 용도로 사용 할 수 없습니다.

댓글등록
SQL문을 포맷에 맞게(깔끔하게) 등록하려면 code() 버튼을 클릭하여 작성 하시면 됩니다.
로그인 사용자만 댓글을 작성 할 수 있습니다. 로그인, 회원가입