SQL> SELECT CASE WHEN AGE <= 19 THEN '10'
2 WHEN AGE >=20 AND AGE < 30 THEN '20'
3 WHEN AGE >=30 AND AGE < 40 THEN '30'
4 WHEN AGE >=40 AND AGE < 50 THEN '40'
5 WHEN AGE >=50 AND AGE < 60 THEN '50'
6 WHEN AGE >=60 THEN '60'
7 END AGE_GRP, COUNT(*)
8 FROM MEMBER
9 GROUP BY CASE WHEN AGE <= 19 THEN '10'
10 WHEN AGE >=20 AND AGE < 30 THEN '20'
11 WHEN AGE >=30 AND AGE < 40 THEN '30'
12 WHEN AGE >=40 AND AGE < 50 THEN '40'
13 WHEN AGE >=50 AND AGE < 60 THEN '50'
14 WHEN AGE >=60 THEN '60'
15 END
16 ORDER BY AGE_GRP;
AG COUNT(*)
--
SQL> SELECT COUNT(*), COUNT( DISTINCT AGE ) FROM MEMBER; --버킷 개수가 79개로 도수분포 히스토그램 사용 가능
COUNT(*) COUNT(DISTINCTAGE)
---------- ------------------
2130 79
SQL> begin
2 dbms_stats.gather_table_stats( user, 'MEMBER', method_opt=> 'for all columns size 100' ); --히스토그램 생성
3 end;
4 /
PL/SQL 처리가 정상적으로 완료되었습니다.
SQL> SELECT NUM_DISTINCT, NUM_BUCKETS, HISTOGRAM
2 FROM USER_TAB_COL_STATISTICS
3 WHERE TABLE_NAME = 'MEMBER'
4 AND COLUMN_NAME = 'AGE'
5 ;
NUM_DISTINCT NUM_BUCKETS HISTOGRAM
------------ ----------- ---------------
79 79 FREQUENCY
h3.(3) 높이균형 히스토그램
begin
dbms_stats.gather_table_stats( user, 'MEMBER', method_opt=> 'for all columns size 20' ); --히스토그램 생성
end;
/
SQL> SELECT NUM_DISTINCT, NUM_BUCKETS, HISTOGRAM
2 FROM USER_TAB_COL_STATISTICS
3 WHERE TABLE_NAME = 'MEMBER'
4 AND COLUMN_NAME = 'AGE';
NUM_DISTINCT NUM_BUCKETS HISTOGRAM
– ENDPOINT_NUMBER = 1 버킷은 1~22 연령대 구간을, 20은 55~99연령대 구간을 대표한다.
SQL> SELECT ENDPOINT_NUMBER, ENDPOINT_VALUE
2 FROM USER_HISTOGRAMS
3 WHERE TABLE_NAME= 'MEMBER'
4 AND COLUMN_NAME = 'AGE';
ENDPOINT_NUMBER ENDPOINT_VALUE
ENDPOINT_NUMBER ENDPOINT_VALUE
13 개의 행이 선택되었습니다.
SQL> SELECT '~' || AGE "연령대", ENDPOINT_NUMBER, DIFF
2 , ROUND( 100 * diff / sum( diff) over() ) "RATION(%)"
3 , ROUND( T.NUM_ROWS * DIFF / SUM( DIFF ) OVER()) "인원수(명)"
4 FROM (SELECT TABLE_NAME
5 , ENDPOINT_VALUE AGE, ENDPOINT_NUMBER
6 , ENDPOINT_NUMBER - LAG( ENDPOINT_NUMBER ) OVER (ORDER BY ENDPOINT_VALUE) DIFF
7 , LAG(ENDPOINT_NUMBER) OVER( ORDER BY ENDPOINT_VALUE) PREV
8 FROM USER_HISTOGRAMS
9 WHERE TABLE_NAME = 'MEMBER'
10 AND COLUMN_NAME = 'AGE' ) H, USER_TABLES T
11 WHERE H.ENDPOINT_NUMBER > 0
12 AND T.TABLE_NAME = H.TABLE_NAME
13 ORDER BY 1;
연령대 ENDPOINT_NUMBER DIFF RATION(%) 인원수(명)
-- 오라클은 popular value( 40 )에 대한 카디널리티를 구할 때만 버킷에 의한 계산식을 사용하고
나머지는 미리 구해놓은 density 값을 이용한다.
12 개의 행이 선택되었습니다.
{CODE}
h3.Popular value에 대한 선택도/카디널리티 계산
SQL> SELECT COUNT(*) FROM MEMBER ;
COUNT(*)
----------
2130
SQL> SELECT COUNT(*) FROM MEMBER WHERE AGE = 40;
COUNT(*)
----------
1000
SQL> @XPLAN
---------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 5 |
| 1 | SORT AGGREGATE | | 1 | 1 | 1 |00:00:00.01 | 5 |
|* 2 | TABLE ACCESS FULL| MEMBER | 1 | 959 | 1000 |00:00:00.01 | 5 |
---------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("AGE"=40)
h3.non-popular value에 대한 선택도/카디널리티 계산
-카디널리티 = 총 로우 수 * 선택도 = 총 로우수 * density
SQL> SELECT 1/NUM_DISTINCT, TO_CHAR( density, 'FM999.999999999999999999999' ) AS density
2 , round( TO_CHAR( density, 'FM999.999999999999999999999' ) * 2130 ) AS CD
3 FROM USER_TAB_COL_STATISTICS
4 WHERE TABLE_NAME = 'MEMBER'
5 AND COLUMN_NAME = 'AGE'
6 ;
1/NUM_DISTINCT DENSITY CD
-------------- -------------------------- ----------
.012658228 .0234101956873987 50
SQL> SELECT COUNT(*) FROM MEMBER WHERE AGE = 39;
COUNT(*)
----------
25
---------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
---------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 5 |
| 1 | SORT AGGREGATE | | 1 | 1 | 1 |00:00:00.01 | 5 |
|* 2 | TABLE ACCESS FULL| MEMBER | 1 | 15 | 25 |00:00:00.01 | 5 |
---------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - filter("AGE"=39)
Density
h3.(4) 바인드 변수 사용 시 카디널리티 계산
-( 1~4 = 5%, 5~8 = 0.25% )
{CODE:SQL}
DROP TABLE T PURGE;
CREATE TABLE T AS
SELECT ROWNUM NO FROM DUAL CONNECT BY LEVEL <= 1000;
begin
dbms_stats.gather_table_stats( user, 'T', method_opt=> 'for all columns size 254' );
end;
/
SQL> SELECT COUNT(*) FROM T WHERE NO <= :NO;
COUNT(*)
SQL> @XPLAN
Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time |
0 | SELECT STATEMENT | 1 | 1 | 00:00:00.01 | ||
1 | SORT AGGREGATE | 1 | 1 | 1 | 00:00:00.01 | |
| TABLE ACCESS FULL | T | 1 | 50 | 0 | 00:00:00.01 |
SQL> VAR NO1 NUMBER;
SQL> VAR NO2 NUMBER;
SQL> SELECT COUNT(*) FROM T WHERE NO BETWEEN :NO1 AND :NO2;
COUNT(*)
SQL> @XPLAN
Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time |
0 | SELECT STATEMENT | 1 | 1 | 00:00:00.01 | ||
1 | SORT AGGREGATE | 1 | 1 | 1 | 00:00:00.01 | |
| FILTER | 1 | 0 | 00:00:00.01 | ||
| TABLE ACCESS FULL | T | 0 | 3 | 0 | 00:00:00.01 |
-- 상수일경우 거의 정확한 카디널리티 계산
SQL> SELECT COUNT(*) FROM T WHERE NO <= 100;
COUNT(*)
SQL> @XPLAN
Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
0 | SELECT STATEMENT | 1 | 1 | 00:00:00.01 | 3 | ||
1 | SORT AGGREGATE | 1 | 1 | 1 | 00:00:00.01 | 3 | |
| TABLE ACCESS FULL | T | 1 | 98 | 100 | 00:00:00.01 | 3 |
Predicate Information (identified by operation id):
2 - filter("NO"<=100)
SQL> SELECT COUNT(*) FROM T WHERE NO BETWEEN 500 AND 600;
COUNT(*)
SQL> @XPLAN
Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
0 | SELECT STATEMENT | 1 | 1 | 00:00:00.01 | 3 | ||
1 | SORT AGGREGATE | 1 | 1 | 1 | 00:00:00.01 | 3 | |
| TABLE ACCESS FULL | T | 1 | 98 | 101 | 00:00:00.01 | 3 |
Predicate Information (identified by operation id):
2 - filter(("NO">=500 AND "NO"<=600))
{CODE}
h3.(5) 결합 선택도
{CODE:SQL}
DROP TABLE T PURGE
CREATE TABLE T AS
SELECT EMPNO , ENAME , SAL , SAL * 0.1 SAL_BO FROM SCOTT.EMP;
begin
dbms_stats.gather_table_stats( user, 'T', method_opt=> 'for columns SAL size 254 SAL_BO SIZE 254' );
end;
/
-- 급여에 일괄적으로 0.1을 곱해 상여를 구했으므로 두 컬럼 간에 상관관계가 매우 높지만 옵티마이저는 모름
-- 히스토그램 버킷 개수를 최대치로 주더라도 단일 컬럼 분포만 저장하기 때문
SQL> SELECT * FROM T
2 WHERE SAL >= 2000
3 AND SAL_BO >= 200;
EMPNO ENAME SAL SAL_BO
6 개의 행이 선택되었습니다.
SQL> @XPLAN
Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
0 | SELECT STATEMENT | 1 | 6 | 00:00:00.01 | 3 | ||
| TABLE ACCESS FULL | T | 1 | 2 | 6 | 00:00:00.01 | 3 |
Predicate Information (identified by operation id):
1 - filter(("SAL">=2000 AND "SAL_BO">=200))
-- 옵티마이저는 카디널러티를 2로 예상했지만 실제 수행 결과는 6건
SQL> SELECT /*+ DYNAMIC_SAMPLING( 4 ) */ * FROM T
2 WHERE SAL >= 2000
3 AND SAL_BO >= 200;
EMPNO ENAME SAL SAL_BO
6 개의 행이 선택되었습니다.
SQL> @XPLAN
Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
0 | SELECT STATEMENT | 1 | 6 | 00:00:00.01 | 3 | ||
| TABLE ACCESS FULL | T | 1 | 6 | 6 | 00:00:00.01 | 3 |
Predicate Information (identified by operation id):
1 - filter(("SAL">=2000 AND "SAL_BO">=200))
--동적 샘플링 레벨을 4로 설정하고 다시 수행
{CODE}