h1.히스토그램
h3.(1) 히스토그램 유형
-- sample
create sequence seq;
create table member( mem_id number, age number( 2 ) );
exec dbms_random.seed(0);
insert into member
select seq.nextval, dbms_random.value( 1,19 ) from dual connect by level <= 50;
insert into member
select seq.nextval, dbms_random.value( 20,29 ) from dual connect by level <= 270;
insert into member
select seq.nextval, dbms_random.value( 30,39 ) from dual connect by level <= 330;
insert into member
select seq.nextval, dbms_random.value( 30,39 ) from dual connect by level <= 330;
insert into member
select seq.nextval, 40 from dual connect by level <= 1000; --> popular value
insert into member
select seq.nextval, dbms_random.value( 41,49 ) from dual connect by level <= 200;
insert into member
select seq.nextval, dbms_random.value( 50,59 ) from dual connect by level <= 100;
insert into member
select seq.nextval, dbms_random.value( 60,99 ) from dual connect by level <= 50;
SU COUNT(*)
-- ----------
10 50
20 270
30 660
40 1200
50 100
60 50
h3.(2) 도수분포 히스토그램
select count(*) , count(distinct age) from member;
COUNT(*) COUNT(DISTINCTAGE)
---------- ---------------------------------------
2330 83
begin
dbms_stats.gather_table_stats( user, 'MEMBER', method_opt=> 'for all columns size 100' );
end;
PL/SQL 프로시저가 성공적으로 완료되었습니다.
select num_distinct, num_buckets, histogram
from user_tab_col_statistics
where table_name = 'MEMBER'
and column_name = 'AGE' ;
NUM_DISTINCT NUM_BUCKETS HISTOGRAM
--------------------------------------- --------------------------------------- ---------------
83 83 FREQUENCY
h3.(3) 높이균형 히스토그램
begin
dbms_stats.gather_table_stats( user, 'MEMBER', method_opt=> 'for all columns size 20' );
end;
PL/SQL 프로시저가 성공적으로 완료되었습니다.
select num_distinct, num_buckets, histogram
from user_tab_col_statistics
where table_name = 'MEMBER'
and column_name = 'AGE' ;
NUM_DISTINCT NUM_BUCKETS HISTOGRAM
--------------------------------------- --------------------------------------- ---------------
83 20 HEIGHT BALANCED
-- endpoint_number : 버킷 변호
-- endpoint3alue : 버킷이 담당하는 가장 큰 값
select endpoint_number, endpoint_value
from user_histograms
where table_name = 'MEMBER'
and column_name = 'AGE'
order by 1;
ENDPOINT_NUMBER ENDPOINT_VALUE
--------------------------------------- ---------------------------------------
0 1 <- 최소값을 표현하는 용도
1 22
2 27
3 30
4 32
5 33
6 35
7 37
8 38
16 40 <- popular vlaue
17 41
18 46
19 53
20 99
==========================================================================================
* endpoint_number=l 벼킷은 1~21 연령대 구간을, endpoint_number= 20 버킷은 56~99 연령대 구간을 대표한다.
* popular vlaue ENDPOINT_NUMBER 8->16 으로 증가된것을 볼수있다. ENDPOINT_NUMBER 가 생략된 형태