DataBaser.Net: UnPivot 문제

1 기본 문제
2 문제2
3 해결

1 기본 문제 #

환경:
- CPU:도선1.6GHz P4 모바일
- RAM: 1GB
- DBMS: Microsoft SQL Server 2005 + SP2

다음과 같은 테이블에서 ikey별a, b, c 중 가장 작은 값을 찾는 것이 문제다. 단, 값은 0보다 커야 한다. ikey는 고유하지 않으며, 값에 따라서 1건이 될 수 있고, 1만 건이 될 수 도 있다.

ikey        a           b           c
----------- ----------- ----------- -----------
244         382         961         368
275         623         372         947
212         148         744         466
783         75          479         678
301         931         631         998
569         872         154         435
363         654         257         388
437         655         109         327
382         149         534         725
846         471         106         187

이런 컬럼, 컬럼, 컬럼는 다중값 속성을 표현한 테이블로써 설계상 그리 좋지 않은 구조이다. 비정규화된 테이블로 볼 수 있으며, 컬럼이 추가된다면 테이블의 구조를 변경해야 하는 단점이 있다. 기존의 업무는 그대로이고, 단지 컬럼이 더 필요해 졌을 뿐이라면 이 테이블 구조는 좋지 않은 구조이다.

--예제데이터생성스크립트
set nocount on
set statistics io on
 
if object_id('temp..#temp') is not null
        drop table #temp
 
create table #temp(
        ikey int
,       a int
,       b int
,       c int
)
go
 
declare
        @i int
,       @i_key int
,       @a int
,       @b int
,       @c int
set @i = 1
 
while(@i <= 100000)
begin
        set @i_key = replace(right(cast(rand() as varchar), 3) , '.', '')
        set @a = replace(right(cast(rand() as varchar), 3), '.', '')
        set @b =  replace(right(cast(rand() as varchar), 3), '.', '')
        set @c =  replace(right(cast(rand() as varchar), 3), '.', '')
 
        insert #temp values(@i_key, @a, @b, @c)
        set @i = @i + 1
end
 
create index idx on #temp(ikey)
go

예제 데이터 생성 스크립트를 실행하고, ikey의 카디널리티가 가장 많은 것을 보자.

select top 100 ikey, count(*) cnt from #temp group by ikey
order by 2 desc
 
ikey        cnt
----------- -----------
804         145
935         141
687         140
918         140
754         138
207         137
588         137

필자는 ikey = 804가 145건으로 가장 많았다. 쉽게 생각하면 다음과 같은 SQL을 작성할 것이다. 그러나 아래와 같은 SQL은 똑같은 데이터를 3번이나 읽어야 했다.

set statistics profile on

--SQL1: UNION ALL을이용
select ikey, min(val) as min_val
from(
        select ikey, min(a) as val
        from #temp
        where a > 0 and ikey = 804
        group by ikey
        union all
        select ikey, min(b)
        from #temp
        where b > 0 and ikey = 804
        group by ikey
        union all
        select ikey,min(c)
        from #temp
        where c > 0 and ikey = 804
        group by ikey
    ) a
group by ikey
--검색수3, 논리적읽기수444, 물리적읽기수0, 미리읽기수0, LOB 논리적읽기수0, LOB 물리적읽기수0, LOB 미리읽기수0.
 
--SQL2: Cross Join을이용
select
        ikey
,       min(
               case
                       when id = 1 and a > 0 then a
                       when id = 2 and b > 0 then b
                       when id = 3 and c > 0 then c
               end) min_val
from (
        select
               *
        from #temp
        where ikey = 804 ) a cross join (select 1 id union all select 2 union all select 3) b
group by ikey
--검색수1, 논리적읽기수148, 물리적읽기수0, 미리읽기수0, LOB 논리적읽기수0, LOB 물리적읽기수0, LOB 미리읽기수0.

SQL1의 논리적 읽기 수는 444이고, SQL2는 148이다. 즉, 148 * 3 = 444이므로 3배의 논리적 읽기 수 차이가 나는 것이다. 또한 두 쿼리를 동시에 실행시켜 실제 실행계획에서 비용의 비교를 해보면 필자의 경우는 SQL1이 81%이고, SQL2는 19%이다. 약 4배 차이다. 만일 단일 사용자 환경이 아닌 매우 많은 사용자가 위와 같이 쿼리를 DBMS에 날린다면 부하는 좀 될 것이다. 실제 1만명이 이 쿼리를 동시에 날린다고 생각해 보자. SQL1과SQL2가 어떤 차이를 부를까?

[edit]

2 문제2 #

다음과 같은 테이블이 있다. 원하는 결과를 얻을 수 있는 쿼리를 짜보라.

USE tempdb;

IF EXISTS(SELECT * FROM sysobjects 
   WHERE ID = (OBJECT_ID('#temp')) AND xtype = 'U')
DROP TABLE #temp
GO

CREATE TABLE #temp(
 [#] [tinyint] NULL,
 [1] [tinyint] NULL,
 [2] [tinyint] NULL,
 [3] [tinyint] NULL,
 [4] [tinyint] NULL,
 [5] [tinyint] NULL,
 [6] [tinyint] NULL,
 [7] [tinyint] NULL,
 [8] [tinyint] NULL,
 [9] [tinyint] NULL
)

INSERT INTO #temp VALUES(1, 8, 5, 2, 8, 6, 8, 9, 9, 1);
INSERT INTO #temp VALUES(2, 3, 8, 6, 4, 9, 3, 6, 9, 6);
INSERT INTO #temp VALUES(3, 2 , 8, 9, 3, 4, 3, 7, 4, 9);
INSERT INTO #temp VALUES(4, 1, 9, 4, 5, 7, 9, 9, 5, 6);
INSERT INTO #temp VALUES(5, 7, 6, 2, 9, 7, 9, 9, 6, 8);
INSERT INTO #temp VALUES(6, 5, 2, 9, 6, 5, 8, 6, 8, 9);
INSERT INTO #temp VALUES(7, 4, 4, 2, 5, 9, 5, 8, 6, 9);
INSERT INTO #temp VALUES(8, 5, 9, 9, 3, 7, 7, 6, 2, 6);
INSERT INTO #temp VALUES(9, 2, 3, 2, 7, 9, 6, 6, 2, 9);

/*
#    1    2    3    4    5    6    7    8    9
---- ---- ---- ---- ---- ---- ---- ---- ---- ----
1    8    5    2    8    6    8    9    9    1
2    3    8    6    4    9    3    6    9    6
3    2    8    9    3    4    3    7    4    9
4    1    9    4    5    7    9    9    5    6
5    7    6    2    9    7    9    9    6    8
6    5    2    9    6    5    8    6    8    9
7    4    4    2    5    9    5    8    6    9
8    5    9    9    3    7    7    6    2    6
9    2    3    2    7    9    6    6    2    9
*/

[edit]

3 해결 #

SQL Server 2005 이상의 버전이라면 Pivot/UnPivot을 사용해도 된다. 어쨌든 기본은 Cross Join을 이용해야 쉽다는 것..

with temp(no)
as
(
        select 1 no
        union all
        select no + 1 from temp
        where no + 1 <= 9
)--2005버전만 된다. temp라는 테이블은 0 ~ 9의 숫자가 있는 Dumy테이블이라 생각하면 된다. 
select 
	[#] [Row number]
,	b.no [Column number]
,	case
		when b.no=1 then [1]
		when b.no=2 then [2]
		when b.no=3 then [3]
		when b.no=4 then [4]
		when b.no=5 then [5]
		when b.no=6 then [6]
		when b.no=7 then [7]
		when b.no=8 then [8]
		when b.no=9 then [9]
	end	[Value]
from #temp a
	cross join temp b
/*
Row number Column number Value
---------- ------------- -----
1          1             8
2          1             3
3          1             2
4          1             1
5          1             7
6          1             5
7          1             4
...
*/

Contents

1 기본 문제 #

2 문제2 #

3 해결 #