瀏覽代碼

Various query fixes.

cartershanklin 11 年之前
父節點
當前提交
9552263285

+ 28 - 0
sample-queries-tpch/init.settings

@@ -0,0 +1,28 @@
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.map.aggr=true;
+set hive.optimize.bucketmapjoin=false;
+set hive.optimize.bucketmapjoin.sortedmerge=false;
+set hive.mapred.reduce.tasks.speculative.execution=false;
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=false;
+set hive.auto.convert.sortmerge.join.noconditionaltask=false;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.optimize.reducededuplication.min.reducer=1;
+set hive.optimize.mapjoin.mapreduce=true;
+set mapred.reduce.parallel.copies=30;
+-- Intermediate-data compression: DefaultCodec for MapReduce map output, SnappyCodec for Tez.
+set mapreduce.map.output.compress=true;
+set mapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.DefaultCodec;
+set tez.runtime.intermediate-output.should-compress=true;
+set tez.runtime.intermediate-output.compress.codec=org.apache.hadoop.io.compress.SnappyCodec;
+set tez.runtime.intermediate-input.is-compressed=true;
+set tez.runtime.intermediate-input.compress.codec=org.apache.hadoop.io.compress.SnappyCodec;
+-- Input handling: read through HiveInputFormat and include ORC file footers in splits.
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.orc.splits.include.file.footer=true;
+-- Execution: run on Tez with vectorized execution; log only errors to the console.
+set hive.root.logger=ERROR,console;
+set hive.execution.engine=tez;
+set hive.vectorized.execution.enabled=true;
+set hive.exec.local.cache=true;

+ 2 - 1
sample-queries-tpch/tpch_query10.sql

@@ -29,3 +29,4 @@ group by
 	c_comment
 order by
-	revenue desc;
+	revenue desc
+limit 20;

+ 24 - 17
sample-queries-tpch/tpch_query11.sql

@@ -3,28 +3,35 @@ drop view q11_sum_tmp_cached;
 
 create view q11_part_tmp_cached as
 select
-  ps_partkey, sum(ps_supplycost * ps_availqty) as part_value
+	ps_partkey,
+	sum(ps_supplycost * ps_availqty) as part_value
 from
-  nation n join supplier_partitioned_nationkey s
-  on
-    s.s_nationkey = n.n_nationkey and n.n_name = 'GERMANY'
-  join partsupp ps
-  on
-    ps.ps_suppkey = s.s_suppkey
+	partsupp,
+	supplier,
+	nation
+where
+	ps_suppkey = s_suppkey
+	and s_nationkey = n_nationkey
+	and n_name = 'GERMANY'
 group by ps_partkey;
 
 create view q11_sum_tmp_cached as
 select
-  sum(part_value) as total_value
+	sum(part_value) as total_value
 from
-  q11_part_tmp_cached;
+	q11_part_tmp_cached;
 
 select
-  ps_partkey, part_value as value
-from
-  (
-    select ps_partkey, part_value, total_value
-    from q11_part_tmp_cached join q11_sum_tmp_cached
-  ) a
-where part_value > total_value * 0.0001
-order by value desc;
+	ps_partkey, part_value as value
+from (
+	select
+		ps_partkey,
+		part_value,
+		total_value
+	from
+		q11_part_tmp_cached join q11_sum_tmp_cached
+) a
+where
+	part_value > total_value * 0.0001
+order by
+	value desc;

+ 19 - 12
sample-queries-tpch/tpch_query15.sql

@@ -3,25 +3,32 @@ drop view max_revenue_cached;
 
 create view revenue_cached as
 select
-  l_suppkey as supplier_no, sum(l_extendedprice * (1 - l_discount)) as total_revenue
+	l_suppkey as supplier_no,
+	sum(l_extendedprice * (1 - l_discount)) as total_revenue
 from
-  lineitem_partitioned_shipdate
+	lineitem
 where
-  l_shipdate >= '1996-01-01' and l_shipdate < '1996-04-01'
+	l_shipdate >= '1996-01-01'
+	and l_shipdate < '1996-04-01'
 group by l_suppkey;
 
 create view max_revenue_cached as
 select
-  max(total_revenue) as max_revenue
+	max(total_revenue) as max_revenue
 from
-  revenue_cached;
+	revenue_cached;
 
 select
-  s_suppkey, s_name, s_address, s_phone, total_revenue
-from supplier_partitioned_nationkey s join revenue_cached r
-  on
-    s.s_suppkey = r.supplier_no
-  join max_revenue_cached m
-  on
-    r.total_revenue = m.max_revenue 
+	s_suppkey,
+	s_name,
+	s_address,
+	s_phone,
+	total_revenue
+from
+	supplier,
+	revenue_cached,
+	max_revenue_cached
+where
+	s_suppkey = supplier_no
+	and total_revenue = max_revenue 
 order by s_suppkey;

+ 26 - 18
sample-queries-tpch/tpch_query17.sql

@@ -1,25 +1,33 @@
-drop view lineitem_tmp_cached;
+drop view q17_lineitem_tmp_cached;
 
-create view lineitem_tmp_cached as
+create view q17_lineitem_tmp_cached as
 select
-  l_partkey as t_partkey, 0.2 * avg(l_quantity) as t_avg_quantity
+	l_partkey as t_partkey,
+	0.2 * avg(l_quantity) as t_avg_quantity
 from
-  lineitem_partitioned_shipdate
+	lineitem
 group by l_partkey;
 
 select
-  sum(l_extendedprice) / 7.0 as avg_yearly
-from
-  (select l_quantity, l_extendedprice, t_avg_quantity from
-   lineitem_tmp_cached t join
-     (select
-        l_quantity, l_partkey, l_extendedprice
-      from
-        part p join lineitem_partitioned_shipdate l
-        on
-          p.p_partkey = l.l_partkey
-          and p.p_brand = 'Brand#23'
-          and p.p_container = 'MED BOX'
-      ) l1 on l1.l_partkey = t.t_partkey
-   ) a 
+	sum(l_extendedprice) / 7.0 as avg_yearly
+from (
+	select
+		l_quantity,
+		l_extendedprice,
+		t_avg_quantity
+	from
+		q17_lineitem_tmp_cached join
+		(select
+			l_quantity,
+			l_partkey,
+			l_extendedprice
+		from
+			part,
+			lineitem
+		where
+			p_partkey = l_partkey
+			and p_brand = 'Brand#23'
+			and p_container = 'MED BOX'
+		) l1 on l1.l_partkey = t_partkey
+) a 
 where l_quantity < t_avg_quantity;

+ 25 - 13
sample-queries-tpch/tpch_query18.sql

@@ -1,3 +1,18 @@
+drop view q18_tmp_cached;
+drop table q18_large_volume_customer_cached;
+
+create view q18_tmp_cached as
+select
+	l_orderkey,
+	sum(l_quantity) as t_sum_quantity
+from
+	lineitem
+where
+	l_orderkey is not null
+group by
+	l_orderkey;
+
+create table q18_large_volume_customer_cached as
 select
 	c_name,
 	c_custkey,
@@ -8,19 +23,15 @@ select
 from
 	customer,
 	orders,
-	lineitem
+	q18_tmp_cached t,
+	lineitem l
 where
-	o_orderkey in (
-		select
-			l_orderkey
-		from
-			lineitem
-		group by
-			l_orderkey having
-				sum(l_quantity) > 315
-	)
-	and c_custkey = o_custkey
-	and o_orderkey = l_orderkey
+	c_custkey = o_custkey
+	and o_orderkey = t.l_orderkey
+	and o_orderkey is not null
+	and t.t_sum_quantity > 300
+	and o_orderkey = l.l_orderkey
+	and l.l_orderkey is not null
 group by
 	c_name,
 	c_custkey,
@@ -29,4 +40,5 @@ group by
 	o_totalprice
 order by
 	o_totalprice desc,
-	o_orderdate;
+	o_orderdate 
+limit 100;

+ 46 - 29
sample-queries-tpch/tpch_query2.sql

@@ -1,35 +1,52 @@
-drop view q2_minimum_cost_supplier_tmp1_cached;
-drop view q2_minimum_cost_supplier_tmp2_cached;
-
-create view q2_minimum_cost_supplier_tmp1_cached as
-select
-  s.s_acctbal, s.s_name, n.n_name, p.p_partkey, ps.ps_supplycost, p.p_mfgr, s.s_address, s.s_phone, s.s_comment
-from
-  nation n join region r
-  on
-    n.n_regionkey = r.r_regionkey and r.r_name = 'EUROPE'
-  join supplier_partitioned_nationkey s
-  on
-s.s_nationkey = n.n_nationkey
-  join partsupp ps
-  on
-s.s_suppkey = ps.ps_suppkey
-  join part p
-  on
-    p.p_partkey = ps.ps_partkey and p.p_size = 15 and p.p_type like '%BRASS' ;
-
-create view q2_minimum_cost_supplier_tmp2_cached as
+drop view q2_min_ps_supplycost;
+create view q2_min_ps_supplycost as
 select
-  p_partkey, min(ps_supplycost) as ps_min_supplycost
+	p_partkey as min_p_partkey,
+	min(ps_supplycost) as min_ps_supplycost
 from
-  q2_minimum_cost_supplier_tmp1_cached
-group by p_partkey;
+	part,
+	partsupp,
+	supplier,
+	nation,
+	region
+where
+	p_partkey = ps_partkey
+	and s_suppkey = ps_suppkey
+	and s_nationkey = n_nationkey
+	and n_regionkey = r_regionkey
+	and r_name = 'EUROPE'
+group by
+	p_partkey;
 
 select
-  t1.s_acctbal, t1.s_name, t1.n_name, t1.p_partkey, t1.p_mfgr, t1.s_address, t1.s_phone, t1.s_comment
+	s_acctbal,
+	s_name,
+	n_name,
+	p_partkey,
+	p_mfgr,
+	s_address,
+	s_phone,
+	s_comment
 from
-  q2_minimum_cost_supplier_tmp1_cached t1 join q2_minimum_cost_supplier_tmp2_cached t2
-on
-  t1.p_partkey = t2.p_partkey and t1.ps_supplycost=t2.ps_min_supplycost
-order by s_acctbal desc, n_name, s_name, p_partkey 
+	part,
+	supplier,
+	partsupp,
+	nation,
+	region,
+	q2_min_ps_supplycost
+where
+	p_partkey = ps_partkey
+	and s_suppkey = ps_suppkey
+	and p_size = 37
+	and p_type like '%COPPER'
+	and s_nationkey = n_nationkey
+	and n_regionkey = r_regionkey
+	and r_name = 'EUROPE'
+	and ps_supplycost = min_ps_supplycost
+	and p_partkey = min_p_partkey
+order by
+	s_acctbal desc,
+	n_name,
+	s_name,
+	p_partkey
 limit 100;

+ 28 - 24
sample-queries-tpch/tpch_query20.sql

@@ -6,48 +6,52 @@ drop view q20_tmp4_cached;
 create view q20_tmp1_cached as
 select distinct p_partkey
 from
-  part
+	part
 where
-  p_name like 'forest%';
+	p_name like 'forest%';
 
 create view q20_tmp2_cached as
 select
-  l_partkey, l_suppkey, 0.5 * sum(l_quantity) as sum_quantity
+	l_partkey,
+	l_suppkey,
+	0.5 * sum(l_quantity) as sum_quantity
 from
-  lineitem_partitioned_shipdate
+	lineitem
 where
-  l_shipdate >= '1994-01-01'
-  and l_shipdate < '1995-01-01'
+	l_shipdate >= '1994-01-01'
+	and l_shipdate < '1995-01-01'
 group by l_partkey, l_suppkey;
 
 create view q20_tmp3_cached as
 select
-  ps_suppkey, ps_availqty, sum_quantity
+	ps_suppkey,
+	ps_availqty,
+	sum_quantity
 from
-  partsupp ps join q20_tmp1_cached t1
-  on
-    ps.ps_partkey = t1.p_partkey
-  join q20_tmp2_cached t2
-  on
-    ps.ps_partkey = t2.l_partkey and ps.ps_suppkey = t2.l_suppkey;
+	partsupp, q20_tmp1_cached, q20_tmp2_cached
+where
+	ps_partkey = p_partkey
+	and ps_partkey = l_partkey
+	and ps_suppkey = l_suppkey;
 
 create view q20_tmp4_cached as
 select
-  ps_suppkey
+	ps_suppkey
 from
-  q20_tmp3_cached
+	q20_tmp3_cached
 where
-  ps_availqty > sum_quantity
+	ps_availqty > sum_quantity
 group by ps_suppkey;
 
 select
-  s_name, s_address
+	s_name,
+	s_address
 from
-  supplier_partitioned_nationkey s join nation n
-  on
-    s.s_nationkey = n.n_nationkey
-    and n.n_name = 'CANADA'
-  join q20_tmp4_cached t4
-  on
-    s.s_suppkey = t4.ps_suppkey
+	supplier,
+	nation,
+	q20_tmp4_cached
+where
+	s_nationkey = n_nationkey
+	and n_name = 'CANADA'
+	and s_suppkey = ps_suppkey
 order by s_name;

+ 85 - 30
sample-queries-tpch/tpch_query21.sql

@@ -1,39 +1,94 @@
+drop view q21_tmp1_cached;
+drop view q21_tmp2_cached;
+
+create view q21_tmp1_cached as
 select
-	s_name,
-	count(*) as numwait
+	l_orderkey,
+	count(distinct l_suppkey) as count_suppkey,
+	max(l_suppkey) as max_suppkey
 from
-	supplier,
-	lineitem l1,
-	orders,
-	nation
+	lineitem
 where
-	s_suppkey = l1.l_suppkey
-	and o_orderkey = l1.l_orderkey
-	and o_orderstatus = 'F'
-	and l1.l_receiptdate > l1.l_commitdate
-	and exists (
-		select
-			*
-		from
-			lineitem l2
-		where
-			l2.l_orderkey = l1.l_orderkey
-			and l2.l_suppkey <> l1.l_suppkey
-	)
-	and not exists (
+	l_orderkey is not null
+group by
+	l_orderkey;
+
+create view q21_tmp2_cached as
+select
+	l_orderkey,
+	count(distinct l_suppkey) as count_suppkey,
+	max(l_suppkey) as max_suppkey
+from
+	lineitem
+where
+	l_receiptdate > l_commitdate
+	and l_orderkey is not null
+group by
+	l_orderkey;
+
+select
+	s_name,
+	count(1) as numwait
+from (
+	select s_name from (
 		select
-			*
+			s_name,
+			t2.l_orderkey,
+			l_suppkey,
+			count_suppkey,
+			max_suppkey
 		from
-			lineitem l3
-		where
-			l3.l_orderkey = l1.l_orderkey
-			and l3.l_suppkey <> l1.l_suppkey
-			and l3.l_receiptdate > l3.l_commitdate
-	)
-	and s_nationkey = n_nationkey
-	and n_name = 'JAPAN'
+			q21_tmp2_cached t2 right outer join (
+			select
+				s_name,
+				l_orderkey,
+				l_suppkey from (
+				select
+					s_name,
+					t1.l_orderkey,
+					l_suppkey,
+					count_suppkey,
+					max_suppkey
+				from
+					q21_tmp1_cached t1 join (
+						select
+							s_name,
+							l_orderkey,
+							l_suppkey
+						from
+							orders o join (
+							select
+								s_name,
+								l_orderkey,
+								l_suppkey
+							from
+								nation n join supplier s
+							on
+								s.s_nationkey = n.n_nationkey
+								and n.n_name = 'SAUDI ARABIA'
+								join lineitem l
+							on
+								s.s_suppkey = l.l_suppkey
+							where
+								l.l_receiptdate > l.l_commitdate
+								and l.l_orderkey is not null
+						) l1 on o.o_orderkey = l1.l_orderkey and o.o_orderstatus = 'F'
+					) l2 on l2.l_orderkey = t1.l_orderkey
+				) a
+			where
+				(count_suppkey > 1)
+				or ((count_suppkey=1)
+				and (l_suppkey <> max_suppkey))
+		) l3 on l3.l_orderkey = t2.l_orderkey
+	) b
+	where
+		(count_suppkey is null)
+		or ((count_suppkey=1)
+		and (l_suppkey = max_suppkey))
+) c
 group by
 	s_name
 order by
 	numwait desc,
-	s_name;
+	s_name 
+limit 100;

+ 42 - 32
sample-queries-tpch/tpch_query22.sql

@@ -4,51 +4,61 @@ drop view q22_orders_tmp_cached;
 
 create view if not exists q22_customer_tmp_cached as
 select
-  c_acctbal, c_custkey, substr(c_phone, 1, 2) as cntrycode
+	c_acctbal,
+	c_custkey,
+	substr(c_phone, 1, 2) as cntrycode
 from
-  customer_partitioned_mktsegment
+	customer
 where
-  substr(c_phone, 1, 2) = '13' or
-  substr(c_phone, 1, 2) = '31' or
-  substr(c_phone, 1, 2) = '23' or
-  substr(c_phone, 1, 2) = '29' or
-  substr(c_phone, 1, 2) = '30' or
-  substr(c_phone, 1, 2) = '18' or
-  substr(c_phone, 1, 2) = '17';
+	substr(c_phone, 1, 2) = '13' or
+	substr(c_phone, 1, 2) = '31' or
+	substr(c_phone, 1, 2) = '23' or
+	substr(c_phone, 1, 2) = '29' or
+	substr(c_phone, 1, 2) = '30' or
+	substr(c_phone, 1, 2) = '18' or
+	substr(c_phone, 1, 2) = '17';
  
 create view if not exists q22_customer_tmp1_cached as
 select
-  avg(c_acctbal) as avg_acctbal
+	avg(c_acctbal) as avg_acctbal
 from
-  q22_customer_tmp_cached
+	q22_customer_tmp_cached
 where
-  c_acctbal > 0.00;
+	c_acctbal > 0.00;
 
 create view if not exists q22_orders_tmp_cached as
 select
-  o_custkey
+	o_custkey
 from
-  orders_partitioned_orderdate
+	orders
 group by
-  o_custkey;
+	o_custkey;
 
 select
-  cntrycode, count(1) as numcust, sum(c_acctbal) as totacctbal
-from
-(
-  select cntrycode, c_acctbal, avg_acctbal from
-  q22_customer_tmp1_cached ct1 join
-  (
-    select cntrycode, c_acctbal from
-      q22_orders_tmp_cached ot
-      right outer join q22_customer_tmp_cached ct
-      on
-        ct.c_custkey = ot.o_custkey
-    where
-      o_custkey is null
-  ) ct2
+	cntrycode,
+	count(1) as numcust,
+	sum(c_acctbal) as totacctbal
+from (
+	select
+		cntrycode,
+		c_acctbal,
+		avg_acctbal
+	from
+		q22_customer_tmp1_cached ct1 join (
+			select
+				cntrycode,
+				c_acctbal
+			from
+				q22_orders_tmp_cached ot
+				right outer join q22_customer_tmp_cached ct
+				on ct.c_custkey = ot.o_custkey
+			where
+				o_custkey is null
+		) ct2
 ) a
 where
-  c_acctbal > avg_acctbal
-group by cntrycode
-order by cntrycode;
+	c_acctbal > avg_acctbal
+group by
+	cntrycode
+order by
+	cntrycode;

+ 2 - 1
sample-queries-tpch/tpch_query3.sql

@@ -20,3 +20,4 @@ group by
 order by
 	revenue desc,
-	o_orderdate;
+	o_orderdate
+limit 10;

+ 1 - 1
sample-queries-tpch/tpch_query8.sql

@@ -7,7 +7,7 @@ select
 from
 	(
 		select
-			extract(year from o_orderdate) as o_year,
+			year(o_orderdate) as o_year,
 			l_extendedprice * (1 - l_discount) as volume,
 			n2.n_name as nation
 		from