Parcourir la source

Turn the bin_flat tables into larger files during ETL

Gopal V il y a 10 ans
Parent
commit
b0697a1690

+ 8 - 0
ddl-tpch/bin_flat/analyze.sql

@@ -0,0 +1,8 @@
+analyze table nation compute statistics for columns;
+analyze table region compute statistics for columns;
+analyze table supplier compute statistics for columns;
+analyze table part compute statistics for columns;
+analyze table partsupp compute statistics for columns;
+analyze table customer compute statistics for columns;
+analyze table orders compute statistics for columns;
+analyze table lineitem compute statistics for columns;

+ 3 - 1
ddl-tpch/bin_flat/customer.sql

@@ -5,4 +5,6 @@ drop table if exists customer;
 
 create table customer
 stored as ${FILE}
-as select * from ${SOURCE}.customer;
+as select * from ${SOURCE}.customer
+cluster by C_MKTSEGMENT
+;

+ 3 - 1
ddl-tpch/bin_flat/lineitem.sql

@@ -5,4 +5,6 @@ drop table if exists lineitem;
 
 create table lineitem
 stored as ${FILE}
-as select * from ${SOURCE}.lineitem;
+as select * from ${SOURCE}.lineitem 
+cluster by L_SHIPDATE
+;

+ 1 - 1
ddl-tpch/bin_flat/nation.sql

@@ -5,4 +5,4 @@ drop table if exists nation;
 
 create table nation
 stored as ${FILE}
-as select * from ${SOURCE}.nation;
+as select distinct * from ${SOURCE}.nation;

+ 3 - 1
ddl-tpch/bin_flat/orders.sql

@@ -5,4 +5,6 @@ drop table if exists orders;
 
 create table orders
 stored as ${FILE}
-as select * from ${SOURCE}.orders;
+as select * from ${SOURCE}.orders
+cluster by o_orderdate
+;

+ 3 - 1
ddl-tpch/bin_flat/part.sql

@@ -5,4 +5,6 @@ drop table if exists part;
 
 create table part
 stored as ${FILE}
-as select * from ${SOURCE}.part;
+as select * from ${SOURCE}.part
+cluster by p_brand
+;

+ 3 - 1
ddl-tpch/bin_flat/partsupp.sql

@@ -5,4 +5,6 @@ drop table if exists partsupp;
 
 create table partsupp
 stored as ${FILE}
-as select * from ${SOURCE}.partsupp;
+as select * from ${SOURCE}.partsupp
+cluster by PS_SUPPKEY
+;

+ 1 - 1
ddl-tpch/bin_flat/region.sql

@@ -5,4 +5,4 @@ drop table if exists region;
 
 create table region
 stored as ${FILE}
-as select * from ${SOURCE}.region;
+as select distinct * from ${SOURCE}.region;

+ 3 - 1
ddl-tpch/bin_flat/supplier.sql

@@ -5,4 +5,6 @@ drop table if exists supplier;
 
 create table supplier
 stored as ${FILE}
-as select * from ${SOURCE}.supplier;
+as select * from ${SOURCE}.supplier
+cluster by s_nationkey, s_suppkey
+;