소스 검색

Updated DDL and load script.

cartershanklin 12 년 전
부모
커밋
d9add68747

+ 0 - 31
ddl/bin_flat/add_partition_inventory.sql

@@ -1,31 +0,0 @@
-set hive.enforce.bucketing=true;
-set hive.exec.dynamic.partition.mode=nonstrict;
-set hive.exec.max.dynamic.partitions.pernode=1000000;
-set hive.exec.max.dynamic.partitions=1000000;
-set hive.exec.max.created.files=1000000;
-set hive.metstore.uris=;
-
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists inventory_part;
-
-dfs -mv ${LOCATION} ${TMP_DIR}/part;
-
-create external table inventory_part
-(
-    inv_item_sk			int,
-    inv_warehouse_sk		int,
-    inv_quantity_on_hand	int,
-    inv_date			string
-)
-row format serde '${SERDE}'
-stored as ${FILE}
-location '${TMP_DIR}';
-
-insert into table inventory partition(inv_date)
-select * from inventory_part;
-
-dfs -mv ${TMP_DIR}/part ${LOCATION};
-
-drop table inventory_part;

+ 0 - 50
ddl/bin_flat/add_partition_store_sales.sql

@@ -1,50 +0,0 @@
-set hive.enforce.bucketing=true;
-set hive.exec.dynamic.partition.mode=nonstrict;
-set hive.exec.max.dynamic.partitions.pernode=1000000;
-set hive.exec.max.dynamic.partitions=1000000;
-set hive.exec.max.created.files=1000000;
-set hive.metstore.uris=;
-
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists store_sales_part;
-
-dfs -mv ${LOCATION} ${TMP_DIR}/part;
-
-create external table store_sales_part
-(
-    ss_sold_time_sk           int,
-    ss_item_sk                int,
-    ss_customer_sk            int,
-    ss_cdemo_sk               int,
-    ss_hdemo_sk               int,
-    ss_addr_sk                int,
-    ss_store_sk               int,
-    ss_promo_sk               int,
-    ss_ticket_number          int,
-    ss_quantity               int,
-    ss_wholesale_cost         float,
-    ss_list_price             float,
-    ss_sales_price            float,
-    ss_ext_discount_amt       float,
-    ss_ext_sales_price        float,
-    ss_ext_wholesale_cost     float,
-    ss_ext_list_price         float,
-    ss_ext_tax                float,
-    ss_coupon_amt             float,
-    ss_net_paid               float,
-    ss_net_paid_inc_tax       float,
-    ss_net_profit             float,
-    ss_sold_date              string
-)
-row format serde '${SERDE}'
-stored as ${FILE}
-location '${TMP_DIR}';
-
-insert into table store_sales partition(ss_sold_date)
-select * from store_sales_part;
-
-dfs -mv ${TMP_DIR}/part ${LOCATION};
-
-drop table store_sales_part;

+ 0 - 32
ddl/bin_flat/analyze.sql

@@ -1,32 +0,0 @@
-use ${DB};
-ADD JAR file://${mysql_jar};
-
-ANALYZE TABLE date_dim COMPUTE STATISTICS;
-ANALYZE TABLE time_dim COMPUTE STATISTICS;
-ANALYZE TABLE item COMPUTE STATISTICS;
-ANALYZE TABLE customer COMPUTE STATISTICS;
-ANALYZE TABLE customer_demographics COMPUTE STATISTICS;
-ANALYZE TABLE household_demographics COMPUTE STATISTICS;
-ANALYZE TABLE customer_address COMPUTE STATISTICS;
-ANALYZE TABLE store COMPUTE STATISTICS;
-ANALYZE TABLE promotion COMPUTE STATISTICS;
-ANALYZE TABLE web_site COMPUTE STATISTICS;
-
-
-
-ANALYZE TABLE inventory COMPUTE STATISTICS;
-ANALYZE TABLE store_sales COMPUTE STATISTICS;
-ANALYZE TABLE store_returns COMPUTE STATISTICS;
-ANALYZE TABLE web_sales COMPUTE STATISTICS;
-ANALYZE TABLE web_returns COMPUTE STATISTICS;
-ANALYZE TABLE catalog_sales COMPUTE STATISTICS;
-ANALYZE TABLE catalog_returns COMPUTE STATISTICS;
-
-
-ANALYZE TABLE web_page COMPUTE STATISTICS;
-ANALYZE TABLE income_band COMPUTE STATISTICS;
-ANALYZE TABLE call_center COMPUTE STATISTICS;
-ANALYZE TABLE ship_mode COMPUTE STATISTICS;
-ANALYZE TABLE reason COMPUTE STATISTICS;
-ANALYZE TABLE catalog_page COMPUTE STATISTICS;
-ANALYZE TABLE warehouse COMPUTE STATISTICS;

+ 0 - 9
ddl/bin_flat/call_center.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists call_center;
-
-create table call_center
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.call_center;

+ 0 - 9
ddl/bin_flat/catalog_page.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists catalog_page;
-
-create table catalog_page
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.catalog_page;

+ 0 - 9
ddl/bin_flat/catalog_returns.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists catalog_returns;
-
-create table catalog_returns
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.catalog_returns;

+ 0 - 9
ddl/bin_flat/catalog_sales.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists catalog_sales;
-
-create table catalog_sales
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.catalog_sales;

+ 0 - 9
ddl/bin_flat/customer.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists customer;
-
-create table customer
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.customer;

+ 0 - 9
ddl/bin_flat/customer_address.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists customer_address;
-
-create table customer_address
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.customer_address;

+ 0 - 9
ddl/bin_flat/customer_demographics.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists customer_demographics;
-
-create table customer_demographics
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.customer_demographics;

+ 0 - 9
ddl/bin_flat/date_dim.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists date_dim;
-
-create table date_dim
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.date_dim;

+ 0 - 5
ddl/bin_flat/drop_tmp_tables.sql

@@ -1,5 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists store_sales_tmp;
-drop table if exists inventory_tmp;

+ 0 - 9
ddl/bin_flat/household_demographics.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists household_demographics;
-
-create table household_demographics
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.household_demographics;

+ 0 - 9
ddl/bin_flat/income_band.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists income_band;
-
-create table income_band
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.income_band;

+ 0 - 9
ddl/bin_flat/inventory.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists inventory;
-
-create table inventory
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.inventory;

+ 0 - 9
ddl/bin_flat/item.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists item;
-
-create table item
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.item;

+ 0 - 9
ddl/bin_flat/promotion.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists promotion;
-
-create table promotion
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.promotion;

+ 0 - 9
ddl/bin_flat/reason.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists reason;
-
-create table reason
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.reason;

+ 0 - 9
ddl/bin_flat/ship_mode.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists ship_mode;
-
-create table ship_mode
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.ship_mode;

+ 0 - 9
ddl/bin_flat/store.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists store;
-
-create table store
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.store;

+ 0 - 9
ddl/bin_flat/store_returns.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists store_returns;
-
-create table store_returns
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.store_returns;

+ 0 - 9
ddl/bin_flat/store_sales.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists store_sales;
-
-create table store_sales
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.store_sales;

+ 0 - 9
ddl/bin_flat/time_dim.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists time_dim;
-
-create table time_dim
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.time_dim;

+ 0 - 9
ddl/bin_flat/warehouse.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists warehouse;
-
-create table warehouse
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.warehouse;

+ 0 - 9
ddl/bin_flat/web_page.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists web_page;
-
-create table web_page
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.web_page;

+ 0 - 9
ddl/bin_flat/web_returns.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists web_returns;
-
-create table web_returns
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.web_returns;

+ 0 - 9
ddl/bin_flat/web_sales.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists web_sales;
-
-create table web_sales
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.web_sales;

+ 0 - 9
ddl/bin_flat/web_site.sql

@@ -1,9 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists web_site;
-
-create table web_site
-row format serde '${SERDE}'
-stored as ${FILE}
-as select * from ${SOURCE}.web_site;

+ 0 - 5
ddl/bin_partitioned/drop_tmp_tables.sql

@@ -1,5 +0,0 @@
-create database if not exists ${DB};
-use ${DB};
-
-drop table if exists store_sales_tmp;
-drop table if exists inventory_tmp;

+ 8 - 17
tpcds-setup.sh

@@ -51,16 +51,6 @@ hadoop dfs -ls ${DIR}/${SCALE}
 # Generate the text/flat tables. These will later be converted to ORCFile.
 # hive -i settings/load-flat.sql -f ddl/text/alltables.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}
 
-# Populate the smaller tables.
-#for t in ${LIST}
-#do
-#	hive -i settings/load-partitioned.sql -f ddl/bin_partitioned/${t}.sql \
-#	    -d DB=tpcds_bin_partitioned_orc_${SCALE} \
-#	    -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
-#	    -d RETURN_BUCKETS=${RETURN_BUCKETS} -d FILE="${file}" \
-#	    -d SERDE=org.apache.hadoop.hive.ql.io.orc.OrcSerde -d SPLIT=${SPLIT}
-#done
-
 # Create the partitioned tables.
 for t in ${FACTS}
 do
@@ -68,14 +58,15 @@ do
 	    -d DB=tpcds_bin_partitioned_orc_${SCALE} \
 	    -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
 	    -d RETURN_BUCKETS=${RETURN_BUCKETS} -d FILE="${file}" \
-	    -d SERDE=org.apache.hadoop.hive.ql.io.orc.OrcSerde -d SPLIT=${SPLIT}
+	    -d SPLIT=${SPLIT}
 done
 
-# Populate the partitioned tables.
-for t in ${FACTS}
+# Populate the smaller tables.
+for t in ${LIST}
 do
-	hadoop jar tpcds-parts-1.0-SNAPSHOT.jar -t ${t}
-	    -i ${DIR}/${t}/
-	    -o /apps/hive/warehouse/tpcds_bin_partitioned_orc_${SCALE}.db/${t}
-	hive -e "msck repair table ${t}"
+	hive -i settings/load-partitioned.sql -f ddl/bin_partitioned/${t}.sql \
+	    -d DB=tpcds_bin_partitioned_orc_${SCALE} \
+	    -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
+	    -d RETURN_BUCKETS=${RETURN_BUCKETS} -d FILE="${file}" \
+	    -d SPLIT=${SPLIT}
 done