Selaa lähdekoodia

Create tables in two distinct waves.

cartershanklin 11 vuotta sitten
vanhempi
commit
3267ec2381
1 muutettua tiedostoa jossa 7 lisäystä ja 11 poistoa
  1. 7 11
      tpcds-setup.sh

+ 7 - 11
tpcds-setup.sh

@@ -16,7 +16,7 @@ if [ $? -ne 0 ]; then
 fi
 
 # Tables in the TPC-DS schema.
-LIST="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site"
+DIMS="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site"
 FACTS="store_sales store_returns web_sales web_returns catalog_sales catalog_returns inventory"
 
 # Get the parameters.
@@ -38,18 +38,17 @@ if [ $SCALE -eq 1 ]; then
 fi
 
 BUCKETS=13
-RETURN_BUCKETS=1
-SPLIT=16
 
 set -x
 set -e
 
 hadoop dfs -mkdir -p ${DIR}
 hadoop dfs -ls ${DIR}/${SCALE} || (cd tpcds-gen; hadoop jar target/*.jar -d ${DIR}/${SCALE}/ -s ${SCALE})
+hadoop dfs -ls ${DIR}/${SCALE} || ( echo "No data available" )
 hadoop dfs -ls ${DIR}/${SCALE}
 
 # Generate the text/flat tables. These will later be converted to ORCFile.
-hive -i settings/load-flat.sql -f ddl/text/alltables.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}
+# hive -i settings/load-flat.sql -f ddl/text/alltables.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}
 
 # Create the partitioned tables.
 for t in ${FACTS}
@@ -57,16 +56,13 @@ do
 	hive -i settings/load-partitioned.sql -f ddl/bin_partitioned/${t}.sql \
 	    -d DB=tpcds_bin_partitioned_orc_${SCALE} \
 	    -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
-	    -d RETURN_BUCKETS=${RETURN_BUCKETS} -d FILE=orc \
-	    -d SPLIT=${SPLIT}
+	    -d FILE=orc
 done
 
 # Populate the smaller tables.
-for t in ${LIST}
+for t in ${DIMS}
 do
 	hive -i settings/load-partitioned.sql -f ddl/bin_partitioned/${t}.sql \
-	    -d DB=tpcds_bin_partitioned_orc_${SCALE} \
-	    -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
-	    -d RETURN_BUCKETS=${RETURN_BUCKETS} -d FILE=orc \
-	    -d SPLIT=${SPLIT}
+	    -d DB=tpcds_bin_partitioned_orc_${SCALE} -d SOURCE=tpcds_text_${SCALE} \
+	    -d FILE=orc
 done