11 vuotta sitten · 3267ec2381
--- a/tpcds-setup.sh
+++ b/tpcds-setup.sh
@@ -16,7 +16,7 @@ if [ $? -ne 0 ]; then
 
				 fi
			
 
				 
			
 
				 # Tables in the TPC-DS schema.
			
 
				-LIST="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site"
			
 
				+DIMS="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site"
			
 
				 FACTS="store_sales store_returns web_sales web_returns catalog_sales catalog_returns inventory"
			
 
				 
			
 
				 # Get the parameters.
			
@@ -38,18 +38,17 @@ if [ $SCALE -eq 1 ]; then
 
				 fi
			
 
				 
			
 
				 BUCKETS=13
			
 
				-RETURN_BUCKETS=1
			
 
				-SPLIT=16
			
 
				 
			
 
				 set -x
			
 
				 set -e
			
 
				 
			
 
				 hadoop dfs -mkdir -p ${DIR}
			
 
				 hadoop dfs -ls ${DIR}/${SCALE} || (cd tpcds-gen; hadoop jar target/*.jar -d ${DIR}/${SCALE}/ -s ${SCALE})
			
 
				+hadoop dfs -ls ${DIR}/${SCALE} || ( echo "No data available" )
			
 
				 hadoop dfs -ls ${DIR}/${SCALE}
			
 
				 
			
 
				 # Generate the text/flat tables. These will later be converted to ORCFile.
			
 
				-hive -i settings/load-flat.sql -f ddl/text/alltables.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}
			
 
				+# hive -i settings/load-flat.sql -f ddl/text/alltables.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}
			
 
				 
			
 
				 # Create the partitioned tables.
			
 
				 for t in ${FACTS}
			
@@ -57,16 +56,13 @@ do
 
				 	hive -i settings/load-partitioned.sql -f ddl/bin_partitioned/${t}.sql \
			
 
				 	    -d DB=tpcds_bin_partitioned_orc_${SCALE} \
			
 
				 	    -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
			
 
				-	    -d RETURN_BUCKETS=${RETURN_BUCKETS} -d FILE=orc \
			
 
				-	    -d SPLIT=${SPLIT}
			
 
				+	    -d FILE=orc
			
 
				 done
			
 
				 
			
 
				 # Populate the smaller tables.
			
 
				-for t in ${LIST}
			
 
				+for t in ${DIMS}
			
 
				 do
			
 
				 	hive -i settings/load-partitioned.sql -f ddl/bin_partitioned/${t}.sql \
			
 
				-	    -d DB=tpcds_bin_partitioned_orc_${SCALE} \
			
 
				-	    -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
			
 
				-	    -d RETURN_BUCKETS=${RETURN_BUCKETS} -d FILE=orc \
			
 
				-	    -d SPLIT=${SPLIT}
			
 
				+	    -d DB=tpcds_bin_partitioned_orc_${SCALE} -d SOURCE=tpcds_text_${SCALE} \
			
 
				+	    -d FILE=orc
			
 
				 done