12 anos atrás · c6ad0228fb
--- a/README.md
+++ b/README.md
@@ -61,8 +61,8 @@ All of these steps should be carried out on your Hadoop cluster.
 
				   	```
			
 
				   	cd sample-queries
			
 
				   	hive
			
 
				-  	hive> use tpcds_bin_partitioned_orc_200
			
 
				-  	hive> source query12.sql
			
 
				+  	hive> use tpcds_bin_partitioned_orc_200;
			
 
				+  	hive> source query12.sql;
			
 
				   	```
			
 
				 
			
 
				   Note that the database is named based on the Data Scale chosen in step 3. At Data Scale 200, your database will be named tpcds_bin_partitioned_orc_200. At Data Scale 50 it would be named tpcds_bin_partitioned_orc_50. You can always run ```show databases``` to get a list of available databases.
			
--- a/settings/load.sql
+++ b/settings/load.sql
@@ -1,14 +0,0 @@
 
				-set hive.enforce.bucketing=true;
			
 
				-set hive.enforce.sorting=true;
			
 
				-set hive.exec.dynamic.partition.mode=nonstrict;
			
 
				-set hive.exec.max.dynamic.partitions.pernode=1000000;
			
 
				-set hive.exec.max.dynamic.partitions=1000000;
			
 
				-set hive.exec.max.created.files=1000000;
			
 
				-
			
 
				-set mapred.min.split.size=240000000;
			
 
				-set mapred.max.split.size=240000000;
			
 
				-set mapred.min.split.size.per.node=240000000;
			
 
				-set mapred.min.split.size.per.rack=240000000;
			
 
				-set hive.exec.parallel=true;
			
 
				-set hive.stats.autogather=false;
			
 
				-set hive.optimize.tez=true;
			
--- a/tpcds-setup.sh
+++ b/tpcds-setup.sh
@@ -57,7 +57,7 @@ hadoop dfs -ls ${DIR}/${SCALE}
 
				 # Generate the text/flat tables. These will later be converted to ORCFile.
			
 
				 for t in ${LIST}
			
 
				 do
			
 
				-	hive -i settings/load.sql -f ddl/text/${t}.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}/${t}
			
 
				+	hive -i settings/load-flat.sql -f ddl/text/${t}.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}/${t}
			
 
				 done
			
 
				 
			
 
				 # Generate the binary forms of the data.
			
@@ -67,7 +67,7 @@ if [ $MODE = "partitioned" ]; then
 
				 	do
			
 
				 		for t in ${LIST}
			
 
				 		do
			
 
				-			hive -i settings/load.sql -f ddl/bin_partitioned/${t}.sql \
			
 
				+			hive -i settings/load-partitioned.sql -f ddl/bin_partitioned/${t}.sql \
			
 
				 			    -d DB=tpcds_bin_partitioned_${FILE_FORMATS[$i]}_${SCALE} \
			
 
				 			    -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
			
 
				 			    -d RETURN_BUCKETS=${RETURN_BUCKETS} -d FILE="${file}" \
			
@@ -81,7 +81,7 @@ else
 
				 	do
			
 
				 		for t in ${LIST}
			
 
				 		do
			
 
				-			hive -i settings/load.sql -f ddl/bin_flat/${t}.sql \
			
 
				+			hive -i settings/load-flat.sql -f ddl/bin_flat/${t}.sql \
			
 
				 			    -d DB=tpcds_bin_flat_${FILE_FORMATS[$i]}_${SCALE} \
			
 
				 			    -d SOURCE=tpcds_text_${SCALE} -d FILE="${file}" \
			
 
				 			    -d SERDE=${SERDES[$i]}