Ver código fonte

Use split settings files.

cartershanklin 12 anos atrás
pai
commit
c6ad0228fb
3 arquivos alterados com 5 adições e 19 exclusões
  1. 2 2
      README.md
  2. 0 14
      settings/load.sql
  3. 3 3
      tpcds-setup.sh

+ 2 - 2
README.md

@@ -61,8 +61,8 @@ All of these steps should be carried out on your Hadoop cluster.
   	```
   	cd sample-queries
   	hive
-  	hive> use tpcds_bin_partitioned_orc_200
-  	hive> source query12.sql
+  	hive> use tpcds_bin_partitioned_orc_200;
+  	hive> source query12.sql;
   	```
 
   Note that the database is named based on the Data Scale chosen in step 3. At Data Scale 200, your database will be named tpcds_bin_partitioned_orc_200. At Data Scale 50, it would be named tpcds_bin_partitioned_orc_50. You can always run ```show databases``` to get a list of available databases.

+ 0 - 14
settings/load.sql

@@ -1,14 +0,0 @@
-set hive.enforce.bucketing=true;
-set hive.enforce.sorting=true;
-set hive.exec.dynamic.partition.mode=nonstrict;
-set hive.exec.max.dynamic.partitions.pernode=1000000;
-set hive.exec.max.dynamic.partitions=1000000;
-set hive.exec.max.created.files=1000000;
-
-set mapred.min.split.size=240000000;
-set mapred.max.split.size=240000000;
-set mapred.min.split.size.per.node=240000000;
-set mapred.min.split.size.per.rack=240000000;
-set hive.exec.parallel=true;
-set hive.stats.autogather=false;
-set hive.optimize.tez=true;

+ 3 - 3
tpcds-setup.sh

@@ -57,7 +57,7 @@ hadoop dfs -ls ${DIR}/${SCALE}
 # Generate the text/flat tables. These will later be converted to ORCFile.
 for t in ${LIST}
 do
-	hive -i settings/load.sql -f ddl/text/${t}.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}/${t}
+	hive -i settings/load-flat.sql -f ddl/text/${t}.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}/${t}
 done
 
 # Generate the binary forms of the data.
@@ -67,7 +67,7 @@ if [ $MODE = "partitioned" ]; then
 	do
 		for t in ${LIST}
 		do
-			hive -i settings/load.sql -f ddl/bin_partitioned/${t}.sql \
+			hive -i settings/load-partitioned.sql -f ddl/bin_partitioned/${t}.sql \
 			    -d DB=tpcds_bin_partitioned_${FILE_FORMATS[$i]}_${SCALE} \
 			    -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
 			    -d RETURN_BUCKETS=${RETURN_BUCKETS} -d FILE="${file}" \
@@ -81,7 +81,7 @@ else
 	do
 		for t in ${LIST}
 		do
-			hive -i settings/load.sql -f ddl/bin_flat/${t}.sql \
+			hive -i settings/load-flat.sql -f ddl/bin_flat/${t}.sql \
 			    -d DB=tpcds_bin_flat_${FILE_FORMATS[$i]}_${SCALE} \
 			    -d SOURCE=tpcds_text_${SCALE} -d FILE="${file}" \
 			    -d SERDE=${SERDES[$i]}