|
@@ -25,7 +25,8 @@ fi
|
|
|
|
|
|
|
|
# Tables in the TPC-DS schema.
|
|
# Tables in the TPC-DS schema.
|
|
|
DIMS="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site"
|
|
DIMS="date_dim time_dim item customer customer_demographics household_demographics customer_address store promotion warehouse ship_mode reason income_band call_center web_page catalog_page web_site"
|
|
|
-FACTS="store_sales store_returns web_sales web_returns catalog_sales catalog_returns inventory"
|
|
|
|
|
|
|
+FACTS=""
|
|
|
|
|
+#FACTS="store_sales store_returns web_sales web_returns catalog_sales catalog_returns inventory"
|
|
|
|
|
|
|
|
# Get the parameters.
|
|
# Get the parameters.
|
|
|
SCALE=$1
|
|
SCALE=$1
|
|
@@ -65,11 +66,16 @@ if [ $? -ne 0 ]; then
|
|
|
echo "Data generation failed, exiting."
|
|
echo "Data generation failed, exiting."
|
|
|
exit 1
|
|
exit 1
|
|
|
fi
|
|
fi
|
|
|
|
|
+
|
|
|
|
|
+hadoop fs -chmod -R 777 /${DIR}/${SCALE}
|
|
|
|
|
+
|
|
|
echo "TPC-DS text data generation complete."
|
|
echo "TPC-DS text data generation complete."
|
|
|
|
|
|
|
|
|
|
+HIVE="beeline -n hive -u 'jdbc:hive2://localhost:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2?tez.queue.name=default' "
|
|
|
|
|
+
|
|
|
# Create the text/flat tables as external tables. These will later be converted to ORCFile.
|
|
# Create the text/flat tables as external tables. These will later be converted to ORCFile.
|
|
|
echo "Loading text data into external tables."
|
|
echo "Loading text data into external tables."
|
|
|
-runcommand "hive -i settings/load-flat.sql -f ddl-tpcds/text/alltables.sql -d DB=tpcds_text_${SCALE} -d LOCATION=${DIR}/${SCALE}"
|
|
|
|
|
|
|
+runcommand "$HIVE -i settings/load-flat.sql -f ddl-tpcds/text/alltables.sql --hivevar DB=tpcds_text_${SCALE} --hivevar LOCATION=${DIR}/${SCALE}"
|
|
|
|
|
|
|
|
# Create the partitioned and bucketed tables.
|
|
# Create the partitioned and bucketed tables.
|
|
|
if [ "X$FORMAT" = "X" ]; then
|
|
if [ "X$FORMAT" = "X" ]; then
|
|
@@ -93,26 +99,26 @@ REDUCERS=$((test ${SCALE} -gt ${MAX_REDUCERS} && echo ${MAX_REDUCERS}) || echo $
|
|
|
# Populate the smaller tables.
|
|
# Populate the smaller tables.
|
|
|
for t in ${DIMS}
|
|
for t in ${DIMS}
|
|
|
do
|
|
do
|
|
|
- COMMAND="hive -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
|
|
|
|
|
- -d DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} -d SOURCE=tpcds_text_${SCALE} \
|
|
|
|
|
- -d SCALE=${SCALE} \
|
|
|
|
|
- -d REDUCERS=${REDUCERS} \
|
|
|
|
|
- -d FILE=${FORMAT}"
|
|
|
|
|
|
|
+ COMMAND="$HIVE -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
|
|
|
|
|
+ --hivevar DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} --hivevar SOURCE=tpcds_text_${SCALE} \
|
|
|
|
|
+ --hivevar SCALE=${SCALE} \
|
|
|
|
|
+ --hivevar REDUCERS=${REDUCERS} \
|
|
|
|
|
+ --hivevar FILE=${FORMAT}"
|
|
|
echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE
|
|
echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE
|
|
|
i=`expr $i + 1`
|
|
i=`expr $i + 1`
|
|
|
done
|
|
done
|
|
|
|
|
|
|
|
for t in ${FACTS}
|
|
for t in ${FACTS}
|
|
|
do
|
|
do
|
|
|
- COMMAND="hive -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
|
|
|
|
|
- -d DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} \
|
|
|
|
|
- -d SCALE=${SCALE} \
|
|
|
|
|
- -d SOURCE=tpcds_text_${SCALE} -d BUCKETS=${BUCKETS} \
|
|
|
|
|
- -d RETURN_BUCKETS=${RETURN_BUCKETS} -d REDUCERS=${REDUCERS} -d FILE=${FORMAT}"
|
|
|
|
|
|
|
+ COMMAND="$HIVE -i settings/load-partitioned.sql -f ddl-tpcds/bin_partitioned/${t}.sql \
|
|
|
|
|
+ --hivevar DB=tpcds_bin_partitioned_${FORMAT}_${SCALE} \
|
|
|
|
|
+ --hivevar SCALE=${SCALE} \
|
|
|
|
|
+ --hivevar SOURCE=tpcds_text_${SCALE} --hivevar BUCKETS=${BUCKETS} \
|
|
|
|
|
+ --hivevar RETURN_BUCKETS=${RETURN_BUCKETS} --hivevar REDUCERS=${REDUCERS} --hivevar FILE=${FORMAT}"
|
|
|
echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE
|
|
echo -e "${t}:\n\t@$COMMAND $SILENCE && echo 'Optimizing table $t ($i/$total).'" >> $LOAD_FILE
|
|
|
i=`expr $i + 1`
|
|
i=`expr $i + 1`
|
|
|
done
|
|
done
|
|
|
|
|
|
|
|
-make -j 2 -f $LOAD_FILE
|
|
|
|
|
|
|
+make -j 1 -f $LOAD_FILE
|
|
|
|
|
|
|
|
echo "Data loaded into database ${DATABASE}."
|
|
echo "Data loaded into database ${DATABASE}."
|