Explorar el Código

More improvements to build and setup scripts.

cartershanklin hace 11 años
padre
commit
0fd538c264
Se han modificado 4 ficheros con 39 adiciones y 15 borrados
  1. 14 5
      tpcds-build.sh
  2. 4 1
      tpcds-setup.sh
  3. 15 6
      tpch-build.sh
  4. 6 3
      tpch-setup.sh

+ 14 - 5
tpcds-build.sh

@@ -12,11 +12,20 @@ done
 # Check if Maven is installed and install it if not.
 which mvn > /dev/null 2>&1
 if [ $? -ne 0 ]; then
-	echo "Maven not found, automatically installing it."
-	curl -O http://www.us.apache.org/dist/maven/maven-3/3.0.5/binaries/apache-maven-3.0.5-bin.tar.gz 2> /dev/null
-	if [ $? -ne 0 ]; then
-		echo "Failed to download Maven, check Internet connectivity and try again."
-		exit 1
+	SKIP=0
+	if [ -e "apache-maven-3.0.5-bin.tar.gz" ]; then
+		SIZE=`du -b apache-maven-3.0.5-bin.tar.gz | cut -f 1`
+		if [ $SIZE -eq 5144659 ]; then
+			SKIP=1
+		fi
+	fi
+	if [ $SKIP -ne 1 ]; then
+		echo "Maven not found, automatically installing it."
+		curl -O http://www.us.apache.org/dist/maven/maven-3/3.0.5/binaries/apache-maven-3.0.5-bin.tar.gz 2> /dev/null
+		if [ $? -ne 0 ]; then
+			echo "Failed to download Maven, check Internet connectivity and try again."
+			exit 1
+		fi
 	fi
 	tar -zxf apache-maven-3.0.5-bin.tar.gz > /dev/null
 	CWD=$(pwd)

+ 4 - 1
tpcds-setup.sh

@@ -17,7 +17,7 @@ BOLD=`tput bold`
 NORMAL=`tput sgr0`
 
 if [ ! -f tpcds-gen/target/tpcds-gen-1.0-SNAPSHOT.jar ]; then
-	echo "Please build the data generator with ./build-tpcds.sh first"
+	echo "Please build the data generator with ./tpcds-build.sh first"
 	exit 1
 fi
 which hive > /dev/null 2>&1
@@ -72,6 +72,7 @@ runcommand "hive -i settings/load-flat.sql -f ddl-tpcds/text/alltables.sql -d DB
 # Create the partitioned and bucketed tables.
 i=1
 total=24
+DATABASE=tpcds_bin_partitioned_orc_${SCALE}
 for t in ${FACTS}
 do
 	echo "${BOLD}Optimizing table $t ($i/$total).${NORMAL}"
@@ -101,3 +102,5 @@ do
 	fi
 	i=`expr $i + 1`
 done
+
+echo "${BOLD}Data loaded into database ${DATABASE}.${NORMAL}"

+ 15 - 6
tpch-build.sh

@@ -12,11 +12,20 @@ done
 # Check if Maven is installed and install it if not.
 which mvn > /dev/null 2>&1
 if [ $? -ne 0 ]; then
-	echo "Maven not found, automatically installing it."
-	curl -O http://www.us.apache.org/dist/maven/maven-3/3.0.5/binaries/apache-maven-3.0.5-bin.tar.gz 2> /dev/null
-	if [ $? -ne 0 ]; then
-		echo "Failed to download Maven, check Internet connectivity and try again."
-		exit 1
+	SKIP=0
+	if [ -e "apache-maven-3.0.5-bin.tar.gz" ]; then
+		SIZE=`du -b apache-maven-3.0.5-bin.tar.gz | cut -f 1`
+		if [ $SIZE -eq 5144659 ]; then
+			SKIP=1
+		fi
+	fi
+	if [ $SKIP -ne 1 ]; then
+		echo "Maven not found, automatically installing it."
+		curl -O http://www.us.apache.org/dist/maven/maven-3/3.0.5/binaries/apache-maven-3.0.5-bin.tar.gz 2> /dev/null
+		if [ $? -ne 0 ]; then
+			echo "Failed to download Maven, check Internet connectivity and try again."
+			exit 1
+		fi
 	fi
 	tar -zxf apache-maven-3.0.5-bin.tar.gz > /dev/null
 	CWD=$(pwd)
@@ -24,6 +33,6 @@ if [ $? -ne 0 ]; then
 	export PATH=$PATH:$MAVEN_HOME/bin
 fi
 
-echo "Building TPC-H Data Generator"
+echo "Building TPC-DS Data Generator"
 (cd tpch-gen; make)
 echo "TPC-H Data Generator built, you can now use tpch-setup.sh to generate data."

+ 6 - 3
tpch-setup.sh

@@ -17,7 +17,7 @@ BOLD=`tput bold`
 NORMAL=`tput sgr0`
 
 if [ ! -f tpch-gen/target/tpch-gen-1.0-SNAPSHOT.jar ]; then
-	echo "Please build the data generator with ./build-tpch.sh first"
+	echo "Please build the data generator with ./tpch-build.sh first"
 	exit 1
 fi
 which hive > /dev/null 2>&1
@@ -67,14 +67,15 @@ echo "${BOLD}TPC-H text data generation complete.${NORMAL}"
 echo "${BOLD}Loading text data into external tables.${NORMAL}"
 runcommand "hive -i settings/load-flat.sql -f ddl-tpch/text/alltables.sql -d DB=tpch_text_${SCALE} -d LOCATION=${DIR}/${SCALE}"
 
-# Create the partitioned and bucketed tables.
+# Create the optimized tables.
 i=1
 total=8
+DATABASE=tpch_bin_partitioned_orc_${SCALE}
 for t in ${TABLES}
 do
 	echo "${BOLD}Optimizing table $t ($i/$total).${NORMAL}"
 	COMMAND="hive -i settings/load-flat.sql -f ddl-tpch/bin_flat/${t}.sql \
-	    -d DB=tpch_bin_partitioned_orc_${SCALE} \
+	    -d DB=tpch_bin_flat_orc_${SCALE} \
 	    -d SOURCE=tpch_text_${SCALE} -d BUCKETS=${BUCKETS} \
 	    -d FILE=orc"
 	runcommand "$COMMAND"
@@ -84,3 +85,5 @@ do
 	fi
 	i=`expr $i + 1`
 done
+
+echo "${BOLD}Data loaded into database ${DATABASE}.${NORMAL}"