Browse Source

Many people are not using ATS, which causes the testbench.settings files to fail.

Split the settings into two variants: without ATS (the default) and with ATS. ATS allows you to get additional analytics about your jobs.
Carter Shanklin 11 years ago
parent
commit
302c158661

+ 58 - 0
sample-queries-tpcds/testbench-withATS.settings

@@ -0,0 +1,58 @@
+set ambari.hive.db.schema.name=hive;
+set fs.file.impl.disable.cache=true;
+set fs.hdfs.impl.disable.cache=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join.noconditionaltask=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.compactor.abortedtxn.threshold=1000;
+set hive.compactor.check.interval=300;
+set hive.compactor.delta.num.threshold=10;
+set hive.compactor.delta.pct.threshold=0.1f;
+set hive.compactor.initiator.on=false;
+set hive.compactor.worker.threads=0;
+set hive.compactor.worker.timeout=86400;
+set hive.compute.query.using.stats=true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.enforce.sortmergebucketmapjoin=true;
+set hive.exec.failure.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook;
+set hive.exec.post.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook;
+set hive.exec.pre.hooks=org.apache.hadoop.hive.ql.hooks.ATSHook;
+set hive.execution.engine=mr;
+set hive.limit.pushdown.memory.usage=0.04;
+set hive.map.aggr=true;
+set hive.mapjoin.bucket.cache.size=10000;
+set hive.mapred.reduce.tasks.speculative.execution=false;
+set hive.metastore.cache.pinobjtypes=Table,Database,Type,FieldSchema,Order;
+set hive.metastore.client.socket.timeout=60;
+set hive.metastore.execute.setugi=true;
+set hive.metastore.warehouse.dir=/apps/hive/warehouse;
+set hive.optimize.bucketmapjoin.sortedmerge=false;
+set hive.optimize.bucketmapjoin=true;
+set hive.optimize.index.filter=true;
+set hive.optimize.mapjoin.mapreduce=true;
+set hive.optimize.reducededuplication.min.reducer=4;
+set hive.optimize.reducededuplication=true;
+set hive.orc.splits.include.file.footer=false;
+set hive.security.authorization.enabled=false;
+set hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider;
+set hive.semantic.analyzer.factory.impl=org.apache.hcatalog.cli.HCatSemanticAnalyzerFactory;
+set hive.server2.enable.doAs=false;
+set hive.server2.tez.default.queues=default;
+set hive.server2.tez.initialize.default.sessions=false;
+set hive.server2.tez.sessions.per.default.queue=1;
+set hive.stats.autogather=true;
+set hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
+set hive.txn.max.open.batch=1000;
+set hive.txn.timeout=300;
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.groupby.checkinterval=1024;
+set hive.vectorized.groupby.flush.percent=1;
+set hive.vectorized.groupby.maxentries=1024;
+
+-- These values need to be tuned appropriately to your cluster. These examples are for reference.
+-- set hive.tez.container.size=4096;
+-- set hive.tez.java.opts=-Xmx3800m;
+-- set hive.auto.convert.join.noconditionaltask.size=1252698795;

+ 55 - 0
sample-queries-tpch/testbench-withATS.settings

@@ -0,0 +1,55 @@
+set ambari.hive.db.schema.name=hive;
+set fs.file.impl.disable.cache=true;
+set fs.hdfs.impl.disable.cache=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join.noconditionaltask=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.compactor.abortedtxn.threshold=1000;
+set hive.compactor.check.interval=300;
+set hive.compactor.delta.num.threshold=10;
+set hive.compactor.delta.pct.threshold=0.1f;
+set hive.compactor.initiator.on=false;
+set hive.compactor.worker.threads=0;
+set hive.compactor.worker.timeout=86400;
+set hive.compute.query.using.stats=true;
+set hive.enforce.bucketing=true;
+set hive.enforce.sorting=true;
+set hive.enforce.sortmergebucketmapjoin=true;
+set hive.execution.engine=mr;
+set hive.limit.pushdown.memory.usage=0.04;
+set hive.map.aggr=true;
+set hive.mapjoin.bucket.cache.size=10000;
+set hive.mapred.reduce.tasks.speculative.execution=false;
+set hive.metastore.cache.pinobjtypes=Table,Database,Type,FieldSchema,Order;
+set hive.metastore.client.socket.timeout=60;
+set hive.metastore.execute.setugi=true;
+set hive.metastore.warehouse.dir=/apps/hive/warehouse;
+set hive.optimize.bucketmapjoin.sortedmerge=false;
+set hive.optimize.bucketmapjoin=true;
+set hive.optimize.index.filter=true;
+set hive.optimize.mapjoin.mapreduce=true;
+set hive.optimize.reducededuplication.min.reducer=4;
+set hive.optimize.reducededuplication=true;
+set hive.orc.splits.include.file.footer=false;
+set hive.security.authorization.enabled=false;
+set hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider;
+set hive.semantic.analyzer.factory.impl=org.apache.hcatalog.cli.HCatSemanticAnalyzerFactory;
+set hive.server2.enable.doAs=false;
+set hive.server2.tez.default.queues=default;
+set hive.server2.tez.initialize.default.sessions=false;
+set hive.server2.tez.sessions.per.default.queue=1;
+set hive.stats.autogather=true;
+set hive.tez.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager;
+set hive.txn.max.open.batch=1000;
+set hive.txn.timeout=300;
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.groupby.checkinterval=1024;
+set hive.vectorized.groupby.flush.percent=1;
+set hive.vectorized.groupby.maxentries=1024;
+
+-- These values need to be tuned appropriately to your cluster. These examples are for reference.
+-- set hive.tez.container.size=4096;
+-- set hive.tez.java.opts=-Xmx3800m;
+-- set hive.auto.convert.join.noconditionaltask.size=1252698795;