cartershanklin hai 12 anos
pai
achega
2f07953446

+ 0 - 1
ddl/bin_partitioned/call_center.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists call_center;
 
 create table call_center
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.call_center;

+ 0 - 1
ddl/bin_partitioned/catalog_page.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists catalog_page;
 
 create table catalog_page
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.catalog_page;

+ 35 - 1
ddl/bin_partitioned/catalog_returns.sql

@@ -1,5 +1,6 @@
 set hive.enforce.bucketing=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.exec.max.dynamic.partitions=4096;
 set hive.exec.max.dynamic.partitions.pernode=4096;
 set mapred.job.reduce.input.buffer.percent=0.0;
 
@@ -39,5 +40,38 @@ create table catalog_returns
     cr_net_loss               float
 )
 partitioned by (cr_returned_date string)
-row format serde '${SERDE}'
 stored as ${FILE};
+
+insert overwrite table catalog_returns partition (cr_returned_date) 
+select
+        cr.cr_returned_date_sk,
+        cr.cr_returned_time_sk,
+        cr.cr_item_sk,
+        cr.cr_refunded_customer_sk,
+        cr.cr_refunded_cdemo_sk,
+        cr.cr_refunded_hdemo_sk,
+        cr.cr_refunded_addr_sk,
+        cr.cr_returning_customer_sk,
+        cr.cr_returning_cdemo_sk,
+        cr.cr_returning_hdemo_sk,
+        cr.cr_returning_addr_sk,
+        cr.cr_call_center_sk,
+        cr.cr_catalog_page_sk,
+        cr.cr_ship_mode_sk,
+        cr.cr_warehouse_sk,
+        cr.cr_reason_sk,
+        cr.cr_order_number,
+        cr.cr_return_quantity,
+        cr.cr_return_amount,
+        cr.cr_return_tax,
+        cr.cr_return_amt_inc_tax,
+        cr.cr_fee,
+        cr.cr_return_ship_cost,
+        cr.cr_refunded_cash,
+        cr.cr_reversed_charge,
+        cr.cr_store_credit,
+        cr.cr_net_loss,
+        dd.d_date as cr_returned_date
+      from ${SOURCE}.catalog_returns cr
+      left outer join ${SOURCE}.date_dim dd
+      on (cr.cr_returned_date_sk = dd.d_date_sk);

+ 42 - 1
ddl/bin_partitioned/catalog_sales.sql

@@ -1,5 +1,6 @@
 set hive.enforce.bucketing=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.exec.max.dynamic.partitions=4096;
 set hive.exec.max.dynamic.partitions.pernode=4096;
 set mapred.job.reduce.input.buffer.percent=0.0;
 
@@ -46,5 +47,45 @@ create table catalog_sales
     cs_net_profit             float
 )
 partitioned by (cs_sold_date string)
-row format serde '${SERDE}'
 stored as ${FILE};
+
+insert overwrite table catalog_sales partition (cs_sold_date) 
+select
+        cs.cs_sold_date_sk,
+        cs.cs_sold_time_sk,
+        cs.cs_ship_date_sk,
+        cs.cs_bill_customer_sk,
+        cs.cs_bill_cdemo_sk,
+        cs.cs_bill_hdemo_sk,
+        cs.cs_bill_addr_sk,
+        cs.cs_ship_customer_sk,
+        cs.cs_ship_cdemo_sk,
+        cs.cs_ship_hdemo_sk,
+        cs.cs_ship_addr_sk,
+        cs.cs_call_center_sk,
+        cs.cs_catalog_page_sk,
+        cs.cs_ship_mode_sk,
+        cs.cs_warehouse_sk,
+        cs.cs_item_sk,
+        cs.cs_promo_sk,
+        cs.cs_order_number,
+        cs.cs_quantity,
+        cs.cs_wholesale_cost,
+        cs.cs_list_price,
+        cs.cs_sales_price,
+        cs.cs_ext_discount_amt,
+        cs.cs_ext_sales_price,
+        cs.cs_ext_wholesale_cost,
+        cs.cs_ext_list_price,
+        cs.cs_ext_tax,
+        cs.cs_coupon_amt,
+        cs.cs_ext_ship_cost,
+        cs.cs_net_paid,
+        cs.cs_net_paid_inc_tax,
+        cs.cs_net_paid_inc_ship,
+        cs.cs_net_paid_inc_ship_tax,
+        cs.cs_net_profit,
+        dd.d_date as cs_sold_date
+      from ${SOURCE}.catalog_sales cs
+      left outer join ${SOURCE}.date_dim dd
+      on (cs.cs_sold_date_sk = dd.d_date_sk);

+ 0 - 1
ddl/bin_partitioned/customer.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists customer;
 
 create table customer
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.customer;

+ 0 - 1
ddl/bin_partitioned/customer_address.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists customer_address;
 
 create table customer_address
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.customer_address;

+ 0 - 1
ddl/bin_partitioned/customer_demographics.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists customer_demographics;
 
 create table customer_demographics
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.customer_demographics;

+ 0 - 1
ddl/bin_partitioned/date_dim.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists date_dim;
 
 create table date_dim
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.date_dim;

+ 0 - 1
ddl/bin_partitioned/household_demographics.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists household_demographics;
 
 create table household_demographics
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.household_demographics;

+ 0 - 1
ddl/bin_partitioned/income_band.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists income_band;
 
 create table income_band
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.income_band;

+ 12 - 4
ddl/bin_partitioned/inventory.sql

@@ -1,10 +1,8 @@
 set hive.enforce.bucketing=true;
-set hive.enforce.sorting=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
-set hive.exec.max.created.files=4096;
+set hive.exec.max.dynamic.partitions=4096;
 set hive.exec.max.dynamic.partitions.pernode=4096;
 set mapred.job.reduce.input.buffer.percent=0.0;
-set mapred.child.java.opts=-server -Xmx6000m -Djava.net.preferIPv4Stack=true;
 
 create database if not exists ${DB};
 use ${DB};
@@ -19,5 +17,15 @@ create table inventory
     inv_quantity_on_hand	int
 )
 partitioned by (inv_date string)
-row format serde '${SERDE}'
 stored as ${FILE};
+-- Load inventory with a dynamic inv_date partition taken from date_dim.d_date; the left outer join keeps inventory rows that have no date_dim match.
+insert overwrite table inventory partition (inv_date)
+select
+	i.inv_date_sk,
+	i.inv_item_sk,
+	i.inv_warehouse_sk,
+	i.inv_quantity_on_hand,
+	d.d_date as inv_date
+  from ${SOURCE}.inventory i
+  left outer join ${SOURCE}.date_dim d
+  on (d.d_date_sk = i.inv_date_sk);

+ 4 - 3
ddl/bin_partitioned/item.sql

@@ -1,5 +1,8 @@
 set hive.enforce.bucketing=true;
-set hive.enforce.sorting=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.exec.max.dynamic.partitions=4096;
+set hive.exec.max.dynamic.partitions.pernode=4096;
+set mapred.job.reduce.input.buffer.percent=0.0;
 
 create database if not exists ${DB};
 use ${DB};
@@ -31,8 +34,6 @@ create table item
     i_manager_id              int,
     i_product_name            string
 )
-clustered by (i_item_sk) sorted by (i_item_sk) into ${BUCKETS} buckets
-row format serde '${SERDE}'
 stored as ${FILE};
 
 insert overwrite table item

+ 0 - 1
ddl/bin_partitioned/promotion.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists promotion;
 
 create table promotion
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.promotion;

+ 0 - 1
ddl/bin_partitioned/reason.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists reason;
 
 create table reason
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.reason;

+ 0 - 1
ddl/bin_partitioned/ship_mode.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists ship_mode;
 
 create table ship_mode
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.ship_mode;

+ 0 - 1
ddl/bin_partitioned/store.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists store;
 
 create table store
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.store;

+ 28 - 1
ddl/bin_partitioned/store_returns.sql

@@ -1,5 +1,6 @@
 set hive.enforce.bucketing=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.exec.max.dynamic.partitions=4096;
 set hive.exec.max.dynamic.partitions.pernode=4096;
 set mapred.job.reduce.input.buffer.percent=0.0;
 
@@ -32,5 +33,31 @@ create table store_returns
     sr_net_loss               float
 )
 partitioned by (sr_returned_date string)
-row format serde '${SERDE}'
 stored as ${FILE};
+
+insert overwrite table store_returns partition (sr_returned_date) 
+select
+        sr.sr_returned_date_sk,
+        sr.sr_return_time_sk,
+        sr.sr_item_sk,
+        sr.sr_customer_sk,
+        sr.sr_cdemo_sk,
+        sr.sr_hdemo_sk,
+        sr.sr_addr_sk,
+        sr.sr_store_sk,
+        sr.sr_reason_sk,
+        sr.sr_ticket_number,
+        sr.sr_return_quantity,
+        sr.sr_return_amt,
+        sr.sr_return_tax,
+        sr.sr_return_amt_inc_tax,
+        sr.sr_fee,
+        sr.sr_return_ship_cost,
+        sr.sr_refunded_cash,
+        sr.sr_reversed_charge,
+        sr.sr_store_credit,
+        sr.sr_net_loss,
+        dd.d_date as sr_returned_date
+      from ${SOURCE}.store_returns sr
+      left outer join ${SOURCE}.date_dim dd
+      on (sr.sr_returned_date_sk = dd.d_date_sk);

+ 31 - 1
ddl/bin_partitioned/store_sales.sql

@@ -1,5 +1,6 @@
 set hive.enforce.bucketing=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.exec.max.dynamic.partitions=4096;
 set hive.exec.max.dynamic.partitions.pernode=4096;
 set mapred.job.reduce.input.buffer.percent=0.0;
 
@@ -35,5 +36,34 @@ create table store_sales
     ss_net_profit             float
 )
 partitioned by (ss_sold_date string)
-row format serde '${SERDE}'
 stored as ${FILE};
+
+insert overwrite table store_sales partition (ss_sold_date) 
+select
+        ss.ss_sold_date_sk,
+        ss.ss_sold_time_sk,
+        ss.ss_item_sk,
+        ss.ss_customer_sk,
+        ss.ss_cdemo_sk,
+        ss.ss_hdemo_sk,
+        ss.ss_addr_sk,
+        ss.ss_store_sk,
+        ss.ss_promo_sk,
+        ss.ss_ticket_number,
+        ss.ss_quantity,
+        ss.ss_wholesale_cost,
+        ss.ss_list_price,
+        ss.ss_sales_price,
+        ss.ss_ext_discount_amt,
+        ss.ss_ext_sales_price,
+        ss.ss_ext_wholesale_cost,
+        ss.ss_ext_list_price,
+        ss.ss_ext_tax,
+        ss.ss_coupon_amt,
+        ss.ss_net_paid,
+        ss.ss_net_paid_inc_tax,
+        ss.ss_net_profit,
+        dd.d_date as ss_sold_date
+      from ${SOURCE}.store_sales ss
+      left outer join ${SOURCE}.date_dim dd
+      on (ss.ss_sold_date_sk = dd.d_date_sk);

+ 0 - 1
ddl/bin_partitioned/time_dim.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists time_dim;
 
 create table time_dim
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.time_dim;

+ 0 - 1
ddl/bin_partitioned/warehouse.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists warehouse;
 
 create table warehouse
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.warehouse;

+ 0 - 1
ddl/bin_partitioned/web_page.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists web_page;
 
 create table web_page
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.web_page;

+ 32 - 1
ddl/bin_partitioned/web_returns.sql

@@ -1,5 +1,6 @@
 set hive.enforce.bucketing=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.exec.max.dynamic.partitions=4096;
 set hive.exec.max.dynamic.partitions.pernode=4096;
 set mapred.job.reduce.input.buffer.percent=0.0;
 
@@ -36,5 +37,35 @@ create table web_returns
     wr_net_loss               float
 )
 partitioned by (wr_returned_date string)
-row format serde '${SERDE}'
 stored as ${FILE};
+
+insert overwrite table web_returns partition (wr_returned_date)
+select
+        wr.wr_returned_date_sk,
+        wr.wr_returned_time_sk,
+        wr.wr_item_sk,
+        wr.wr_refunded_customer_sk,
+        wr.wr_refunded_cdemo_sk,
+        wr.wr_refunded_hdemo_sk,
+        wr.wr_refunded_addr_sk,
+        wr.wr_returning_customer_sk,
+        wr.wr_returning_cdemo_sk,
+        wr.wr_returning_hdemo_sk,
+        wr.wr_returning_addr_sk,
+        wr.wr_web_page_sk,
+        wr.wr_reason_sk,
+        wr.wr_order_number,
+        wr.wr_return_quantity,
+        wr.wr_return_amt,
+        wr.wr_return_tax,
+        wr.wr_return_amt_inc_tax,
+        wr.wr_fee,
+        wr.wr_return_ship_cost,
+        wr.wr_refunded_cash,
+        wr.wr_reversed_charge,
+        wr.wr_account_credit,
+        wr.wr_net_loss,
+        dd.d_date as wr_returned_date
+      from ${SOURCE}.web_returns wr
+      left outer join ${SOURCE}.date_dim dd
+      on (wr.wr_returned_date_sk = dd.d_date_sk);

+ 42 - 1
ddl/bin_partitioned/web_sales.sql

@@ -1,5 +1,6 @@
 set hive.enforce.bucketing=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.exec.max.dynamic.partitions=4096;
 set hive.exec.max.dynamic.partitions.pernode=4096;
 set mapred.job.reduce.input.buffer.percent=0.0;
 
@@ -46,5 +47,45 @@ create table web_sales
     ws_net_profit             float
 )
 partitioned by (ws_sold_date string)
-row format serde '${SERDE}'
 stored as ${FILE};
+
+insert overwrite table web_sales partition (ws_sold_date) 
+select
+        ws.ws_sold_date_sk,
+        ws.ws_sold_time_sk,
+        ws.ws_ship_date_sk,
+        ws.ws_item_sk,
+        ws.ws_bill_customer_sk,
+        ws.ws_bill_cdemo_sk,
+        ws.ws_bill_hdemo_sk,
+        ws.ws_bill_addr_sk,
+        ws.ws_ship_customer_sk,
+        ws.ws_ship_cdemo_sk,
+        ws.ws_ship_hdemo_sk,
+        ws.ws_ship_addr_sk,
+        ws.ws_web_page_sk,
+        ws.ws_web_site_sk,
+        ws.ws_ship_mode_sk,
+        ws.ws_warehouse_sk,
+        ws.ws_promo_sk,
+        ws.ws_order_number,
+        ws.ws_quantity,
+        ws.ws_wholesale_cost,
+        ws.ws_list_price,
+        ws.ws_sales_price,
+        ws.ws_ext_discount_amt,
+        ws.ws_ext_sales_price,
+        ws.ws_ext_wholesale_cost,
+        ws.ws_ext_list_price,
+        ws.ws_ext_tax,
+        ws.ws_coupon_amt,
+        ws.ws_ext_ship_cost,
+        ws.ws_net_paid,
+        ws.ws_net_paid_inc_tax,
+        ws.ws_net_paid_inc_ship,
+        ws.ws_net_paid_inc_ship_tax,
+        ws.ws_net_profit,
+        dd.d_date as ws_sold_date
+      from ${SOURCE}.web_sales ws
+      left outer join ${SOURCE}.date_dim dd
+      on (ws.ws_sold_date_sk = dd.d_date_sk);

+ 0 - 1
ddl/bin_partitioned/web_site.sql

@@ -4,6 +4,5 @@ use ${DB};
 drop table if exists web_site;
 
 create table web_site
-row format serde '${SERDE}'
 stored as ${FILE}
 as select * from ${SOURCE}.web_site;