# generate_analyze.pl

  use strict;
  use warnings;

  use Text::Wrap;
  use DateTime;
  use DateTime::Format::Strptime;
  # Emit, on STDOUT, a Hive script that gathers table-level and column-level
  # statistics for the table declared in a DDL file.
  #
  # Usage: generate_analyze.pl DDL_FILE
  #
  # For tables listed in %partition_column_for, one column-statistics
  # statement is produced per day, starting at $first_date and stopping when
  # the calendar year reaches $end_year.  Every other table gets a single
  # column-statistics statement.
  #
  # Dies (before printing anything) when DDL_FILE is missing, unreadable, or
  # contains no table declaration.

  # Wrap the generated column lists at 72 characters.
  $Text::Wrap::columns = 72;

  # Partition column of each table that is analyzed partition by partition.
  my %partition_column_for = (
      catalog_returns => "cr_returned_date",
      catalog_sales   => "cs_sold_date",
      inventory       => "inv_date",
      store_returns   => "sr_returned_date",
      store_sales     => "ss_sold_date",
      web_returns     => "wr_returned_date",
      web_sales       => "ws_sold_date",
  );

  # Daily partitions are generated from $first_date up to, but not including,
  # January 1st of $end_year.
  my $first_date = "1998-01-01";
  my $end_year   = 2003;

  my $ddl_file = $ARGV[0];
  die "Usage: $0 DDL_FILE\n"
      unless defined($ddl_file) && length($ddl_file);

  # Read the DDL once; both the table name and the column list come from it.
  open(my $ddl_fh, '<', $ddl_file)
      or die "Cannot open '$ddl_file': $!\n";
  my @ddl_lines = <$ddl_fh>;
  close($ddl_fh)
      or die "Cannot close '$ddl_file': $!\n";

  # The table name is the first non-blank token after "l table " (presumably
  # the tail of "external table" -- confirm against the DDL files in use).
  my ($table) = join('', @ddl_lines) =~ m|l table (\S+)|
      or die "No table declaration found in '$ddl_file'\n";

  # The token may have the opening parenthesis of the column list attached.
  $table =~ s/\(//g;
  die "Empty table name in '$ddl_file'\n" unless length($table);

  # A column definition is a line whose first token follows 4 to 6 leading
  # blanks and/or commas.
  my @column_names = map { m|^[ ,]{4,6}(\S+)| ? "$1" : () } @ddl_lines;

  # Re-split the comma-joined list on whitespace so that every name but the
  # last carries its trailing comma; wrap() can then only break lines between
  # columns.
  my @column_tokens = split(/\s/, join(', ', @column_names));

  # The wrapped column list is identical for every statement, so build it once.
  my $column_clause = wrap("\t", "\t", @column_tokens) . ";\n\n";

  my $partition_column = $partition_column_for{$table};

  print "-- Use filesystem to track stats.\n";
  print "set hive.stats.dbclass=fs;\n";
  print "-- Many tables have some missing partitions, deal with this by ignoring errors.\n";
  print "set hive.cli.errors.ignore=true;\n\n";

  # Table-level statistics; partitioned tables name their partition column.
  my $table_partition_spec
      = defined($partition_column) ? " partition(" . $partition_column . ")" : "";
  print "ANALYZE TABLE $table$table_partition_spec COMPUTE STATISTICS;\n";

  if (defined($partition_column)) {
      my $date_parser = DateTime::Format::Strptime->new(
          pattern => '%Y-%m-%d'
      );
      my $day = $date_parser->parse_datetime($first_date)
          or die "Cannot parse start date '$first_date'\n";

      # One column-statistics statement per daily partition.
      while ($day->year < $end_year) {
          my $quoted_date    = "'" . $day->strftime("%Y-%m-%d") . "'";
          my $partition_spec = " partition(" . $partition_column . "=$quoted_date)";

          print "!echo Analyzing $table $quoted_date;\n";
          print "ANALYZE TABLE $table$partition_spec COMPUTE STATISTICS FOR COLUMNS \n";
          print $column_clause;

          $day->add(days => 1);
      }
  }
  else {
      print "ANALYZE TABLE $table COMPUTE STATISTICS FOR COLUMNS \n";
      print $column_clause;
  }