/*
 * blockprofiler.h
 *
 *  Created on: Mar 8, 2013
 *      Author: yjo
 */

#ifndef BLOCKPROFILER_H_
#define BLOCKPROFILER_H_

//#define INCLUSIVE 

// Integer samples recorded for one level.
typedef std::vector<int> CountVec;

/*
 * CountStack
 *
 * Level-indexed collection of heap-allocated CountVec objects that grows on
 * demand: get(i) creates every missing level up to and including i.
 *
 * The stack owns the vectors referenced from `items` and deletes them when it
 * is destroyed.  Copying performs a deep copy, so two stacks never share (and
 * never double-delete) the same vectors.  A pointer returned by get() stays
 * valid while the stack grows; it is invalidated when the stack is destroyed
 * or is the target of an assignment.
 */
class CountStack
{
 public:
  CountStack() {}

  // Deep copy: every level of `other` is duplicated into a fresh vector.
  CountStack(const CountStack &other) {
    try {
      items.reserve(other.items.size());
      for (Index k = 0; k < other.items.size(); ++k) {
        items.push_back(new CountVec(*other.items[k]));
      }
    } catch (...) {
      // The destructor does not run for a partially constructed object, so
      // the levels copied so far have to be released here.
      release();
      throw;
    }
  }

  // Copy-and-swap: the previous contents are released when `tmp` dies.
  CountStack &operator=(const CountStack &other) {
    if (this != &other) {
      CountStack tmp(other);
      items.swap(tmp.items);
    }
    return *this;
  }

  ~CountStack() { release(); }

  /*
   * Returns the vector for level `i`, creating empty vectors for all levels
   * in [size(), i] first.
   *
   * A negative `i` is rejected: it is converted to a very large unsigned
   * index that at() refuses with std::out_of_range, instead of growing the
   * stack without bound.
   */
  CountVec* get(int i) {
    const Index wanted = static_cast<Index>(i);
    if (i >= 0) {
      while (items.size() <= wanted) {
        items.push_back(new CountVec());
      }
    }
    return items.at(wanted);
  }

  // Number of levels created so far.
  int size() const { return static_cast<int>(items.size()); }

  // Owned per-level vectors; index == level.
  std::vector<CountVec *> items;

 private:
  typedef std::vector<CountVec *>::size_type Index;

  // Deletes every owned vector and empties `items`.
  void release() {
    for (Index k = 0; k < items.size(); ++k) {
      delete items[k];
    }
    items.clear();
  }
};

// Floating-point samples recorded for one level.
typedef std::vector<double> CountVecd;

/*
 * CountStackd
 *
 * Level-indexed collection of heap-allocated CountVecd objects that grows on
 * demand: get(i) creates every missing level up to and including i.
 *
 * The stack owns the vectors referenced from `items` and deletes them when it
 * is destroyed.  Copying performs a deep copy, so two stacks never share (and
 * never double-delete) the same vectors.  A pointer returned by get() stays
 * valid while the stack grows; it is invalidated when the stack is destroyed
 * or is the target of an assignment.
 */
class CountStackd
{
 public:
  CountStackd() {}

  // Deep copy: every level of `other` is duplicated into a fresh vector.
  CountStackd(const CountStackd &other) {
    try {
      items.reserve(other.items.size());
      for (Index k = 0; k < other.items.size(); ++k) {
        items.push_back(new CountVecd(*other.items[k]));
      }
    } catch (...) {
      // The destructor does not run for a partially constructed object, so
      // the levels copied so far have to be released here.
      release();
      throw;
    }
  }

  // Copy-and-swap: the previous contents are released when `tmp` dies.
  CountStackd &operator=(const CountStackd &other) {
    if (this != &other) {
      CountStackd tmp(other);
      items.swap(tmp.items);
    }
    return *this;
  }

  ~CountStackd() { release(); }

  /*
   * Returns the vector for level `i`, creating empty vectors for all levels
   * in [size(), i] first.
   *
   * A negative `i` is rejected: it is converted to a very large unsigned
   * index that at() refuses with std::out_of_range, instead of growing the
   * stack without bound.
   */
  CountVecd* get(int i) {
    const Index wanted = static_cast<Index>(i);
    if (i >= 0) {
      while (items.size() <= wanted) {
        items.push_back(new CountVecd());
      }
    }
    return items.at(wanted);
  }

  // Number of levels created so far.
  int size() const { return static_cast<int>(items.size()); }

  // Owned per-level vectors; index == level.
  std::vector<CountVecd *> items;

 private:
  typedef std::vector<CountVecd *>::size_type Index;

  // Deletes every owned vector and empties `items`.
  void release() {
    for (Index k = 0; k < items.size(); ++k) {
      delete items[k];
    }
    items.clear();
  }
};

/*
 * BlockProfiler
 *
 * Accumulates statistics about the sizes of processed blocks and prints /
 * appends them as reports.
 *
 * Histogram layout (`count`, width + 1 buckets):
 *   - count[0]      : incremented by record_single().
 *   - count[width]  : number of full groups of `width` items, i.e.
 *                     block_size / width summed over all record() calls.
 *   - count[r]      : number of record() calls whose block_size % width == r,
 *                     for 0 < r < width.
 * When work is summed, bucket i is weighted by i, except bucket 0 which is
 * weighted by 1.
 *
 * Per-depth samples are kept in CountStack / CountStackd members and are
 * summarised by outputParameter(), outputBlockInfo() and outputReexpandInfo().
 *
 * output() and outputParameter() append one record per call to
 * "blockprofile.csv" and "parameter.csv" in the current working directory.
 * Each record starts with values obtained from Harness (benchmark name,
 * application arguments, sort flag, block size, splice depth).
 */
class BlockProfiler {
 public:
  /*
   * width: number of items in a full group; the histogram gets width + 1
   *        buckets.  Values below 1 are raised to 1 because record() divides
   *        by it.
   */
  BlockProfiler(int width = 4)
      : width(sanitizeWidth(width)),
        count(sanitizeWidth(width) + 1),  // value-initialised to zero
        block_size_sum(0),
        block_size_cnt(0),
        leaf_node_exist_rate(0.0f) {  // printed by output() even if never set
  }

  /*
   * Prints the block profile to stdout, appends it to "blockprofile.csv",
   * then runs outputParameter() and outputSIMDUtilize(0).
   *
   * Ratios and averages are reported as 0 when their denominator is 0.  If
   * the CSV file cannot be opened a message is written to stderr and the CSV
   * record is skipped; the remaining reports are still produced.
   */
  void output() {
    printf("--- Block Profile ---\n");

    const int buckets = width + 1;
    vector<uint64_t> work(buckets);
    uint64_t work_sum = 0;
    for (int i = 0; i < buckets; i++) {
      work[i] = count[i] * bucketWeight(i);
      work_sum += work[i];
    }

    vector<float> work_ratio(buckets);
    for (int i = 0; i < buckets; i++) {
      work_ratio[i] = (work_sum != 0) ? static_cast<float>(work[i]) / work_sum : 0.0f;
      // %llu expects unsigned long long; uint64_t may be a different type.
      printf("work %d: %llu %.4f\n", i, static_cast<unsigned long long>(work[i]), work_ratio[i]);
    }
    printf("work sum: %llu\n", static_cast<unsigned long long>(work_sum));

    const float block_size_avg =
        (block_size_cnt != 0) ? static_cast<float>(block_size_sum) / block_size_cnt : 0.0f;
    printf("block size avg: %.4f\n", block_size_avg);
    printf("block size cnt: %llu\n", static_cast<unsigned long long>(block_size_cnt));
    printf("leaf node exist rate: %.4f\n", leaf_node_exist_rate);

    FILE *fp = fopen("blockprofile.csv", "a");
    if (fp) {
      writeRunHeader(fp);
      for (int i = 0; i < buckets; i++) {
        fprintf(fp, "%llu,", static_cast<unsigned long long>(work[i]));
      }
      fprintf(fp, "%llu,", static_cast<unsigned long long>(work_sum));
      for (int i = 0; i < buckets; i++) {
        fprintf(fp, "%.4f,", work_ratio[i]);
      }
      fprintf(fp, "%llu, %.4f,", static_cast<unsigned long long>(block_size_cnt), block_size_avg);
      fprintf(fp, "%.4f,", leaf_node_exist_rate);
      fprintf(fp, "\n");
      fclose(fp);
    } else {
      fprintf(stderr, "BlockProfiler: cannot open blockprofile.csv for appending; CSV record skipped\n");
    }

    outputParameter();
    outputSIMDUtilize(0);//0 -- include expand; 1 -- exclude expand
  }

  //Bin:Add
  /*
   * Prints the average weighted bucket index (work / count) over buckets
   * [exclude_expand, width] and that average divided by `width`.
   *
   * exclude_expand: first bucket to include; 0 includes count[0], 1 skips it.
   *                 Negative values are treated as 0.
   * Both figures are reported as 0 when the selected buckets are all empty.
   */
  void outputSIMDUtilize(int exclude_expand){
    uint64_t work_sum = 0;
    uint64_t count_sum = 0;
    const int first = (exclude_expand > 0) ? exclude_expand : 0;
    for (int i = first; i < width + 1; i++) {
      work_sum += count[i] * bucketWeight(i);
      count_sum += count[i];
    }

    // Divide in double: a float intermediate loses precision for large counts.
    const double ave_simd_width =
        (count_sum != 0) ? static_cast<double>(work_sum) / count_sum : 0.0;
    const double simd_utilize = ave_simd_width / width;
    printf("Bin: SIMD Utilization Report:ave_simd_width, ave_simd_width / real_simd_width: %f, %f\n", ave_simd_width, simd_utilize);
  }


  /*
   * Prints, per depth recorded through record(size, depth): index, number of
   * samples, mean, min, max and "stability" (sample standard deviation as a
   * percentage of the mean), then appends the same figures to
   * "parameter.csv".
   *
   * The squared deviations are accumulated in double and the variance uses
   * the n - 1 divisor.  A depth without samples reports 0 for every figure.
   * If the CSV file cannot be opened a message is written to stderr and only
   * the stdout report is produced.
   */
  void outputParameter() {
    printf("--- Parameter Profile ---\n");

    uint64_t totalWork = 0;
    int totalCalls = 0;
    vector<double> means;
    vector<int> mins;
    vector<int> maxs;
    vector<int> calls;
    vector<double> stabilities;

    const int levels = countStack.size();
    for (int i = 0; i < levels; i++) {
      CountVec *cvec = countStack.get(i);
      const int size = static_cast<int>(cvec->size());
      uint64_t sum = 0;
      int max = 0;
      int min = 0;
      for (int j = 0; j < size; j++) {
        const int val = (*cvec)[j];
        sum += val;
        if (j == 0 || val < min) min = val;
        if (val > max) max = val;
      }
      totalWork += sum;

      const double mean = (size != 0) ? static_cast<double>(sum) / size : 0.0;
      double squared_dev = 0.0;
      for (int j = 0; j < size; j++) {
        const double delta = (*cvec)[j] - mean;
        squared_dev += delta * delta;
      }
      const double variance = (size > 1) ? squared_dev / (size - 1) : 0.0;
      const double stdev = sqrt(variance);
      const double stability = (mean != 0.0) ? stdev * 100 / mean : 0.0;

      means.push_back(mean);
      mins.push_back(min);
      maxs.push_back(max);
      stabilities.push_back(stability);
      calls.push_back(size);
      totalCalls += size;
      printf("%d \t %d \t %.1f \t %d \t %d \t %.2f\n", i, size, mean, min, max, stability);
    }

    // The mean is taken before count[0] is added, so it covers only the
    // samples stored per depth.
    const double totalMean =
        (totalCalls != 0) ? static_cast<double>(totalWork) / totalCalls : 0.0;
    printf("totalMean: %.1f\n", totalMean);
    const uint64_t nonBlockedWork = count[0];
    totalWork += nonBlockedWork;	// exclude nonBlockedWork in totalMean
    const double nonBlockedWorkRatio =
        (totalWork != 0) ? static_cast<double>(nonBlockedWork) / totalWork : 0.0;

    FILE *fp = fopen("parameter.csv", "a");
    if (!fp) {
      fprintf(stderr, "BlockProfiler: cannot open parameter.csv for appending; CSV record skipped\n");
      return;
    }

    writeRunHeader(fp);
    fprintf(fp, "%llu,", static_cast<unsigned long long>(totalWork));
    fprintf(fp, "%.1f,", totalMean);
    fprintf(fp, "%.4f,mean,", nonBlockedWorkRatio);
    for (int i = 0; i < levels; i++) {
      fprintf(fp, "%.1f,", means[i]);
    }
    fprintf(fp, "\n,,,,,,,,min, ");
    for (int i = 0; i < levels; i++) {
      fprintf(fp, "%d,", mins[i]);
    }
    fprintf(fp, "\n,,,,,,,,max, ");
    for (int i = 0; i < levels; i++) {
      fprintf(fp, "%d,", maxs[i]);
    }
    fprintf(fp, "\n,,,,,,,,stability, ");
    for (int i = 0; i < levels; i++) {
      fprintf(fp, "%.2f,", stabilities[i]);
    }
    fprintf(fp, "\n,,,,,,,,calls, ");
    for (int i = 0; i < levels; i++) {
      fprintf(fp, "%d,", calls[i]);
    }
    fprintf(fp, "\n");
    fclose(fp);
  }

  /*
   * Prints, for every depth recorded through record(size, depth), the sum of
   * all sizes recorded at that depth.
   */
  void outputBlockInfo(){
    cout << "----------Tasks Distribution Profile-----------" << endl;
    cout << "This is a full profiling about block size in each level:" << endl;
    const int levels = countStack.size();
    for (int i = 0; i < levels; i++) {
      cout << "Depth " << i << ":" << endl;
      CountVec *cvec = countStack.get(i);
      const int size = static_cast<int>(cvec->size());
      uint64_t sum = 0;
      for (int j = 0; j < size; j++) {
        sum += (*cvec)[j];
      }

      cout << "Total nodes: " << sum << endl;
    }
  }

  /*
   * Prints the re-expansion report: the per-depth re-expansion counters, the
   * per-depth mean / standard deviation / sample count of the sizes recorded
   * before and after re-expansion, and the per-depth mean of the recorded
   * ratios.
   */
  void outputReexpandInfo(){
    cout << "----------Reexpansion Benefits Profile-----------" << endl;
    cout << "Reexpansion Number: " << endl;
    const int counters = static_cast<int>(expansion_count.size());
    for (int i = 0; i < counters; ++i){
      cout << expansion_count[i] << endl;
    }

    cout << "Before reexpansion ave block size, and standard dev: " << endl;
    printSizeStats(bef_exp_size);

    cout << "After reexpansion ave block size, and standard dev: " << endl;
    printSizeStats(aft_exp_size);

    cout << "This is w wo expand ration: " << endl;
    const int ratio_levels = w_wo_exp_ratio.size();
    for (int i = 0; i < ratio_levels; ++i){
      cout << levelMean(w_wo_exp_ratio, i) << endl;
    }

    cout << "This is the end of reexpansion statistics!"  <<endl;

  }


  /*
   * Adds one block of `block_size` items to the histogram and to the running
   * size sum / block count.
   *
   * A negative block_size is ignored: its remainder would be negative and
   * would index the histogram out of bounds.
   */
  void record(int block_size) {
    if (block_size < 0) return;
    count[width] += block_size / width;
    const int remainder = block_size % width;
    if (remainder != 0) count[remainder]++;
    block_size_sum += block_size;
    block_size_cnt++;
  }

  /*
   * Stores `size` as a sample for `depth` and also records it in the
   * histogram via record(size).  Calls with a negative size or depth are
   * ignored so the per-depth samples and the histogram stay consistent.
   */
  void record(int size, int depth) {
    if (size < 0 || depth < 0) return;
    CountVec* cvec = countStack.get(depth);
    cvec->push_back(size);

    record(size);
  }

  /*
   * Increments the re-expansion counter for `depth`, creating zeroed counters
   * for any missing shallower depths.  A negative depth is ignored.
   */
  void record_reexpansion(int depth){
    if (depth < 0) return;
    const vector<int>::size_type slot = static_cast<vector<int>::size_type>(depth);
    if (slot >= expansion_count.size()) {
      expansion_count.resize(slot + 1, 0);
    }

    expansion_count[slot] += 1;
  }

  // Stores a size sample observed before re-expansion at `depth`
  // (negative depth ignored).
  void record_bef_exp_size(int depth, int size){
    if (depth < 0) return;
    bef_exp_size.get(depth)->push_back(size);
  }

  // Stores a size sample observed after re-expansion at `depth`
  // (negative depth ignored).
  void record_aft_exp_size(int depth, int size){
    if (depth < 0) return;
    aft_exp_size.get(depth)->push_back(size);
  }

  // Stores a ratio sample for `depth` (negative depth ignored).
  void record_w_wo_exp_ratio(int depth, double ratio){
    if (depth < 0) return;
    w_wo_exp_ratio.get(depth)->push_back(ratio);
  }

  // Counts one unit in bucket 0 of the histogram.
  void record_single() { count[0]++; }

  // Sets the value reported as "leaf node exist rate" by output().
  void record_leaf_node_exist_rate(float f) { leaf_node_exist_rate = f; }
 private:
  // record() divides by the width, so it must be at least 1.
  static int sanitizeWidth(int requested) { return requested > 0 ? requested : 1; }

  // Work weight of a histogram bucket: its index, except bucket 0 counts 1.
  static uint64_t bucketWeight(int bucket) { return bucket == 0 ? 1 : bucket; }

  // Writes the leading columns shared by both CSV files.
  static void writeRunHeader(FILE *fp) {
    fprintf(fp, "%s,%s,%d,%d,%d, ",
            Harness::get_benchmark().c_str(), Harness::get_appargs().c_str(),
            Harness::get_sort_flag(), Harness::get_block_size(), Harness::get_splice_depth());
  }

  // Arithmetic mean of the samples stored at `level`; 0 when there are none.
  // Works for both CountStack and CountStackd.
  template <typename Stack>
  static double levelMean(Stack &stack, int level) {
    const int samples = static_cast<int>(stack.get(level)->size());
    if (samples == 0) return 0.0;
    double total = 0.0;
    for (int j = 0; j < samples; ++j) {
      total += stack.get(level)->at(j);
    }
    return total / samples;
  }

  // Prints three per-level columns for `stack`: mean, population standard
  // deviation (divisor n) and number of samples.
  template <typename Stack>
  static void printSizeStats(Stack &stack) {
    const int levels = stack.size();
    vector<double> ave(levels);
    vector<double> dev(levels);
    for (int i = 0; i < levels; ++i) {
      ave[i] = levelMean(stack, i);
      const int samples = static_cast<int>(stack.get(i)->size());
      double squared_dev = 0.0;
      for (int j = 0; j < samples; ++j) {
        const double delta = stack.get(i)->at(j) - ave[i];
        squared_dev += delta * delta;
      }
      dev[i] = sqrt((samples != 0) ? squared_dev / samples : 0.0);
    }

    cout << "This is AVE block size: " << endl;
    for (int i = 0; i < levels; ++i){
      cout << ave[i] << endl;
    }

    cout << "This is block size Standard DEV: " << endl;
    for (int i = 0; i < levels; ++i){
      cout << dev[i] << endl;
    }

    cout << "This is number of blocks in each level: " << endl;
    for (int i = 0; i < levels; ++i){
      cout << stack.get(i)->size() << endl;
    }
  }

  CountStack countStack;        // sizes passed to record(size, depth), per depth
  int width;                    // items per full group; always >= 1
  vector<uint64_t> count;       // histogram, width + 1 buckets (see class comment)
  uint64_t block_size_sum;      // sum of all sizes passed to record()
  uint64_t block_size_cnt;      // number of record() calls counted
  float leaf_node_exist_rate;   // last value given to record_leaf_node_exist_rate()
  vector<int> expansion_count;  // re-expansion counter per depth
  CountStack bef_exp_size;      // sizes before re-expansion, per depth
  CountStack aft_exp_size;      // sizes after re-expansion, per depth
  CountStackd w_wo_exp_ratio;   // ratio samples, per depth
};

#endif /* BLOCKPROFILER_H_ */
