/**********************************************************/
/* This code is for PLDI-15 Artifact Evaluation only      */ 
/* and will be released with further copyright information*/ 
/* File: Sequential block w reexpansion of binomial       */
/**********************************************************/

#include <iostream>
#include <fstream>
#include <math.h>

#include "harness.h"
#include "block.h"

#ifdef BLOCK_PROFILE
#include "blockprofiler.h"
BlockProfiler * profiler;
#endif
//Parallelism profiler: not used in our paper,
//kept for further development
#ifdef PARALLELISM_PROFILE
#include "parallelismprofiler.h"
ParallelismProfiler *parallelismProfiler;
#endif

// Number of times binomial_block()/binomial_block1() switched back to
// breadth-first expansion; printed at the end of app_main().
int dynamic_reexpand_count = 0;
// Root block allocated by app_main(); kept in a global so that it can be
// deleted once it has been consumed (see binomial_expand_bf() at depth 0).
_Block * g_initial_block = NULL;//For memory release
// Set to 1 when a re-expansion starts; while it is non-zero,
// binomial_expand_bf() does not free old stack space.
int g_is_partial = 0;

using namespace std;

/* Plain (non-blocked) recursive binomial, written in the pseudo
 * tail-recursive form that matches our language spec.
 *
 * n, k : the coefficient C(n, k) to evaluate.
 * num  : running counter; it is incremented once for every leaf with
 *        k == 0 or k == n, so the outermost call adds C(n, k) to it.
 */
void binomial(char n, char k, int *num) {
#ifdef BLOCK_PROFILE
  profiler->record_single();
#endif

  const bool countsAsOne = (k == 0) || (k == n);
  const bool outOfRange = (k < 0) || (k > n);

  // Leaves stop the recursion; only in-range leaves contribute to the sum.
  if (countsAsOne || outOfRange) {
    if (countsAsOne) {
      *num += 1;
    }
#ifdef PARALLELISM_PROFILE
    parallelismProfiler->recordNonBlockedTruncate();
#endif
    return;
  }

  // Interior node: C(n, k) = C(n-1, k-1) + C(n-1, k).
  binomial(n - 1, k - 1, num);
  binomial(n - 1, k, num);
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->recordNonBlockedRecurse();
#endif
}

//int _expandDepth = 0;
// Maximum block buffer size. Blocks holding at most _expandSize / 2 points
// are expanded breadth-first; app_main() may override this default from the
// command line.
long long _expandSize = D_MAX_BLOCK_SIZE;

// Forward declarations: binomial_expand_bf() calls both functions before
// their definitions appear.
int binomial_block1(class _BlockStack *_stack,int _depth, int *num);
int binomial_block(class _BlockStack *_stack,int _depth, int *num);

/*Breadth-first step that grows the number of tasks held in the software block.
 *
 * Reads the block stored at level *_depth of _stack and, for every point in
 * it, visits the left child (n, k) and then the right child (n, k - 1):
 *   - a child with k == 0 or k == n adds one to *num,
 *   - a child with k < 0 or k > n is dropped,
 *   - any other child is appended to this level's next block as (n - 1, k).
 * *_depth is then advanced by one. While the next block is non-empty and
 * holds at most _expandSize / 2 points the function recurses on it; otherwise
 * the remaining work is handed to binomial_block1() and binomial_block().
 */
void binomial_expand_bf(class _BlockStack *_stack, int* _depth, int *num) {
  class _BlockSet *curSet = _stack->get(*_depth);
  class _Block *curBlock = curSet->block;
  class _Block *children = &curSet->nextBlock0;
  children->recycle();

#ifdef BLOCK_PROFILE
  profiler->record(curBlock->size, *_depth);
#ifdef EXPAND_PROFILE
  if (dynamic_reexpand_count != 0) {
    profiler->record_reexpansion(*_depth);
  }

  // Number of children produced by the left pass [0] and the right pass [1].
  int producedPerSide[2];
  int largestSide = 0;
#endif
#endif

  //Left children: (n, k)
  for (int idx = 0; idx < curBlock->size; ++idx) {
    class _Point &pt = curBlock->get(idx);
    char n = pt.n;
    char k = pt.k;
    const bool addsOne = (k == 0 || k == n);
    if (addsOne) {
      *num += 1;
    }
    if (addsOne || k < 0 || k > n) {
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordTruncate();
#endif
    } else {
      children->add(n-1, k);
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordRecurse();
#endif
    }
  }

#if defined(BLOCK_PROFILE) && defined(EXPAND_PROFILE)
  producedPerSide[0] = children->size;
#endif

  //Right children: (n, k - 1)
  for (int idx = 0; idx < curBlock->size; ++idx) {
    class _Point &pt = curBlock->get(idx);
    char n = pt.n;
    char k = pt.k - 1;
    const bool addsOne = (k == 0 || k == n);
    if (addsOne) {
      *num += 1;
    }
    if (addsOne || k < 0 || k > n) {
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordTruncate();
#endif
    } else {
      children->add(n-1, k);
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordRecurse();
#endif
    }
  }

#if defined(BLOCK_PROFILE) && defined(EXPAND_PROFILE)
  producedPerSide[1] = children->size - producedPerSide[0];
  largestSide = (producedPerSide[0] < producedPerSide[1]) ? producedPerSide[1]
                                                          : producedPerSide[0];
#endif

  //The consumed block is no longer needed; free it unless we are inside a
  //re-expansion (g_is_partial set).
  if (!g_is_partial) {
    if (*_depth == 0) {
      delete g_initial_block;
    } else {
      _stack->release(*_depth - 1);
    }
  }

  const int producedCount = children->size;
  *_depth += 1;

#if defined(BLOCK_PROFILE) && defined(EXPAND_PROFILE)
  if (dynamic_reexpand_count != 0) {
    for (int side = 0; side < 2; ++side) {
      if (producedPerSide[side]) {
        profiler->record_bef_exp_size(*_depth, producedPerSide[side]);
      }
    }
    if (producedCount) {
      profiler->record_w_wo_exp_ratio(*_depth, ((double)producedCount) / largestSide);
    }
  } else if (producedCount) {
    profiler->record_bef_exp_size(*_depth, producedCount);
#ifdef INCLUSIVE
    profiler->record_w_wo_exp_ratio(*_depth, 1);
#endif
  }
  if (producedCount) {
    profiler->record_aft_exp_size(*_depth, producedCount);
  }
#endif

  if (producedCount <= 0 || producedCount > _expandSize / 2) {
    //Reached the buffer size, or finished all evaluation
    if (dynamic_reexpand_count == 0) {// only print for the first time
      cout << "This is the max block buffer size for dfs: " << producedCount << endl;
    }

    if (producedCount) {
      _stack->get(*_depth)->block = children;
      binomial_block1(_stack, *_depth, num);
      binomial_block(_stack, *_depth, num);
    }
  } else {
    //Still room in the buffer: keep expanding breadth-first
    _stack->get(*_depth)->block = children;
    binomial_expand_bf(_stack, _depth, num);
  }
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->blockEnd();
#endif

}

/*Depth-first step over the left children, used to limit memory consumption.
 *
 * If the block at level _depth holds at most _expandSize / 2 points, the
 * function switches back to breadth-first expansion via binomial_expand_bf()
 * and returns 1. Otherwise it visits the left child (n, k) of every point:
 * k == 0 or k == n adds one to *num, k < 0 or k > n is dropped, and anything
 * else is appended to the next block as (n - 1, k). A non-empty next block is
 * then processed at level _depth + 1, and the function returns 0.
 */
int binomial_block(class _BlockStack *_stack,int _depth, int *num) {
  class _BlockSet *curSet = _stack->get(_depth);
  class _Block *curBlock = curSet->block;
  class _Block *children = &curSet->nextBlock0;
  children->recycle();

  const int curSize = curBlock->size;
  if (curSize <= _expandSize / 2) {
    //Small enough to do dynamic reexpansion
    ++dynamic_reexpand_count;
    g_is_partial = 1;
    binomial_expand_bf(_stack, &_depth, num);
    return 1;
  }

#ifdef BLOCK_PROFILE
  profiler->record(curBlock->size, _depth);
#endif
  for (int idx = 0; idx < curBlock->size; ++idx) {
    class _Point &pt = curBlock->get(idx);
    char n = pt.n;
    char k = pt.k;
    const bool addsOne = (k == 0 || k == n);
    if (addsOne) {
      *num += 1;
    }
    if (addsOne || k < 0 || k > n) {
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordTruncate();
#endif
    } else {
      children->add(n-1, k);
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordRecurse();
#endif
    }
  }

  if (children->size > 0) {
    const int childDepth = _depth + 1;
    _stack->get(childDepth)->block = children;
#if defined(BLOCK_PROFILE) && defined(EXPAND_PROFILE)
    profiler->record_bef_exp_size(childDepth, children->size);
    profiler->record_aft_exp_size(childDepth, children->size);
#ifdef INCLUSIVE
    profiler->record_w_wo_exp_ratio(childDepth, 1);
#endif
#endif
    //A non-zero result means the right-child pass re-expanded, so the
    //left-child pass is skipped for this block.
    if (!binomial_block1(_stack, childDepth, num)) {
      binomial_block(_stack, childDepth, num);
    }
  }
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->blockEnd();
#endif

  return 0;
}

/*Depth-first step over the right children, used to limit memory consumption.
 *
 * If the block at level _depth holds at most _expandSize / 2 points, the
 * function switches back to breadth-first expansion via binomial_expand_bf()
 * and returns 1. Otherwise it visits the right child (n, k - 1) of every
 * point: k == 0 or k == n adds one to *num, k < 0 or k > n is dropped, and
 * anything else is appended to the next block as (n - 1, k). A non-empty next
 * block is then processed at level _depth + 1, and the function returns 0.
 */
int binomial_block1(class _BlockStack *_stack,int _depth, int *num) {
  class _BlockSet *curSet = _stack->get(_depth);
  class _Block *curBlock = curSet->block;
  class _Block *children = &curSet->nextBlock0;
  children->recycle();

  const int curSize = curBlock->size;
  if (curSize <= _expandSize / 2) {
    //Small enough to do dynamic reexpansion
    ++dynamic_reexpand_count;
    g_is_partial = 1;
    binomial_expand_bf(_stack, &_depth, num);
    return 1;
  }

#ifdef BLOCK_PROFILE
  profiler->record(curBlock->size, _depth);
#endif
  for (int idx = 0; idx < curBlock->size; ++idx) {
    class _Point &pt = curBlock->get(idx);
    char n = pt.n;
    char k = pt.k - 1;
    const bool addsOne = (k == 0 || k == n);
    if (addsOne) {
      *num += 1;
    }
    if (addsOne || k < 0 || k > n) {
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordTruncate();
#endif
    } else {
      children->add(n-1, k);
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordRecurse();
#endif
    }
  }

  if (children->size > 0) {
    const int childDepth = _depth + 1;
    _stack->get(childDepth)->block = children;
#if defined(BLOCK_PROFILE) && defined(EXPAND_PROFILE)
    profiler->record_bef_exp_size(childDepth, children->size);
    profiler->record_aft_exp_size(childDepth, children->size);
#ifdef INCLUSIVE
    profiler->record_w_wo_exp_ratio(childDepth, 1);
#endif
#endif
    //A non-zero result means the nested right-child pass re-expanded, so
    //the left-child pass is skipped for this block.
    if (!binomial_block1(_stack, childDepth, num)) {
      binomial_block(_stack, childDepth, num);
    }
  }
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->blockEnd();
#endif
  return 0;
}

/*Benchmark entrance called by harness.
 *
 * Arguments (note that argv[0] is read as n, not as a program name):
 *   argv[0]  n
 *   argv[1]  k
 *   argv[2]  optional exponent i; the block buffer size becomes pow(2, i)
 *   argv[3]  optional SIMD width handed to the block profiler (default 16)
 *
 * Prints the number accumulated by the blocked evaluation of C(n, k) together
 * with the buffer and re-expansion statistics.
 *
 * Returns 0 on success and 1 when k < 0 or k > n. When argc is not 2, 3 or 4
 * the usage text is printed and the process exits with status 0.
 */
int app_main(int argc, char **argv) {
  if (argc != 2 && argc != 3 && argc != 4) {
    cout << "usage: binomial [n] [k] or binomial [n] [k] [buffer_size, pow(2, i)] or binomial [n] [k] [buffer_size, pow(2, i)] [simd width]" << endl;
    exit(0);
  }

  int vec_width = 16;//set simd utilization width as 16 
  // n and k are stored as char throughout this file, so values outside the
  // range of char do not survive this conversion.
  char n = atoi(argv[0]);
  char k = atoi(argv[1]);
  if (argc >= 3) _expandSize = pow(2.0, atoi(argv[2]));
  if (argc == 4) vec_width = atoi(argv[3]);
  int num = 0;
#ifdef PARALLELISM_PROFILE
  parallelismProfiler = new ParallelismProfiler;
#endif

#ifdef BLOCK_PROFILE
  cout << "The vector width is " << vec_width << endl;
  profiler = new BlockProfiler(vec_width);
#endif
  Harness::start_timing();

  //_expandDepth = Harness::get_splice_depth();

  //Initialize software block stack
  cout << "Set fixed max block buffer size, _expandSize: " << _expandSize << endl;
  _Block::max_block = _expandSize;
  Harness::set_block_size(_expandSize);
  class _BlockStack * _stack = new _BlockStack;
  class _Block * _block = new _Block;
  g_initial_block = _block;

  if (k == 0 || k == n) {
    num += 1;
  } else if (k < 0 || k > n) {
    // Invalid request: release everything allocated above before bailing
    // out, instead of leaking the stack, the root block and the profilers.
    // NOTE(review): timing was started above and is not stopped on this
    // path (unchanged from before) -- confirm whether Harness needs a
    // matching stop_timing() here.
    delete _stack;
    delete _block;
    g_initial_block = NULL;
#ifdef BLOCK_PROFILE
    delete profiler;
#endif
#ifdef PARALLELISM_PROFILE
    delete parallelismProfiler;
#endif
    return 1;
  } else {
    _block->add(n - 1, k);
  }

  int _depth = 0;
  _stack->get (_depth) -> block = _block;

  //Start to execute blocked binomial 
  if (_expandSize >= 2){
#ifdef BLOCK_PROFILE
#ifdef EXPAND_PROFILE
    profiler->record_bef_exp_size(0, 1);
    profiler->record_aft_exp_size(0, 1);
#ifdef INCLUSIVE
    profiler->record_w_wo_exp_ratio(0, 1);	
#endif
#endif
#endif
    // binomial_expand_bf() deletes the root block itself at depth 0.
    binomial_expand_bf(_stack, &_depth, &num);
  }
  else{
    // Buffer too small for breadth-first expansion: go depth-first directly.
    int df_block_size = _stack->get(_depth)->block->size;
    cout << "This is the max block buffer size for dfs: " << df_block_size << endl;

    if (df_block_size){
      binomial_block1(_stack, _depth, &num);
      binomial_block(_stack, _depth, &num);
    }
  }
  delete _stack;
  // Only the depth-first-only path still owns the root block at this point.
  if (_expandSize < 2) delete _block;
  Harness::stop_timing();
  
  cout << num << endl;

#ifdef BLOCK_PROFILE
  profiler->output();
#ifdef EXPAND_PROFILE
  profiler->outputReexpandInfo();
#endif
  delete profiler;
#endif
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->output();
  delete parallelismProfiler;
#endif

  cout << "This is total reexpansion counts: " << dynamic_reexpand_count << endl;
#ifdef PROFILE_SPACE_USE
  cout << "This is max space use (Bytes): " << m_space << endl;
  cout << "This is the total number of new operations for block: " << total_malloc << endl;
#endif

  return 0;
}
