/**********************************************************/
/* This code is for PLDI-15 Artifact Evaluation only      */ 
/* and will be released with further copyright information*/ 
/* File: Sequential block without reexpansion of fibonacci*/
/**********************************************************/

#include <iostream>
#include <fstream>
#include <math.h>

#include "harness.h"
#include "block.h"

using namespace std;

// Root block allocated in app_main. It is kept in a global so that
// fib_expand_bf can delete it once the depth-0 block has been expanded.
_Block * g_initial_block = NULL;

#ifdef BLOCK_PROFILE
#include "blockprofiler.h"
// Block profiler, compiled in only when BLOCK_PROFILE is defined.
// The profiler's SIMD width is set to 16.
BlockProfiler profiler(16);
#endif
// Parallelism profiler: not used in our paper, kept for further development.
// Compiled in only when PARALLELISM_PROFILE is defined; the instance is
// created and destroyed in app_main.
#ifdef PARALLELISM_PROFILE
#include "parallelismprofiler.h"
ParallelismProfiler *parallelismProfiler;
#endif


/*
 * Plain recursive reference version of fib, written in the pseudo
 * tail-recursive form required by our language spec.
 *
 * n   : index to evaluate; the recursion bottoms out at n == 0 or n == 1.
 * sum : accumulator, incremented by one for every base case reached.
 */
void fib(int n, int *sum) {
#ifdef BLOCK_PROFILE
  profiler.record_single();
#endif

  const bool isLeaf = (n == 0 || n == 1);
  if (isLeaf) {
    // Base case: contributes exactly one to the running total.
    ++(*sum);
#ifdef PARALLELISM_PROFILE
    parallelismProfiler->recordNonBlockedTruncate();
#endif
    return;
  }

  // Recursive case: evaluate both sub-problems into the same accumulator.
  fib(n - 1, sum);
  fib(n - 2, sum);
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->recordNonBlockedRecurse();
#endif
}

//int _expandDepth = 0;
// Maximum block buffer size. Defaults to D_MAX_BLOCK_SIZE; app_main overrides
// it with 2^k when the optional second argument k is given. fib_expand_bf
// keeps expanding only while the next block holds at most half of this value.
int _expandSize = D_MAX_BLOCK_SIZE;

/*
 * Breadth-first execution, used to grow the number of tasks held in the
 * software block.
 *
 * For the block stored at level *_depth, every point is visited twice: first
 * its left child (n0), then its right child (n1). A child equal to 0 or 1 is a
 * base case and adds one to *sum; any other child n is expanded into the point
 * (n - 1, n - 2), which is appended to this level's next block.
 *
 * Once the current block has been consumed, the storage that fed it is freed:
 * the heap-allocated initial block at depth 0, or the previous stack level
 * otherwise. The next block is then installed at level *_depth + 1 and the
 * function recurses while that block is non-empty and holds at most
 * _expandSize / 2 points (each point can add up to two points to the
 * following level).
 *
 * _stack : block stack holding one block set per level.
 * _depth : in/out; incremented once per expansion step. On return it is the
 *          level whose block holds the not-yet-expanded tasks (possibly none).
 * sum    : accumulator for the number of base cases reached.
 */
void fib_expand_bf(class _BlockStack *_stack, int *_depth, int *sum){
  class _BlockSet *_set = _stack->get(*_depth);
  class _Block *_block = _set->block;
  class _Block *_nextBlock0 = &_set -> _BlockSet::nextBlock0;
  _nextBlock0 ->  recycle ();
#ifdef BLOCK_PROFILE
  profiler.record(_block->size, *_depth);
#endif

  //Add Left
  for (int _bi = 0; _bi < _block -> _Block::size; ++_bi) {
    class _Point &_point = _block ->  get (_bi);
    int n = _point.n0;
    if (n == 1 || n == 0) {
      *sum += 1;
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordTruncate();
#endif
    } else {
      _nextBlock0->add(n - 1, n - 2);
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordRecurse();
#endif
    }
  }

  //Add right
  for (int _bi = 0; _bi < _block -> _Block::size; ++_bi) {
    class _Point &_point = _block ->  get (_bi);
    int n = _point.n1;
    if (n == 1 || n == 0) {
      *sum += 1;
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordTruncate();
#endif
    } else {
      _nextBlock0->add(n - 1, n - 2);
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordRecurse();
#endif
    }
  }

  //Free old stack space
  if (*_depth == 0) {
    // The depth-0 block is the heap-allocated initial block, not stack storage.
    delete g_initial_block;
    // Do not leave the global dangling: a later delete becomes a harmless no-op.
    g_initial_block = NULL;
  } else {
    _stack->release(*_depth - 1);
  }

  int _nextblock0_size = _nextBlock0 -> _Block::size;
  *_depth += 1;
  // The next block is installed at the new level whether or not expansion
  // continues, so the caller can pick it up from _stack->get(*_depth).
  _stack ->  get (*_depth) -> _BlockSet::block = _nextBlock0;
  if (_nextblock0_size > 0 && _nextblock0_size <= _expandSize / 2) {
    fib_expand_bf(_stack, _depth, sum);
  }
  // Otherwise: reached the buffer size, or finished all evaluation.
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->blockEnd();
#endif
}


// Forward declaration: fib0 and fib1 call each other.
void fib1(class _BlockStack *_stack,int _depth, int *sum);

/*
 * Depth-first execution over the left children, used to limit memory use.
 *
 * Reads the left child (n0) of every point in the block at level _depth.
 * A child equal to 0 or 1 adds one to *sum; any other child n is expanded
 * into the point (n - 1, n - 2) in this level's next block. If that block
 * ends up non-empty it is installed at level _depth + 1 and processed by
 * fib0 and then fib1.
 */
void fib0(class _BlockStack *_stack,int _depth, int *sum) {
  class _BlockSet *level = _stack->get(_depth);
  class _Block *current = level->block;
  class _Block *children = &level->nextBlock0;
  children->recycle();
#ifdef BLOCK_PROFILE
  profiler.record(current->size, _depth);
#endif

  for (int idx = 0; idx < current->size; ++idx) {
    const int n = current->get(idx).n0;
    if (n != 0 && n != 1) {
      // Still reducible: queue its two sub-problems for the next level.
      children->add(n - 1, n - 2);
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordRecurse();
#endif
    } else {
      // Base case reached.
      *sum += 1;
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordTruncate();
#endif
    }
  }

  const bool hasChildren = children->size > 0;
  if (hasChildren) {
    const int below = _depth + 1;
    _stack->get(below)->block = children;
    fib0(_stack, below, sum);
    fib1(_stack, below, sum);
  }
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->blockEnd();
#endif
}

/*
 * Depth-first execution over the right children, used to limit memory use.
 *
 * Reads the right child (n1) of every point in the block at level _depth.
 * A child equal to 0 or 1 adds one to *sum; any other child n is expanded
 * into the point (n - 1, n - 2) in this level's next block. If that block
 * ends up non-empty it is installed at level _depth + 1 and processed by
 * fib0 and then fib1.
 */
void fib1(class _BlockStack *_stack,int _depth, int *sum) {
  class _BlockSet *level = _stack->get(_depth);
  class _Block *current = level->block;
  class _Block *children = &level->nextBlock0;
  children->recycle();
#ifdef BLOCK_PROFILE
  profiler.record(current->size, _depth);
#endif

  for (int idx = 0; idx < current->size; ++idx) {
    const int n = current->get(idx).n1;
    if (n != 0 && n != 1) {
      // Still reducible: queue its two sub-problems for the next level.
      children->add(n - 1, n - 2);
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordRecurse();
#endif
    } else {
      // Base case reached.
      *sum += 1;
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordTruncate();
#endif
    }
  }

  const bool hasChildren = children->size > 0;
  if (hasChildren) {
    const int below = _depth + 1;
    _stack->get(below)->block = children;
    fib0(_stack, below, sum);
    fib1(_stack, below, sum);
  }
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->blockEnd();
#endif
}

/*
 * Benchmark entrance called by harness.
 *
 * Arguments (n is read from argv[0], so argv is expected to hold only the
 * benchmark's own arguments):
 *   argv[0] : n, the Fibonacci index to evaluate; must be at least 2.
 *   argv[1] : optional exponent k; the block buffer size becomes 2^k.
 *
 * The run first expands breadth-first (fib_expand_bf) until the block buffer
 * is filled or the work runs out, then finishes the remaining tasks
 * depth-first (fib0 / fib1). The resulting sum is printed to stdout.
 *
 * Returns 0 on success. Exits with status 1 on a wrong argument count or an
 * exponent whose power of two does not fit in an int, and with status 0 when
 * n is smaller than 2.
 */
int app_main(int argc, char **argv) {
  if (argc != 1 && argc != 2) {
    cout << "usage: fibonacci [n] or fibonacci [n] [block_size, pow(2, k)]" << endl;
    exit(1);
  }

  int n = atoi(argv[0]);
  if (argc == 2) {
    // Largest k for which 2^k is still representable in a signed int.
    const int maxExponent = static_cast<int>(sizeof(int) * 8) - 2;
    const int exponent = atoi(argv[1]);
    // A negative k would give a zero-sized buffer and a too-large k would
    // overflow int, so both are rejected up front.
    if (exponent < 0 || exponent > maxExponent) {
      cout << "usage: fibonacci [n] or fibonacci [n] [block_size, pow(2, k)]" << endl;
      exit(1);
    }
    // Exact integer power of two; avoids the floating-point pow round trip.
    _expandSize = 1 << exponent;
  }
  int sum = 0;

#ifdef PARALLELISM_PROFILE
  parallelismProfiler = new ParallelismProfiler;
#endif

  Harness::start_timing();
  //_expandDepth = Harness::get_splice_depth();

  //Initialize software block stack
  cout << "Set fixed max block buffer size, _expandSize: " << _expandSize << endl;
  _Block::max_block = _expandSize;
  Harness::set_block_size(_expandSize);

  // Validate n before allocating anything. Negative values are rejected too:
  // they never reach the 0/1 base case, so the expansion would not terminate.
  if (n < 2){
    cout << "Please input a larger number rather than 0 or 1 ..." << endl;
    exit(0);
  }

  class _BlockStack * _stack = new _BlockStack;
  class _Block * _block = new _Block;
  g_initial_block = _block;

  _block->add(n - 1, n - 2);
  int _depth = 0;
  _stack->get (_depth) -> block = _block;

  //Start to execute blocked fib
  // fib_expand_bf advances _depth and takes over deletion of the initial block.
  if (_expandSize >= 2) fib_expand_bf(_stack, &_depth, &sum);
  int df_block_size = _stack->get(_depth)->block->size;
  cout << "This is the max block buffer size for dfs: " << df_block_size << endl;

  if (df_block_size){
    fib0(_stack, _depth, &sum);
    fib1(_stack, _depth, &sum);
  }

  delete _stack;
  // Without the breadth-first phase nobody has freed the initial block yet.
  if (_expandSize < 2) delete _block;
  // The initial block is gone in either case; do not keep a dangling pointer.
  g_initial_block = NULL;

  Harness::stop_timing();

#ifdef BLOCK_PROFILE
  profiler.output();
#ifdef BLOCKINFO
  profiler.outputBlockInfo(); //For output task distribution profile data
#endif 
#endif
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->output();
  delete parallelismProfiler;
#endif

  cout << sum << endl;

#ifdef PROFILE_SPACE_USE
  cout << "This is max space use (Bytes): " << m_space << endl;
  cout << "This is total malloc counts: " << total_malloc << endl;
#endif
  return 0;
}

