/**********************************************************/
/* This code is for PLDI-15 Artifact Evaluation only      */ 
/* and will be released with further copyright information*/ 
/* File: Sequential block w reexpansion of graphcol       */
/**********************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <string.h>
#include <iostream>
#include <fstream>
#include <limits.h>
#include "harness.h"
#include "block.h"

// Profiling hook, compiled in only for BLOCK_PROFILE builds. The instance is
// created and deleted in app_main and used by the colouring routines below.
#ifdef BLOCK_PROFILE
#include "blockprofiler.h"
BlockProfiler * profiler;
#endif

// Call counter for TRACK_TRAVERSALS builds: incremented once at the top of
// color, color_expand_bf and color_block, and printed at the end of app_main.
#ifdef TRACK_TRAVERSALS
uint64_t work = 0;
#endif

using namespace std;

//int _expandDepth = 0;
// Block-size budget. app_main overwrites it from the second command-line
// argument; color_expand_bf and color_block compare block sizes against
// _expandSize / numColor to choose between breadth-first expansion and
// depth-first execution.
// NOTE(review): D_MAX_BLOCK_SIZE is presumably provided by block.h - confirm.
long long _expandSize = D_MAX_BLOCK_SIZE;

// Number of times color_block switched back to breadth-first expansion.
int dynamic_reexpand_count = 0;
// The first block created in app_main; color_expand_bf deletes it at depth 0
// while g_is_partial is still 0.
_Block * g_initial_block = NULL;
// Set to 1 by color_block when it re-expands; once set, color_expand_bf no
// longer frees the previous stack level.
int g_is_partial = 0;

// Input graph path; replaced by the fourth command-line argument when given.
// NOTE(review): binds a string literal to a non-const char*; switching to
// const char* is desirable if no other translation unit declares it as char*.
char* DATAFILE = "../inputs/data38N64E.col";
_MM_ALIGNED_ node_t * adjacentNodes; //Graph Pointer, in adjacent matrix format
// Number of colours to try per node; set from the first command-line argument.
int numColor = 0;

/*
 * Read numEdge edge records of the form "<tag> <u> <v>" from data and store
 * them as zero-based endpoint pairs: edge[2*i] = u - 1, edge[2*i+1] = v - 1.
 *
 * data    : stream positioned at the first edge record
 * numEdge : number of records to read
 * numNode : number of nodes; every endpoint must lie in [1, numNode]
 * edge    : output array with room for 2 * numEdge ints
 *
 * A malformed record or an endpoint outside [1, numNode] terminates the
 * program with an error message, because the endpoints are later used as
 * indices into the adjacency matrix.
 */
void readEdge(FILE *data, int numEdge, int numNode, int *edge)
{
  char tag[50];
  for(int i = 0; i < numEdge; i++)
  {
    int u = 0;
    int v = 0;
    // %49s bounds the tag so an over-long token cannot overflow tag[50].
    if (fscanf(data, "%49s %d %d\n", tag, &u, &v) != 3)
    {
      fprintf(stderr, "Malformed edge record %d in data file\n", i + 1);
      exit(EXIT_FAILURE);
    }
    if (u < 1 || u > numNode || v < 1 || v > numNode)
    {
      fprintf(stderr, "Edge record %d references node outside [1, %d]: %d %d\n",
              i + 1, numNode, u, v);
      exit(EXIT_FAILURE);
    }
    // The file is one-based; the rest of the program indexes from zero.
    edge[i*2] = u - 1;
    edge[i*2+1] = v - 1;
  }
}

/*
 * Build the global adjacency matrix adjacentNodes (numNode x numNode,
 * row-major, zero-filled) and mark every edge symmetrically with 1.
 *
 * numNode : number of nodes in the graph
 * numEdge : number of edges stored in edge
 * edge    : zero-based endpoint pairs, edge[2*t] and edge[2*t+1]
 *
 * Terminates the program if the matrix cannot be allocated. Edges whose
 * endpoints fall outside [0, numNode) are reported and skipped rather than
 * written outside the matrix.
 */
void init_graph(int numNode, int numEdge, int* edge){
  const size_t n = (numNode > 0) ? (size_t)numNode : 0;
  // Allocate at least one cell so a NULL result always means failure.
  const size_t dim = (n > 0) ? n : 1;

  // calloc zero-fills and computes rows * rowBytes in size_t, avoiding the
  // int overflow of numNode*numNode for large graphs.
  adjacentNodes = (node_t *)calloc(dim, dim * sizeof(node_t));
  if (adjacentNodes == NULL){
    fprintf(stderr, "Failed to allocate %d x %d adjacency matrix\n", numNode, numNode);
    exit(EXIT_FAILURE);
  }

  for (int t = 0; t < numEdge; t++){
    const int u = edge[t*2];
    const int v = edge[t*2+1];
    if (u < 0 || u >= numNode || v < 0 || v >= numNode){
      fprintf(stderr, "Skipping edge %d with out-of-range endpoint: %d %d\n", t, u, v);
      continue;
    }
    adjacentNodes[(size_t)u * n + (size_t)v] = 1;
    adjacentNodes[(size_t)v * n + (size_t)u] = 1;
  }
}

/* Dump the numNode x numNode adjacency matrix to stdout, one row per line. */
void print_graph(int numNode){
  printf("This is the graph: \n");
  int row = 0;
  while (row < numNode){
    const int rowStart = row * numNode;
    for (int col = 0; col != numNode; ++col){
      printf("%d ", adjacentNodes[rowStart + col]);
    }
    printf("\n");
    ++row;
  }
}

bool ok(int nodeId, int ci, node_t* c, int numNode){
  for (int k = 0; k < nodeId; ++k){
    for (int i = k + 1; i <= nodeId; ++i){
      if (adjacentNodes[k * numNode + i] == 1 && c[k] == c[i]){
        return false;
      }
    }
  }
  return true;
}

/* Print the colour assigned to each of the numNode nodes on a single line. */
void print_solution(node_t* c, int numNode){
  printf("This is a solution: \n");
  int node = 0;
  while (node < numNode){
    printf("%d ", c[node]);
    ++node;
  }
  printf("\n");
}

/*
 * Plain recursive graph colouring (pseudo tail recursive form matching the
 * language spec used by the blocked variants).
 *
 * nodeId      : index of the next node to colour
 * numNode     : total number of nodes
 * numColor    : colours 1..numColor are tried for every node
 * c           : colouring being built, modified in place
 * numSolution : incremented once for each complete valid colouring
 * _callIndex  : colour to assign to node nodeId - 1 before continuing, or -1
 *               for the initial call, which assigns nothing
 */
void color(int nodeId, int numNode, int numColor, node_t* c, int *numSolution, int _callIndex){
#ifdef TRACK_TRAVERSALS
  work++;
#endif
#ifdef BLOCK_PROFILE
  profiler->record_single();
#endif

  const bool hasPendingColor = (_callIndex != -1);
  if (hasPendingColor){
    const int previous = nodeId - 1;
    c[previous] = _callIndex;
    // Prune this branch when the colour just assigned creates a conflict.
    if (!ok(previous, _callIndex, c, numNode)) return;
  }

  if (nodeId != numNode){
    // Try every colour, starting from colour id 1, for the current node.
    int candidate = 1;
    while (candidate <= numColor){
      color(nodeId + 1, numNode, numColor, c, numSolution, candidate);
      ++candidate;
    }
    return;
  }

  // Every node is coloured: count the solution.
  *numSolution += 1;
#ifdef _DEBUG
  if (*numSolution % 1000 == 0) print_solution(c, numNode);
#endif
}

int color_block(_BlockStack *_stack, int _depth, int* num, int _callIndex);

/*
 * Breadth-first execution to expand the number of tasks in the software block.
 *
 * _stack : block stack; the level at *_depth holds the block to process
 * _depth : in/out; index of the node being coloured, incremented by one
 *          before this function hands over to the next level
 * num    : running count of complete colourings
 *
 * When *_depth equals g_nnodes, every point in the block is counted as a
 * solution. Otherwise each point is tried with each colour 1..numColor, and
 * the assignments accepted by ok() are appended to the level's nextBlock0.
 * Then:
 *   - a non-empty next block that still fits the budget
 *     (size <= _expandSize / numColor), or that has reached depth g_nnodes,
 *     is expanded again by a recursive call;
 *   - otherwise a non-empty next block is handed to color_block once per
 *     colour for depth-first execution.
 */
void color_expand_bf(_BlockStack* _stack, int* _depth, int* num){
#ifdef TRACK_TRAVERSALS
  work++;
#endif
  class _BlockSet *_set = _stack ->  get (*_depth);
  class _Block *_block = _set -> block;
  class _Block *_nextBlock0 = &_set -> _BlockSet::nextBlock0;
  _nextBlock0 ->  recycle ();

#ifdef BLOCK_PROFILE 
  profiler->record(_block->size, *_depth);

#ifdef EXPAND_PROFILE
  if (dynamic_reexpand_count) 
  {
    profiler->record_reexpansion(*_depth);
  }

  // Per-colour count of points added to the next block. Zeroed up front
  // because the terminal branch below never fills it, yet the reporting code
  // after the depth increment still reads every entry.
  int c_bef_exp_size[numColor];
  for (int ci = 0; ci < numColor; ++ci){
    c_bef_exp_size[ci] = 0;
  }
  int max_c_bef_exp_size = 0;
  int num_child = numColor;
#endif
#endif

  if (g_nnodes == *_depth) {
    // All nodes are coloured: every point in the block is a solution.
#ifdef PARALLELISM_PROFILE
    for (int pi = 0; pi < _block->size; pi++)
      parallelismProfiler->recordTruncate();
#endif
    *num += _block->size;
  } else {
    for(int i = 1; i <= numColor; ++i){
      for (int _bi = 0; _bi < _block->size; _bi++) {
        class _Point &_point = _block ->  get (_bi);
        node_t *a = _point.b;

        a[*_depth] = i;
        if (!ok(*_depth, i, a, g_nnodes)) {
#ifdef PARALLELISM_PROFILE
          parallelismProfiler->recordTruncate();
#endif
          continue;
        }

#ifdef PARALLELISM_PROFILE
        parallelismProfiler->recordRecurse();
#endif
        _nextBlock0->add(a, *_depth + 1);
      }
#ifdef BLOCK_PROFILE
#ifdef EXPAND_PROFILE
      // Points contributed by colour i = current size minus earlier colours.
      c_bef_exp_size[i - 1] = _nextBlock0->size;
      for (int j = i - 2; j >= 0; --j){
        c_bef_exp_size[i - 1] -= c_bef_exp_size[j];
      }
      max_c_bef_exp_size += c_bef_exp_size[i-1];
      if (c_bef_exp_size[i-1] == 0) num_child--; 
#endif
#endif
    }
  }

  //Free old stack space
  if (!g_is_partial){
    if (!*_depth){
      delete g_initial_block;
      g_initial_block = NULL; // do not leave a dangling global behind
    } else
    {
      _stack->release(*_depth - 1);
    }
  }

  int _nextblock0_size = _nextBlock0 -> _Block::size;

#ifdef _DEBUG
  cout << "This is _nextblock0_size: " << _nextblock0_size << endl;
  for (int j = 0; j < _nextblock0_size; ++j){
    for (int k = 0; k < g_nnodes; ++k){
      printf("%d ", (int)_nextBlock0->points[j].b[k]);
    }
    cout << endl;
  }
  cout << endl;
#endif

  *_depth += 1;
#ifdef BLOCK_PROFILE
#ifdef EXPAND_PROFILE
  if (dynamic_reexpand_count == 0){
    if(_nextblock0_size) {
      profiler->record_bef_exp_size(*_depth, _nextblock0_size);
#ifdef INCLUSIVE
      profiler->record_w_wo_exp_ratio(*_depth, 1);	
#endif
    }
  } else{
    for (int i = 0; i < numColor; ++i){
      if (c_bef_exp_size[i]) profiler->record_bef_exp_size(*_depth, c_bef_exp_size[i]);
    }
    // A non-empty next block implies at least one colour contributed points,
    // so num_child is non-zero here.
    if(_nextblock0_size) profiler->record_w_wo_exp_ratio(*_depth, ((double)_nextblock0_size) / ((double)max_c_bef_exp_size / num_child));	
  }
  if(_nextblock0_size) {
    profiler->record_aft_exp_size(*_depth, _nextblock0_size);
  }


#endif
#endif

  // Keep expanding while the next block is non-empty and either still fits
  // the budget or has reached the final depth.
  // NOTE(review): the original author flagged this condition with
  // "May result in bugs" - confirm the intended threshold.
  if (_nextblock0_size > 0 && (_nextblock0_size <= _expandSize / numColor || *_depth == g_nnodes)) {
    _stack ->  get (*_depth) -> _BlockSet::block = _nextBlock0;
    color_expand_bf(_stack, _depth, num);
  } else { //Reach the buffer size, or finish all evaluation
    if (!dynamic_reexpand_count){// only print for the first time
      cout << "This is the max block buffer size for dfs: " << _nextblock0_size << endl;
      cout << "This is the result now: " << *num << endl;
    }

    if (_nextblock0_size){
      _stack ->  get (*_depth) -> _BlockSet::block = _nextBlock0;
      for (int i = 1; i <= numColor; i++) {
        color_block(_stack, *_depth, num, i);
      }
    }

  }
#ifdef PARALLELISM_PROFILE
  parallelismProfiler->blockEnd();
#endif
}

/*
 * Depth-first execution of the _callIndex-th child to limit memory use.
 *
 * _stack     : block stack; the level at _depth holds the block to process
 * _depth     : index of the node that receives colour _callIndex
 * num        : running count of complete colourings
 * _callIndex : colour assigned to node _depth in every point of the block
 *
 * Returns 1 when the block was small enough (size <= _expandSize / numColor)
 * to be handed back to color_expand_bf, which tries all colours itself, so
 * the caller stops iterating over its remaining colours. Returns 0 after a
 * regular depth-first step.
 */
int color_block(_BlockStack *_stack, int _depth, int* num, int _callIndex) {
#ifdef TRACK_TRAVERSALS
  work++;
#endif
  class _BlockSet *levelSet = _stack->get(_depth);
  class _Block *current = levelSet->block;
  class _Block *children = &levelSet->_BlockSet::nextBlock0;
  children->recycle();

  const int currentSize = current->size;
  if (currentSize <= _expandSize / numColor){
    // Do dynamic expansion: the block shrank enough to go breadth-first again.
    dynamic_reexpand_count++;
    g_is_partial = 1;
    color_expand_bf(_stack, &_depth, num);
    return 1;
  }

#ifdef BLOCK_PROFILE
  profiler->record(current->size, _depth);
#endif
  const int childDepth = _depth + 1;
  for (int idx = 0; idx < current->size; idx++) {
    class _Point &pt = current->get(idx);
    node_t *assignment = pt.b;

    // No _callIndex != -1 check is needed: blocks are only run after expansion.
    assignment[_depth] = _callIndex;
    if (!ok(_depth, _callIndex, assignment, g_nnodes)) {
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordTruncate();
#endif
      continue;
    }

    if (g_nnodes == childDepth) {
      // Last node coloured without conflict: count the solution.
#ifdef PARALLELISM_PROFILE
      parallelismProfiler->recordTruncate();
#endif
      *num += 1;
      continue;
    }

#ifdef PARALLELISM_PROFILE
    parallelismProfiler->recordRecurse();
#endif
    children->add(assignment, childDepth);
  }

  if (children->_Block::size > 0) {
    _stack->get(childDepth)->_BlockSet::block = children;
#ifdef BLOCK_PROFILE
#ifdef EXPAND_PROFILE
    profiler->record_bef_exp_size(childDepth, children->size);
    profiler->record_aft_exp_size(childDepth, children->size);
#ifdef INCLUSIVE
    profiler->record_w_wo_exp_ratio(childDepth, 1);	
#endif
#endif
#endif
    for (int nextColor = 1; nextColor <= numColor; nextColor++) {
      // A non-zero result means the child re-expanded and covered all colours.
      if (color_block(_stack, childDepth, num, nextColor)) break;
    }
  }

#ifdef PARALLELISM_PROFILE
  parallelismProfiler->blockEnd();
#endif

  return 0;
}

/*
 * Benchmark entrance called by harness.
 *
 * Arguments (argv[0] is the first benchmark argument, not the program name):
 *   argv[0] : number of colours (positive integer)
 *   argv[1] : optional k; the maximum block size becomes 2^k
 *   argv[2] : optional vector width reported to the block profiler
 *   argv[3] : optional path of the graph file, replacing DATAFILE
 *
 * Reads the graph, builds the adjacency matrix, runs the blocked colouring
 * and prints the number of solutions. Returns 1. Invalid arguments, an
 * unreadable or malformed data file, and allocation failures terminate the
 * program with a message.
 */
int app_main(int argc, char** argv){
  int numSolution = 0;

  if (argc < 1 || argc > 4){
    printf("Usage: ./graphcoloring [color numbers] or ./graphcoloring [color numbers]  [max block size k, in pow(2, k)] or ./graphcoloring [color numbers] [max block size k, in pow(2, k)] [vector width] or ./graphcoloring [color numbers] [max block size k, in pow(2, k)] [vector width] [graph]\n");
    exit(0);
  }

  int vec_width = 16;//set simd profiler simd width as 16
  numColor = atoi(argv[0]);
  if (numColor < 1){
    // atoi yields 0 for non-numeric text; block sizes are divided by numColor.
    fprintf(stderr, "Invalid color number '%s': expected a positive integer\n", argv[0]);
    exit(EXIT_FAILURE);
  }
  if (argc >= 2){
    const int k = atoi(argv[1]);
    if (k > 62){
      fprintf(stderr, "Invalid max block size exponent %d: 2^k must fit in a long long\n", k);
      exit(EXIT_FAILURE);
    }
    // An exact shift replaces pow(2.0, k). A negative k keeps its former
    // result of 0 (a fraction truncated to an integer).
    _expandSize = (k < 0) ? 0 : (1LL << k);
  }
  if (argc >= 3) vec_width =  atoi(argv[2]);
  if (argc == 4) DATAFILE = argv[3];
  printf("Start %d-Color Process...", numColor);

  /*******Read input data**************/
  int numNode = 0, numEdge = 0;
  int *edge;
  FILE *data;
  char tmp1[50], tmp2[50];
  data = fopen(DATAFILE, "r");
  if(data == NULL) {
    printf("Open data file failed\n");
    exit(0);
  }
  // %49s bounds both header tokens to their 50-byte buffers.
  if (fscanf(data, "%49s %49s %d %d\n", tmp1, tmp2, &numNode, &numEdge) != 4
      || numNode < 1 || numEdge < 0){
    fprintf(stderr, "Malformed header in data file %s\n", DATAFILE);
    fclose(data);
    exit(EXIT_FAILURE);
  }
  printf("numNode = %d, numEdge =%d\n", numNode, numEdge);

  edge = (int *)malloc((size_t)numEdge * 2 * sizeof(int));
  if (edge == NULL && numEdge > 0){
    fprintf(stderr, "Failed to allocate storage for %d edges\n", numEdge);
    fclose(data);
    exit(EXIT_FAILURE);
  }
  readEdge(data, numEdge, numNode, edge);
  fclose(data);
  /********Read input data end**********/

  //Construct adjacent Matrix
  init_graph(numNode, numEdge, edge);
#ifdef _DEBUG
  print_graph(numNode);
#endif

  //Color auxiliary array, zero-initialised
  _MM_ALIGNED_ node_t* c = (node_t*)calloc((size_t)numNode, sizeof(node_t));
  if (c == NULL){
    fprintf(stderr, "Failed to allocate color array for %d nodes\n", numNode);
    exit(EXIT_FAILURE);
  }

#ifdef BLOCK_PROFILE
  cout << "The vector width is " << vec_width << endl;
  profiler = new BlockProfiler(vec_width);
#endif

  //Start coloring...
  Harness::start_timing();

  //_expandDepth = Harness::get_splice_depth();

  //Initialize software block stack
  g_nnodes = numNode;
  cout << "Set fixed max block buffer size, _expandSize: " << _expandSize << endl;
  _Block::max_block = _expandSize;
  Harness::set_block_size(_expandSize);
  class _BlockStack * _stack = new _BlockStack;
  class _Block * _block = new _Block;
  g_initial_block = _block;

  _block->add(c);
  int _depth = 0;
  _stack->get (_depth) -> block = _block;

  //Start to execute blocked graphcol 
  if (_expandSize >= numColor){
#ifdef BLOCK_PROFILE
#ifdef EXPAND_PROFILE
    profiler->record_bef_exp_size(0, 1);
    profiler->record_aft_exp_size(0, 1);
#ifdef INCLUSIVE
    profiler->record_w_wo_exp_ratio(0, 1);
#endif
#endif
#endif
    color_expand_bf(_stack, &_depth, &numSolution);
  }
  else{
    // Budget too small for even one expansion step: run depth-first directly.
    int df_block_size = _stack->get(_depth)->block->size;
    cout << "This is the max block buffer size for dfs: " << df_block_size << endl;
    cout << "This is the result now: " << numSolution << endl;

    if (df_block_size){
      for (int i = 1; i <= numColor; i++) {
        color_block(_stack, _depth, &numSolution, i);
      }
    }
  }
  delete _stack;
  // In the expansion path color_expand_bf already deleted the initial block.
  if (_expandSize < numColor) delete _block;

  Harness::stop_timing();

#ifdef BLOCK_PROFILE
  profiler->output();
#ifdef EXPAND_PROFILE
  profiler->outputReexpandInfo();
#endif
  delete profiler;
#endif
#ifdef TRACK_TRAVERSALS
  cout << "work: " << work << endl;
#endif

  printf("This is the number of possible solutions: %d\n", numSolution);
  printf("This is the dynamic reexpand counts: %d\n", dynamic_reexpand_count);

#ifdef PROFILE_SPACE_USE
  cout << "This is max space use (Bytes): " << m_space << endl;
  cout << "This is the total number of new operations for block: " << total_malloc << endl;
#endif

  // NOTE(review): assumes _Block::add copies the colouring rather than taking
  // ownership of c - confirm against block.h.
  free(c);
  free(edge);
  free(adjacentNodes);
  return 1;
}
