/**********************************************************/
/* This code is for PLDI-15 Artifact Evaluation only      */ 
/* and will be released with further copyright information*/ 
/* File: SSE4 block wo reexpansion of minmax              */
/**********************************************************/

#include <stdio.h>
#include <string.h>
#include <iostream>
#include <fstream>

#include "simd.h"
#include "block-sse.h"
#include "harness.h"
#ifdef BLOCK_PROFILE
#include "blockprofiler.h"
BlockProfiler profiler;
#endif

using namespace std;

//int _expandDepth = 0;
// Block buffer capacity. Defaults to D_MAX_BLOCK_SIZE; app_main may replace it
// with a power of two taken from the command line and copies it into
// _Block::max_block before each search.
int _expandSize = D_MAX_BLOCK_SIZE;
// Root block allocated by app_main for each search; minimax_expand_bf deletes
// it once the depth-0 level has been expanded.
_Block * g_initial_block;
// Multiplier applied to the breadth-first expansion threshold in
// minimax_expand_bf (app_main sets it to 1 or 2).
int expand_condition = 1;

// Per-cell score accumulator, indexed by the first move of a line of play.
// Zero-initialised here and cleared by app_main before every search.
int pos_weights[POS_SIZE]={};

/* Mark every playable cell (indices 1 .. BOARD_SIZE*BOARD_SIZE) as empty
 * ('_'). Index 0 of pboard is not written. */
void init_board( char pboard[] )
{
  const int cell_count = BOARD_SIZE * BOARD_SIZE;
  memset(pboard + 1, '_', cell_count);
}

/* Print the board to stdout, one row per line, each cell followed by a
 * space. Cells are read from indices 1 .. BOARD_SIZE*BOARD_SIZE. */
void print_board(char pboard[] )
{
  const int side = BOARD_SIZE;
  for (int row = 0; row < side; ++row)
  {
    const char* row_cells = pboard + row * side;
    for (int col = 1; col <= side; ++col)
    {
      printf("%c ", row_cells[col]);
    }
    printf("\n");
  }
}

/* Dump pos_weights[1 .. POS_SIZE-1] to stdout as a tab-separated line. */
void print_weights( )
{
  printf("\n*******Weighted array*****\n");
  for (int cell = 1; cell < POS_SIZE; ++cell)
  {
    printf("%d\t", pos_weights[cell]);
  }
  printf("\n");
}


/*sequential checking functions*/
/* Scalar winner test for a 4x4 board stored in pboard[1..16] (row-major).
 * Returns  1 if 'X' owns a complete row, column or diagonal,
 *         -1 if 'O' does (and 'X' does not),
 *          0 otherwise. */
int check_win(char pboard[])
{
  /* The ten winning lines: four rows, four columns, two diagonals. */
  static const int kLines[10][4] = {
    { 1,  2,  3,  4}, { 5,  6,  7,  8}, { 9, 10, 11, 12}, {13, 14, 15, 16},
    { 1,  5,  9, 13}, { 2,  6, 10, 14}, { 3,  7, 11, 15}, { 4,  8, 12, 16},
    { 1,  6, 11, 16}, { 4,  7, 10, 13}
  };
  static const char kMarks[2]  = { 'X', 'O' };
  static const int  kResult[2] = {  1,  -1  };

  /* 'X' is tested against every line before 'O' is considered. */
  for (int who = 0; who < 2; ++who)
  {
    const char mark = kMarks[who];
    for (int l = 0; l < 10; ++l)
    {
      const int* line = kLines[l];
      if (pboard[line[0]] == mark && pboard[line[1]] == mark &&
          pboard[line[2]] == mark && pboard[line[3]] == mark)
      {
        return kResult[who];
      }
    }
  }
  return 0;
}

/* Returns 1 when the board has no winner (per check_win) and none of the
 * cells 1..16 is still empty ('_'); returns 0 otherwise. */
int check_draw(char pboard[])
{
  if (check_win(pboard) != 0)
  {
    return 0;
  }

  /* Any remaining empty cell means the game can still continue. */
  for (int cell = 1; cell <= 16; ++cell)
  {
    if (pboard[cell] == '_')
    {
      return 0;
    }
  }
  return 1;
}


/* Scalar terminal-state evaluation.
 *   - 'X' has won: *leaf_val = 20 - depth,    returns  1
 *   - 'O' has won: *leaf_val = -(20 - depth), returns -1
 *   - draw:        *leaf_val = 0,             returns  1
 *   - otherwise:   *leaf_val is left untouched, returns 0
 * The player argument is not used. */
int evaluationFunction(char pboard[],int *leaf_val,int depth,int player)
{
  const int winner = check_win(pboard);
  switch (winner)
  {
    case 1:
      *leaf_val = 20 - depth;
      return winner;
    case -1:
      *leaf_val = -(20 - depth);
      return winner;
    default:
      break;
  }

  /* No winner: the game has only ended if the board is full. */
  const int drawn = check_draw(pboard);
  if (drawn)
  {
    *leaf_val = 0;
  }
  return drawn;
}

/* Find the first empty cell at or after *position.
 * On success stores its index in *position and returns 1; otherwise stores
 * NOMOVE in *position and returns 0. */
int chooseNextMove(int* position,char pboard[])
{
  const int last_cell = BOARD_SIZE * BOARD_SIZE;
  int probe = *position;
  *position = NOMOVE;

  while (probe <= last_cell)
  {
    if (pboard[probe] == '_')
    {
      *position = probe;
      return 1;
    }
    ++probe;
  }
  return 0;
}

/* Pick the next move from pos_weights.
 * Starts from the first empty cell, then scans all cells: an odd player value
 * selects the empty cell with the largest weight, an even one the empty cell
 * with the smallest weight. Ties keep the earliest cell.
 * Returns the chosen cell, or -1 (after printing a message) when the board
 * has no empty cell. */
int update_pos_weigh(int player,char board[])
{
  int best = 1;
  if (!chooseNextMove(&best, board))
  {
    printf("\nPanic - cant choose next position*");
    return -1;
  }

  const bool maximise = (player % 2) != 0;
  const int last_cell = BOARD_SIZE * BOARD_SIZE;
  for (int cell = 1; cell <= last_cell; ++cell)
  {
    if (board[cell] != '_')
    {
      continue;
    }
    const bool improves = maximise ? (pos_weights[cell] > pos_weights[best])
                                   : (pos_weights[cell] < pos_weights[best]);
    if (improves)
    {
      best = cell;
    }
  }
  return best;
}

/*simd checking functions*/
//#define ShortCut
/* Helper for check_win_simd: returns a byte mask (0xFF / 0x00 per lane) of the
 * boards in which `mark` occupies a complete row, column or diagonal.
 *
 * pboard_vec is read as an array of 16-byte vectors: vector c holds cell c of
 * 16 boards, one board per byte lane. It must be 16-byte aligned because
 * _mm_load_si128 is used. Only vectors 1..16 are read.
 *
 * With ShortCut defined the scan stops as soon as every lane has a win. */
inline __m128i winning_lanes_simd(const char pboard_vec[], char mark){
  /* Four rows, four columns, main diagonal, anti-diagonal of the 4x4 board. */
  static const int kLines[10][4] = {
    { 1,  2,  3,  4}, { 5,  6,  7,  8}, { 9, 10, 11, 12}, {13, 14, 15, 16},
    { 1,  5,  9, 13}, { 2,  6, 10, 14}, { 3,  7, 11, 15}, { 4,  8, 12, 16},
    { 1,  6, 11, 16}, { 4,  7, 10, 13}
  };

  const __m128i* cells = (const __m128i*)pboard_vec;
  const __m128i vec_mark = _mm_set1_epi8(mark);
  __m128i vec_won = _mm_setzero_si128();

  for (int line = 0; line < 10; ++line){
    /* Start with all lanes set and knock out lanes where any cell differs. */
    __m128i vec_line = _mm_set1_epi8(-1);
    for (int k = 0; k < 4; ++k){
      const __m128i vec_cell = _mm_load_si128(cells + kLines[line][k]);
      vec_line = _mm_and_si128(vec_line, _mm_cmpeq_epi8(vec_cell, vec_mark));
    }
    vec_won = _mm_or_si128(vec_won, vec_line);
#ifdef ShortCut
    if (_mm_movemask_epi8(vec_won) == 0xffff){
      break;
    }
#endif
  }
  return vec_won;
}

/* SIMD winner test for 16 boards at once.
 *
 * vec_eval_ended: in/out, one signed byte per board. Each lane is incremented
 *                 by 1 where 'X' has a winning line and decremented by 1 where
 *                 'O' has one (a lane with both ends up unchanged).
 * pboard_vec:     16-byte aligned, cell-major board data (see
 *                 winning_lanes_simd).
 *
 * With ShortCut defined, if every lane is won by 'X' (or, failing that, every
 * lane by 'O') *vec_eval_ended is overwritten with all 1 (or all -1) and the
 * function returns early. */
inline void check_win_simd(__m128i* vec_eval_ended, char pboard_vec[]){
  const __m128i vec_cond_X = winning_lanes_simd(pboard_vec, 'X');
#ifdef ShortCut
  if (_mm_movemask_epi8(vec_cond_X) == 0xffff){
    *vec_eval_ended = _mm_set1_epi8(1);
    return;
  }
#endif

  const __m128i vec_cond_O = winning_lanes_simd(pboard_vec, 'O');
#ifdef ShortCut
  if (_mm_movemask_epi8(vec_cond_O) == 0xffff){
    *vec_eval_ended = _mm_set1_epi8(-1);
    return;
  }
#endif

  /* Masked lanes contribute +1 for 'X' ... */
  *vec_eval_ended = _mm_add_epi8(*vec_eval_ended,
                                 _mm_and_si128(vec_cond_X,
                                               _mm_set1_epi8(1)));

  /* ... and -1 for 'O' (0xFF & -1 is -1). */
  *vec_eval_ended = _mm_add_epi8(*vec_eval_ended,
                                 _mm_and_si128(vec_cond_O,
                                               _mm_set1_epi8(-1)));
}


/* SIMD draw test for 16 boards at once.
 * Adds 1 to each lane of *vec_eval_ended whose board has no winner (according
 * to check_win_simd run on a fresh zero accumulator) and no empty cell in
 * vectors 1 .. POS_SIZE-1 of pboard_vec. Other lanes are left unchanged.
 * pboard_vec must be 16-byte aligned (aligned loads are used). */
inline void check_draw_simd(__m128i* vec_eval_ended, char pboard_vec[]){
  const __m128i* cells = (const __m128i*)pboard_vec;
  const __m128i vec_one = _mm_set1_epi8(1);
  const __m128i vec_minus_one = _mm_set1_epi8(-1);
  const __m128i vec_blank = _mm_set1_epi8('_');

  __m128i vec_winner = _mm_setzero_si128();
  check_win_simd(&vec_winner, pboard_vec);

  /* Lanes that cannot be a draw: somebody won ... */
  __m128i vec_not_draw = _mm_or_si128(_mm_cmpeq_epi8(vec_winner, vec_one),
                                      _mm_cmpeq_epi8(vec_winner, vec_minus_one));

  /* ... or at least one cell is still empty. */
  int cell = 1;
  while (cell < POS_SIZE){
    const __m128i vec_cell = _mm_load_si128(cells + cell);
    vec_not_draw = _mm_or_si128(vec_not_draw, _mm_cmpeq_epi8(vec_cell, vec_blank));
    ++cell;
  }

  /* 1 in drawn lanes, 0 elsewhere. */
  const __m128i vec_draw = _mm_sub_epi8(vec_one, _mm_and_si128(vec_one, vec_not_draw));
  *vec_eval_ended = _mm_add_epi8(*vec_eval_ended, vec_draw);
}


/* SIMD terminal-state evaluation for 16 boards at once.
 *   1. check_win_simd accumulates +1 / -1 into *vec_eval_ended.
 *   2. Lanes whose *vec_eval_ended equals 1 get (20 - depth) added to
 *      *vec_leaf_val; lanes equal to -1 get (depth - 20) added.
 *   3. check_draw_simd adds 1 to *vec_eval_ended for drawn lanes.
 * All arithmetic is per-byte. The player argument is not used. */
inline void evaluationFunction_simd(__m128i* vec_eval_ended, char pboard_vec[], __m128i* vec_leaf_val, char depth, char player){

  check_win_simd(vec_eval_ended, pboard_vec);

  const __m128i vec_twenty = _mm_set1_epi8(20);
  const __m128i vec_level = _mm_set1_epi8(depth);
  const __m128i vec_x_score = _mm_sub_epi8(vec_twenty, vec_level);
  const __m128i vec_o_score = _mm_sub_epi8(vec_level, vec_twenty);

  const __m128i vec_x_won = _mm_cmpeq_epi8(*vec_eval_ended, _mm_set1_epi8(1));
  const __m128i vec_o_won = _mm_cmpeq_epi8(*vec_eval_ended, _mm_set1_epi8(-1));

  /* A lane cannot equal both 1 and -1, so the two masked scores never
   * overlap and can be merged with OR before the single add. */
  const __m128i vec_score = _mm_or_si128(_mm_and_si128(vec_x_won, vec_x_score),
                                         _mm_and_si128(vec_o_won, vec_o_score));
  *vec_leaf_val = _mm_add_epi8(*vec_leaf_val, vec_score);

  check_draw_simd(vec_eval_ended, pboard_vec);
}


/* SIMD processing of MY_SIMD_WIDTH boards of _block, starting at index _si:
 * play `player`'s mark at cell `pos` on every board, evaluate the results,
 * add finished games to pos_weights and append unfinished boards to
 * _nextBlock0. Board cells and start positions are stored as char.
 *
 *   _block      source block (read only here)
 *   _nextBlock0 destination block for boards that need further expansion
 *   _si         index of the first board of the group inside _block
 *   _depth      current search depth; 0 means `pos` itself is the start move
 *   player      1 places 'X', 2 places 'O'
 *   pos         cell index to play
 *
 * NOTE(review): the aligned loads assume getptr() returns 16-byte aligned
 * storage holding one cell of 16 consecutive boards - confirm against _Block.
 */
void process_simd(_Block* _block, _Block* _nextBlock0, int _si, int _depth, int player, int pos){
  // Bit i of non_leaf_mask1 is set when cell `pos` of lane i is still empty,
  // i.e. the move is legal for that board.
  __m128i vec_board_pos = _mm_load_si128((__m128i*)_block->getptr(_si, pos));
  __m128i vec_ul = _mm_set1_epi8('_');
  __m128i vec_cond1 = _mm_cmpeq_epi8(vec_board_pos, vec_ul);

  unsigned short non_leaf_mask1 = _mm_movemask_epi8(vec_cond1);
  // Nothing to do when the cell is occupied on every board of the group.
  if (non_leaf_mask1 == 0) return;

  // Local cell-major copy of the group: row i holds cell i of all lanes.
  __declspec(align(16)) char pboard_vec[POS_SIZE * MY_SIMD_WIDTH] = {0};
  for (int i = 0; i < POS_SIZE; ++i){
    memcpy(pboard_vec + i * MY_SIMD_WIDTH, _block->getptr(_si, i), MY_SIMD_WIDTH);
  }
  // The mark is written into every lane, including lanes where the cell was
  // already occupied; those lanes are filtered out below via non_leaf_mask1.
  if (player == 2){
    memset(pboard_vec + pos * MY_SIMD_WIDTH, 'O', MY_SIMD_WIDTH);
  } else if (player == 1){
    memset(pboard_vec + pos * MY_SIMD_WIDTH, 'X', MY_SIMD_WIDTH);
  }

  __declspec(align(16)) char eval_ended[MY_SIMD_WIDTH] = {0};
  __declspec(align(16)) char leaf_val[MY_SIMD_WIDTH] = {0};
  __declspec(align(16)) char startpos[MY_SIMD_WIDTH] = {0};
  // At depth 0 the start move is `pos`; deeper levels inherit the start move
  // recorded for each board in the source block.
  memset(startpos, (char)pos, MY_SIMD_WIDTH);
  if (_depth){
    memcpy(startpos, &_block->startpos[_si], MY_SIMD_WIDTH);
  }

  // Per lane: vec_eval_ended != 0 when the game is over (win or draw),
  // vec_leaf_val holds the signed score of a win.
  __m128i vec_eval_ended = _mm_setzero_si128();
  __m128i vec_leaf_val = _mm_setzero_si128();
  evaluationFunction_simd(&vec_eval_ended, pboard_vec, &vec_leaf_val, _depth, player);

  _mm_store_si128((__m128i*)eval_ended, vec_eval_ended);
  _mm_store_si128((__m128i*)leaf_val, vec_leaf_val);

#ifdef NOSC
  // Variant without stream compaction: handle each legal lane individually.
  for (int i = 0; i < MY_SIMD_WIDTH; ++i){
    unsigned short f = 1 << i;
    if (f & non_leaf_mask1){
      if(eval_ended[i])
      {
        // Finished game: credit a non-zero score to the start move.
        if (leaf_val[i]) pos_weights[startpos[i]] += (int)leaf_val[i];
        continue;
      }
      // Unfinished game: queue lane i of the local copy for the next level.
      _nextBlock0->addcol(pboard_vec, i, POS_SIZE, startpos[i]);
    }

  }
#else //Using SC
  // Credit finished, legal lanes with a non-zero score to their start move.
  for (int i = 0; i < MY_SIMD_WIDTH; ++i){
    unsigned short f = 1 << i;
    if ((f & non_leaf_mask1) && eval_ended[i] && leaf_val[i]){
      pos_weights[startpos[i]] += (int)leaf_val[i];
    }
  }

  // mask2 has a bit set for every lane whose game is still running.
  // ret_mask therefore has a bit set for every lane that is NOT carried over
  // (illegal move or finished game).
  __m128i vec_0 = _mm_setzero_si128();
  __m128i cond_eval_ended = _mm_cmpeq_epi8(vec_eval_ended, vec_0);
  unsigned short mask2 = _mm_movemask_epi8(cond_eval_ended);
  unsigned short ret_mask = (~non_leaf_mask1) | (~mask2);

  // Build a 16-byte shuffle control in tmp from two 8-lane table lookups.
  // NOTE(review): presumably g_shuffletable[m] packs the indices of the lanes
  // to keep for mask byte m and g_advanceNextPtrCounts[m] is how many lanes
  // that is - both are defined outside this file, confirm there.
  __attribute__((aligned(16))) unsigned char tmp[16];
  unsigned index = 0;
  // Low 8 lanes.
  *((__int64*)tmp) = g_shuffletable[ret_mask & 0x000000FF];
  index += g_advanceNextPtrCounts[ret_mask & 0x000000FF];
  // High 8 lanes: same table, with 8 added to every byte so the indices refer
  // to lanes 8..15; written right after the entries kept from the low half.
  *((__int64*)&tmp[index]) = 0x0808080808080808 +
      g_shuffletable[(ret_mask & 0x0000FF00) >> 8];
  index += g_advanceNextPtrCounts[(ret_mask & 0x0000FF00) >> 8];
  // Pad with 0xFF: a control byte with its top bit set makes
  // _mm_shuffle_epi8 write zero to that output byte.
  memset(&tmp[index], 0xFF, 16 - index);

  __m128i vec_shuffleTable = _mm_load_si128((const __m128i *)tmp);


  // Apply the shuffle to every cell row and store it at the current end of
  // the destination block. A full 16 bytes is stored each time even though
  // only `index` of them are claimed below.
  // NOTE(review): assumes _nextBlock0->a has room for that over-write.
  for (int i = 0; i < POS_SIZE; i++){
    __m128i vec_n = _mm_load_si128(((__m128i*)pboard_vec) + i);
    vec_n = _mm_shuffle_epi8(vec_n, vec_shuffleTable);
    _mm_storeu_si128((__m128i*)&_nextBlock0->a[i * _Block::max_block + _nextBlock0->size], vec_n);
  }

  // Shuffle the start moves the same way so they stay paired with their boards.
  __m128i vec_startpos = _mm_load_si128((__m128i*)startpos);
  vec_startpos = _mm_shuffle_epi8(vec_startpos, vec_shuffleTable); 
  _mm_storeu_si128((__m128i*)&_nextBlock0->startpos[_nextBlock0->size], vec_startpos);

  // Only the first `index` lanes just written are counted as new boards.
  _nextBlock0->size += index;

#endif//NOSC

}

/* Scalar processing of a single board (index _bi of _block): play `player`'s
 * mark at cell `pos`, evaluate the result and either add the score of a
 * finished game to pos_weights or append the board to _nextBlock0.
 * Does nothing when the cell is already occupied. */
void process_point(_Block* _block, _Block* _nextBlock0, int _bi, int _depth, int player, int pos){
  /* Occupied cell: not a legal move for this board. */
  if (_block->get(_bi, pos) != '_') return;

  /* getcol hands back a copy of the board that is released with free(). */
  char *pboard = _block->getcol(_bi);

  switch (player)
  {
    case 1:
      pboard[pos] = 'X';
      break;
    case 2:
      pboard[pos] = 'O';
      break;
    default:
      break;
  }

  /* At depth 0 this move starts the line of play; deeper levels inherit the
   * start move stored with the board. */
  char startpos;
  if (_depth != 0) {
    startpos = _block->startpos[_bi];
  } else {
    startpos = pos;
  }

  int leaf_val = 0;
  const int eval_ended = evaluationFunction(pboard, &leaf_val, _depth, player);

  if (!eval_ended)
  {
    /* Game still open: queue the board for the next level. */
    _nextBlock0->add(pboard, POS_SIZE, startpos);
  }
  else if (leaf_val)
  {
    /* Finished game with a non-zero score: credit the start move. */
    pos_weights[startpos] += leaf_val;
  }

  free(pboard);
}

/* Breadth-first expansion. Plays every position 1..g_ncheck on every board of
 * the block at level *_depth, collecting the unfinished boards in that level's
 * nextBlock0, then advances *_depth and swaps *player. Recurses while the new
 * block is non-empty and no larger than
 * _expandSize / g_ncheck * expand_condition. On return, level *_depth of the
 * stack points at the last block produced. */
void minimax_expand_bf(_BlockStack* _stack, int * _depth, int * player)
{
  _BlockSet* const level = _stack->get(*_depth);
  _Block* const current = level->block;
  _Block* const next = &level->nextBlock0;
  next->recycle();
#ifdef BLOCK_PROFILE
  profiler.record(current->size, *_depth);
#endif

  for (int pos = 1; pos <= g_ncheck; pos++) {
    /* Full groups of MY_SIMD_WIDTH boards go through the SIMD path ... */
    int group = 0;
    while (group < (current->size - MY_SIMD_WIDTH + 1)) {
      process_simd(current, next, group, *_depth, *player, pos);
      group += MY_SIMD_WIDTH;
    }

    /* ... and the leftover boards through the scalar path. */
    for (int single = group; single < current->size; single++) {
      process_point(current, next, single, *_depth, *player, pos);
    }
  }

  /* The input of this level is no longer needed. */
  if (*_depth == 0) {
    delete g_initial_block;
  } else {
    _stack->release(*_depth - 1);
  }

  const int produced = next->size;

  *_depth += 1;
  *player = 3 - *player;

  const bool keep_expanding =
      produced > 0 && produced <= _expandSize / g_ncheck * expand_condition;

  /* The new block becomes the input of the next level in either case. */
  _stack->get(*_depth)->block = next;
  if (keep_expanding) {
    minimax_expand_bf(_stack, _depth, player);
  }
}


/* Depth-first step. Plays `player` at cell `pos` on every board of the block
 * at level _depth, gathers the unfinished boards in that level's nextBlock0
 * and, if there are any, recurses into level _depth + 1 for each position
 * 1..g_ncheck with the other player. */
void minimax_block(_BlockStack* _stack, int _depth, int pos, int player){
  _BlockSet* const level = _stack->get(_depth);
  _Block* const current = level->block;
  _Block* const next = &level->nextBlock0;
  next->recycle();
#ifdef BLOCK_PROFILE
  profiler.record(current->size, _depth);
#endif

  /* Full groups of MY_SIMD_WIDTH boards use the SIMD path ... */
  int group = 0;
  while (group < (current->size - MY_SIMD_WIDTH + 1)){
    process_simd(current, next, group, _depth, player, pos);
    group += MY_SIMD_WIDTH;
  }

  /* ... the remainder is handled one board at a time. */
  for (int single = group; single < current->size; single++) {
    process_point(current, next, single, _depth, player, pos);
  }

  if (!(next->size > 0)) {
    return;
  }

  _stack->get(_depth + 1)->block = next;
  for (int child_pos = 1; child_pos <= g_ncheck; child_pos++) {
    minimax_block(_stack, _depth + 1, child_pos, 3 - player);
  }
}

/*Benchmark entrance called by harness
 *
 * Plays a full game of 4x4 tic-tac-toe against itself. 'X' opens on cell 5;
 * every later move is chosen by a blocked minimax search (breadth-first
 * expansion followed by depth-first evaluation) over pos_weights.
 *
 * Arguments:
 *   argv[0]  number of positions examined per move (g_ncheck); must be in
 *            1 .. BOARD_SIZE*BOARD_SIZE. Defaults to 12 when argc < 1.
 *   argv[1]  optional, only read when argc == 2: base-2 exponent of the block
 *            buffer size (_expandSize). An exponent of 6 keeps
 *            expand_condition at 1, any other value sets it to 2.
 *
 * Returns 0 when the game ends in a win or a tie, -1 on an invalid g_ncheck
 * or when no move can be chosen.
 */
int app_main(int argc, char** argv)
{
  int pos, input, player=1,chk=0,ret=0;
  char board[BOARD_SIZE*BOARD_SIZE+1];
  board[0] = 0; // slot 0 is unused but copied into the block; keep it defined
  init_board(board);
  print_board(board);

  if (argc < 1) {
    g_ncheck = 12;
    printf("Checking for 12-way, and block size is using default %d\n", D_MAX_BLOCK_SIZE);
  } else {
    g_ncheck = atoi(argv[0]);
  }

  // g_ncheck is used as a divisor in minimax_expand_bf and as the highest
  // cell index played by the search, so reject anything outside the board.
  if (g_ncheck < 1 || g_ncheck > BOARD_SIZE*BOARD_SIZE) {
    printf("Invalid number of positions to check: %d (expected 1..%d)\n",
           (int)g_ncheck, BOARD_SIZE*BOARD_SIZE);
    return -1;
  }

  if (argc == 2) {
    const int exponent = atoi(argv[1]);
    _expandSize = pow(2.0, exponent);
    if (exponent == 6) expand_condition = 1;
    else expand_condition = 2;
  }

  Harness::start_timing();
  for(pos = 1; pos <= (BOARD_SIZE*BOARD_SIZE); pos++)
  {
    if(pos == 1) input = 5;
    else input = ret;

    // Odd turns belong to 'X', even turns to 'O'.
    if (pos % 2 != 0) board[input] = 'X';
    else board[input] = 'O';

    /*Print the board after each turn*/
    printf("\n****Board:********\n");
    print_board(board);
    /*Check if game is over*/
    chk = check_win(board);

    if (chk == 1)
    {
      printf("Player X wins!\n");
      break;
    }
    if (chk == -1)
    {
      printf("Player O wins!\n");
      break;
    }
    if (pos == BOARD_SIZE * BOARD_SIZE)
    {
      // Board is full and nobody won. Stop here: there is no next move to
      // pick, and asking update_pos_weigh for one would report a failure.
      printf("The game is tied!\n");
      break;
    }

    /*Compute next best move*/
    memset(pos_weights,0,sizeof(pos_weights));

    player = 1 + pos % 2;
    int l_player = player;

    //Initialize software block stack
    cout << "Set fixed max block buffer size, _expandSize: " << _expandSize << endl;
    _Block::max_block = _expandSize;
    _Block::n = POS_SIZE;
    Harness::set_block_size(_expandSize);
    class _BlockStack * _stack = new _BlockStack;
    class _Block * _block = new _Block;
    g_initial_block = _block;

    _block->add(board, POS_SIZE, 0);
    int _depth = 0;
    _stack->get(_depth)->block = _block;

    //Start to execute blocked minmax
    // Breadth-first expansion first (it also deletes the initial block) ...
    if (_expandSize >= g_ncheck)
      minimax_expand_bf(_stack, &_depth, &l_player);

    // ... then depth-first evaluation of whatever block is left.
    int df_block_size = _stack->get(_depth)->block->size;
    cout << "This is the max block buffer size for dfs: " << df_block_size << endl;
    if (df_block_size){
      for (int i = 1; i <= g_ncheck; ++i){
        minimax_block(_stack, _depth, i, l_player);
      }
    }
    print_weights();

    delete _stack;
    // Without the breadth-first phase nobody has released the initial block.
    if (_expandSize < g_ncheck) delete _block;

    ret = update_pos_weigh(player,board);
    if(-1 == ret)
    {
      return -1;
    }
    printf("\nOptimal move for player %d is %d",player,ret);
  }
  Harness::stop_timing();

#ifdef BLOCK_PROFILE
  profiler.output();
#endif

  return 0;
}

