/////////////////////////////////////////////////////////
// includes
/////////////////////////////////////////////////////////
#include "mlChol.h"
#include "math_fns.h"
#include "bar.h"
#include "timer.h"
#include "utils.h"
/////////////////////////////////////////////////////////
// shared globals
/////////////////////////////////////////////////////////

  // Kernel input data: 400 floats placed in the ".tcdm" linker section.
  // main() copies the 100 consecutive values starting at offset 100 * coreid
  // into its local 10x10 working array, so the table holds one 100-element
  // input block for each of coreid 0..3.
  // NOTE(review): nothing in this file checks coreid against that limit --
  // confirm the configured core count never exceeds 4.
  __attribute__ ((section(".tcdm"))) static const float fv0[400] = { 303.470856F, 232.923172F, 230.768829F,
    268.232452F, 290.827576F, 266.681854F, 260.26178F, 243.220169F, 204.95726F,
    254.371506F, 232.923172F, 306.786957F, 150.072327F, 240.676498F, 240.213257F,
    262.100861F, 198.56073F, 208.65712F, 176.16304F, 251.689056F, 230.768829F,
    150.072327F, 240.404602F, 197.724838F, 236.858963F, 234.790741F, 234.871063F,
    204.457382F, 159.889771F, 215.81102F, 268.232452F, 240.676498F, 197.724838F,
    282.804321F, 268.153351F, 254.361664F, 255.589432F, 211.94133F, 202.797195F,
    234.541214F, 290.827576F, 240.213257F, 236.858963F, 268.153351F, 333.248505F,
    288.502899F, 287.612122F, 204.519318F, 186.860687F, 282.243958F, 266.681854F,
    262.100861F, 234.790741F, 254.361664F, 288.502899F, 342.523132F, 250.400269F,
    221.994583F, 190.439819F, 260.307373F, 260.26178F, 198.56073F, 234.871063F,
    255.589432F, 287.612122F, 250.400269F, 316.82254F, 215.144745F, 220.304245F,
    274.951F, 243.220169F, 208.65712F, 204.457382F, 211.94133F, 204.519318F,
    221.994583F, 215.144745F, 285.421875F, 201.810257F, 218.668243F, 204.95726F,
    176.16304F, 159.889786F, 202.797195F, 186.860687F, 190.439819F, 220.304245F,
    201.810242F, 196.810059F, 191.85791F, 254.371506F, 251.689056F, 215.81102F,
    234.541214F, 282.243958F, 260.307373F, 274.951F, 218.668243F, 191.85791F,
    304.568939F, 192.720413F, 187.857651F, 191.182724F, 192.309235F, 181.072556F,
    192.601608F, 229.261627F, 169.053772F, 97.7473145F, 67.866188F, 187.857651F,
    359.280731F, 314.138794F, 235.301544F, 355.080902F, 245.752823F, 273.9534F,
    210.029968F, 195.828949F, 160.757233F, 191.182724F, 314.138794F, 389.621216F,
    241.086227F, 334.472F, 257.45929F, 236.9832F, 284.335236F, 231.341217F,
    130.113632F, 192.309235F, 235.301544F, 241.086227F, 257.856354F, 257.257111F,
    231.584412F, 252.30957F, 212.795822F, 143.377777F, 79.846344F, 181.072556F,
    355.080902F, 334.472F, 257.257111F, 481.132324F, 260.147339F, 254.54541F,
    284.495697F, 238.155823F, 201.209244F, 192.601608F, 245.752823F, 257.45929F,
    231.584412F, 260.147339F, 280.765289F, 217.672562F, 212.882431F, 146.165878F,
    91.8943F, 229.261627F, 273.9534F, 236.9832F, 252.30957F, 254.54541F,
    217.672562F, 342.737F, 216.477936F, 157.514282F, 124.563507F, 169.053772F,
    210.029968F, 284.335236F, 212.795822F, 284.495697F, 212.882431F, 216.477936F,
    354.106323F, 182.385559F, 152.80928F, 97.7473145F, 195.828949F, 231.341217F,
    143.377777F, 238.155853F, 146.165894F, 157.514267F, 182.385559F, 191.854477F,
    122.254814F, 67.866188F, 160.757233F, 130.113632F, 79.8463364F, 201.209229F,
    91.8942947F, 124.563515F, 152.809265F, 122.254814F, 148.702759F, 282.681671F,
    267.321167F, 210.169724F, 219.442596F, 116.133614F, 313.660217F, 184.807587F,
    112.703224F, 231.434204F, 244.006027F, 267.321167F, 366.616791F, 274.316376F,
    327.876343F, 152.137451F, 410.695F, 239.623642F, 167.794693F, 271.897644F,
    317.203888F, 210.169724F, 274.316376F, 336.857F, 349.288116F, 142.974274F,
    358.299835F, 232.703049F, 133.79097F, 244.086029F, 334.48761F, 219.442596F,
    327.876343F, 349.288116F, 458.382599F, 196.235962F, 453.637787F, 326.323883F,
    215.591354F, 322.43457F, 418.805298F, 116.133614F, 152.137451F, 142.974274F,
    196.235962F, 152.269592F, 235.53476F, 156.949585F, 119.845047F, 184.52887F,
    163.539902F, 313.660217F, 410.695F, 358.299835F, 453.637787F, 235.53476F,
    549.058594F, 350.008484F, 226.395782F, 391.164307F, 422.684418F, 184.807587F,
    239.623642F, 232.703049F, 326.323883F, 156.949585F, 350.008484F, 266.563416F,
    156.543137F, 245.178375F, 293.531891F, 112.703224F, 167.794693F, 133.79097F,
    215.591354F, 119.845047F, 226.395782F, 156.543137F, 153.599716F, 170.179779F,
    211.887787F, 231.434204F, 271.897644F, 244.086014F, 322.43457F, 184.52887F,
    391.164307F, 245.178375F, 170.179764F, 322.315247F, 297.762512F, 244.006027F,
    317.203888F, 334.487579F, 418.805298F, 163.539902F, 422.684418F, 293.53186F,
    211.887802F, 297.762512F, 426.426392F, 385.477142F, 290.105377F, 208.15271F,
    276.383484F, 327.724884F, 274.768799F, 195.594528F, 231.961853F, 236.81543F,
    125.517914F, 290.105377F, 365.236481F, 130.390488F, 239.91008F, 282.727325F,
    275.931366F, 207.462677F, 274.413208F, 207.663055F, 98.3234406F, 208.15271F,
    130.390488F, 163.724518F, 149.080597F, 142.606567F, 104.238785F, 86.092247F,
    140.764938F, 158.837662F, 96.3741531F, 276.383484F, 239.91008F, 149.080597F,
    271.327423F, 249.644424F, 267.503815F, 208.790161F, 160.691986F, 187.503815F,
    91.5999756F, 327.724884F, 282.727325F, 142.606567F, 249.644424F, 335.381866F,
    310.307129F, 241.36937F, 198.531876F, 191.958038F, 77.014F, 274.768799F,
    275.931366F, 104.238785F, 267.503815F, 310.307129F, 343.373901F, 265.854248F,
    152.787384F, 182.657257F, 58.0079269F, 195.594528F, 207.462677F, 86.092247F,
    208.790161F, 241.36937F, 265.854248F, 272.105896F, 164.66478F, 125.637184F,
    44.6241798F, 231.961853F, 274.413208F, 140.764938F, 160.691986F, 198.531876F,
    152.787384F, 164.66478F, 313.961304F, 168.816238F, 102.921577F, 236.81543F,
    207.663055F, 158.837662F, 187.50383F, 191.958038F, 182.657257F, 125.637184F,
    168.816238F, 220.208511F, 113.273285F, 125.517906F, 98.3234406F, 96.3741608F,
    91.5999832F, 77.0139923F, 58.0079269F, 44.6241798F, 102.921577F, 113.273285F,
    82.2213669F };


    // Acceptance bounds for the result check in main(): 16 floats placed in
    // the ".tcdm" section, four per core (read at offset coreid << 2).
    // Each group of four is two {upper, lower} pairs: the first pair bounds
    // check[0] (sum of all 100 result elements), the second pair bounds
    // check[1] (sum of squared deviations from the mean, divided by 99).
    __attribute__ ((section(".tcdm"))) static const float fv1[16] = { 249.197952F, 249.192978F, 23.1504765F,
    23.1500149F, 297.39035F, 297.38443F, 21.3576164F, 21.3571892F, 315.765961F,
    315.759674F, 23.4114571F, 23.4109879F, 238.491898F, 238.487137F, 22.0633163F,
    22.0628738F };

/////////////////////////////////////////////////////////
// subfunctions
/////////////////////////////////////////////////////////


/////////////////////////////////////////////////////////
// main testing function 
/////////////////////////////////////////////////////////
// Benchmark entry point.
//
// For one core id (or, when LINUX is defined, for every id in 0..N_CORES-1
// in turn) this function:
//   1. runs the factorization kernel KERNEL_ITS times on the 10x10 input
//      block selected from fv0 by coreid,
//   2. reduces the resulting 100-element array to two scalars (sum of all
//      elements, and sum of squared deviations from the mean divided by 99),
//   3. compares both scalars with the {upper, lower} bounds stored in fv1
//      and reports the outcome through disp_error() and flagPassFail().
//
// argc/argv are unused. The LINUX build returns 0; the other build ends by
// calling eoc(0).
//
// NOTE(review): if KERNEL_ITS <= 0 the kernel loop never runs, so `info` and
// `A` are read below without ever having been written -- confirm
// KERNEL_ITS >= 1 in every build configuration.
// NOTE(review): fv0 holds 400 values and fv1 holds 16, i.e. data for coreid
// 0..3 only; coreid is not range-checked here.
int main(int argc, const char * const argv[])
{
  (void)argc;
  (void)argv;

  int coreid;

  boolean_T pass;     // cumulative result of all bound checks
  int k;              // kernel iteration counter
  float A[100];       // 10x10 working array, addressed with flat indices
  int i0;
  int jmax;

  int info;           // 0 if all 10 steps completed, else 1-based index of the failing step
  int j;              // current factorization step (0-based)
  boolean_T exitg2;   // set to leave the factorization loop early
  int jj;             // flat index of the current diagonal entry: j + 10 * j
  float ajj;
  int ix;
  int iy;
  int i;
  float xbar;
  int ia;
  float r;
  float y;
  float check[2];     // {sum of elements, variance-like statistic}
  float golden[4];    // bounds copied from fv1: {upper0, lower0, upper1, lower1}

  /////////////////////////////////////////////////////////
  // main test loop 
  // each core loops over a kernel instance
  /////////////////////////////////////////////////////////
  
  // char str[20] = "test\0";
  // printFloat(str, 0.5, 5);
  // printFloat(str, 1.5, 5);
  // printFloat(str, 0.9, 5);
  // printFloat(str, 0.2, 5);
  // printFloat(str, -1.9, 5);

  // LINUX build: process every core id sequentially in one process.
  // The brace opened by this for-loop is closed inside the matching
  // #ifdef LINUX block at the end of the function.
  #ifdef LINUX
  for(coreid = 0;coreid<N_CORES;coreid++)
  {
#else
  // Non-LINUX build: coreid comes from get_core_id().
  coreid = get_core_id();

  print("starting %d kernel iterations... (coreid = %d)\n",KERNEL_ITS,coreid,0,0);

  synch_barrier();
  
  // Only core 0 resets and starts the timer.
  if(coreid == 0)
  {
    reset_timer();
    start_timer();
  }

#endif

  for(k = 0; k < KERNEL_ITS; k++)
  {
    // matlab kernel

    /*  mlCholWrap */
    // Reload this core's 100 input values so that every iteration starts
    // from the same data (the factorization below overwrites A in place).
    for (i0 = 0; i0 < 10; i0++) {
      for (jmax = 0; jmax < 10; jmax++) {
        A[jmax + 10 * i0] = fv0[(jmax + 10 * i0) + 100 * coreid];
      }
    }

    info = 0;
    j = 0;
    exitg2 = false;
    // Up to ten steps (j = 0..9); stops early when a pivot is not positive.
    while ((!exitg2) && (j + 1 < 11)) {
      jj = j + j * 10;
      ajj = fZero();
      if (j < 1) {
        // first step: nothing to accumulate
      } else {
        // Sum of squares of A[j], A[j + 10], ..., A[j + 10 * (j - 1)].
        // ix and iy walk the same indices, so each product is a square.
        ix = j;
        iy = j;
        for (jmax = 1; jmax <= j; jmax++) {
          ajj += A[ix] * A[iy];
          ix += 10;
          iy += 10;
        }
      }

      // Candidate pivot: diagonal entry minus the accumulated squares.
      ajj = A[jj] - ajj;
      if (ajj > fZero()) {
        ajj = (real32_T)fSqrt(ajj);
        A[jj] = ajj;
        if (j + 1 < 10) {
          if (j == 0) {
            // first step: no earlier entries to subtract
          } else {
            // Update the 9 - j entries that follow the diagonal
            // (A[jj + 1] .. A[jj + 9 - j]). The outer loop runs j times
            // (i = j + 2, j + 12, ...); pass number m (0-based) adds
            // -A[j + 10 * m] times A[j + 1 + 10 * m] .. A[9 + 10 * m].
            ix = j;
            i0 = (j + 10 * (j - 1)) + 2;
            for (i = j + 2; i <= i0; i += 10) {
              xbar = -A[ix];
              iy = jj + 1;
              jmax = (i - j) + 8;
              for (ia = i; ia <= jmax; ia++) {
                A[iy] += A[ia - 1] * xbar;
                iy++;
              }

              ix += 10;
            }
          }

          // Scale the entries after the diagonal, A[jj + 1] .. A[10 * j + 9],
          // by the reciprocal of the pivot.
          ajj = fDiv(1.0F, ajj);
          i0 = (jj - j) + 10;
          for (jmax = jj + 1; jmax + 1 <= i0; jmax++) {
            A[jmax] *= ajj;
          }
        }

        j++;
      } else {
        // Non-positive pivot: store it, record the 1-based step in info, stop.
        A[jj] = ajj;
        info = j + 1;
        exitg2 = true;
      }
    }
  }

#ifndef LINUX
  synch_barrier();
  
  // Only core 0 stops the timer and passes the elapsed time to cycleCount().
  if(coreid == 0)
  {
    stop_timer();
    cycleCount((int)get_time());
  }
#endif


  /////////////////////////////////////////////////////////
  // check results
  /////////////////////////////////////////////////////////




  // Number of successfully processed steps from the last kernel iteration.
  if (info == 0) {
    jmax = 10;
  } else {
    jmax = info - 1;
  }

  // Clear the entries A[10 * j + 0 .. 10 * j + (j - 1)] for j = 1..jmax-1,
  // i.e. the part of each 10-element group that precedes its diagonal entry.
  for (j = 1; j + 1 <= jmax; j++) {
    for (i = 1; i <= j; i++) {
      A[(i + 10 * j) - 1] = fZero();
    }
  }

  // Both ajj and xbar accumulate the sum of all 100 elements (jmax + 1 and
  // ix address the same element on every pass); ajj is kept as the raw sum,
  // xbar is turned into the mean below.
  ajj = A[0];
  ix = 0;
  xbar = A[0];
  for (jmax = 0; jmax < 99; jmax++) {
    ajj += A[jmax + 1];
    ix++;
    xbar += A[ix];
  }

  // Mean, then sum of squared deviations from the mean divided by 99.
  xbar = fDiv(xbar,100.0F);
  ix = 0;
  r = A[0] - xbar;
  y = r * r;
  for (jmax = 0; jmax < 99; jmax++) {
    ix++;
    r = A[ix] - xbar;
    y += r * r;
  }

  y = fDiv(y,99.0F);
  check[0] = ajj;
  check[1] = y;
  pass = true;
  for (i0 = 0; i0 < 2; i0++) {
    // Fetch the {upper, lower} pair for statistic i0 of this core.
    for (jmax = 0; jmax < 2; jmax++) {
      golden[jmax + (i0 << 1)] = fv1[(jmax + (i0 << 1)) + (coreid << 2)];
    }

    // Require lower <= check[i0] <= upper.
    // NOTE(review): pass is cumulative, so once the first statistic fails,
    // disp_error() is also called with a set error flag for the second one.
    pass = pass && (check[i0] <= golden[i0 << 1]);
    pass = pass && (check[i0] >= golden[1 + (i0 << 1)]);
    disp_error(!pass, check[i0] ,golden[i0 << 1] ,golden[1 + (i0 << 1)], i0, coreid);

  }

  flagPassFail(pass, coreid);

/////////////////////////////////////////////////////////
// synchronize and exit
/////////////////////////////////////////////////////////

#ifdef LINUX
  // closes the per-core for-loop opened in the first #ifdef LINUX block
  }
  return 0;

#else
  
  synch_barrier();
  
  eoc(0);
#endif

}

