/*
 * LSU EE 7700-1 Spring 2006  Sample Code
 *
 * Some Branch Predictor Simulators (Using Shade)
 *
 * gshare
 * Bimodal
 * YAGS:      http://www.ece.lsu.edu/tca/s/eden-98.pdf
 * Perceptron http://www.ece.lsu.edu/tca/s/p280-tarjan.pdf
 *
 * Note: The branch predictors for a given run the branch predictors
 * use the same amount of global history but they DO NOT use the
 * same amount of storage. Bimodal is the smallest, then gshare, then
 * YAGS. The size of the perceptron predictor varies.
 *
 *
 * To Run:
 *
 *  echo MY_BENCHMARK MY_BENCHMARK_ARGS | percep HISTORY_LEN [BHT_LG]
 *
 */

#define TR_REGS
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>

#include <IHASH.h>
#include <ITYPES.h>
#include <trace.h>
#include <stdtr.h>
#include <trctl.h>

extern int shade_sshell(char *command, void (*analyzer)());

typedef enum
  {
    BPU_Normal = 0,  // Update GHR using outcome. For gshare.
    BPU_Include = 0,
    BPU_None = 1,        // Don't touch GHR. For bimodal.
    BPU_Exclude = 1,
    BPU_Pad = 2          // Shift a zero into GHR.
  }
BP_GHR_Update_Method;

const char* const bpu_str = "IEP";

static void sim_init(int argc, char **argv, char *bm_command);
static void sim_analyzer_phase_one();
static void sim_after_phase_one();
static void sim_results_phase_one_write();

int shade_main(int argc, char **argv)
{

  /* Copy first line of stdin to bm_command. */

  static char bm_command[200];
  {
    char *bptr = bm_command;
    char* const bstop = bm_command + sizeof(bm_command);
    while( 1 )
      {
        char c = getchar();
        if( c == EOF || c == '\n' ) break;
        if( bptr == bstop )
          {
            fprintf(stderr,"Benchmark command too long, exiting.\n");
            return 1;
          }
        *bptr++ = c;
      }
    *bptr++ = 0;
  }

  /* Shade Documentation
   *
   * Overview:     http://www.ece.lsu.edu/tca/intro.pdf
   * Ref  Manual:  http://www.ece.lsu.edu/tca/shade_man.pdf
   *
   * Trace Structure  (trace->tr_pc, trace->ih, etc.)
   *   /data4/koppel/shadekit/shade.v9.elf/src/include/trace.h
   * Trace Information Constants (TC_PC, TC_TAKEN, etc.)
   *   /data4/koppel/shadekit/shade.v9.elf/src/include/trctl.h
   * Instruction Types (IT_ANY, IT_BRANCH, etc.)
   *   /data4/koppel/shadekit/spixtools.v9.elf/src/include/ITYPES.h
   */

  /* Tell Shade size of trace structure. (It can be customized.) */
  shade_trctl_trsize( sizeof(Trace) );

  /*
   * Tell Shade which instructions to trace and what information to collect.
   */
  shade_trctl_it
    (IT_ANY,  // Which instructions. (All)
     1,       // Turn tracing on or off. (On)
     0,       // Trace annulled instructions?  (No)
     TC_TAKEN | TC_PC | TC_IH  // What info to collect? (Outcome,pc,opcode)
     );

  /*
   * Phase One:  Collect data on bimodal and gshare predictors.
   */

  sim_init(argc, argv, bm_command);
  shade_sshell(bm_command,sim_analyzer_phase_one);
  sim_after_phase_one();
  return 0;
}

/* ************************************************************************* */
/*
 * Branch Predictors
 *
 */

#define SINC2(a) { if( (a) < 3 ) (a)++; }
#define SDEC2(a) { if( (a) > 0 ) (a)--; }

#define BP_ACCURACY(bp) bp_gshare_accuracy((BP_gshare*)bp)

typedef struct {
  char counter;
  int tag;
}
BP_yags_pht_elt;

typedef struct
{
  int correct, wrong;
  double accuracy;
  unsigned int ghr;
  int history_length;
  BP_yags_pht_elt *pht_t, *pht_n;
  int bht_lg;
  int bht_size;
  int bht_mask;
  char *bht;
  int tag_size; // Size of tag stored in phts.
  int pht_size;
  int pht_mask;
  int tag_mask;
  int capacity_bits;
}
BP_yags;

void bp_yags_reset(BP_yags *g);

BP_yags*
bp_yags_new(int history_length, int bht_lg)
{
  BP_yags* const g = (BP_yags*) calloc(1,sizeof(BP_yags));
  g->history_length = history_length;
  g->pht_size = 1 << history_length;
  g->pht_mask = g->pht_size - 1;
  g->pht_t = (BP_yags_pht_elt*) calloc(g->pht_size,sizeof(g->pht_t[0]));
  g->pht_n = (BP_yags_pht_elt*) calloc(g->pht_size,sizeof(g->pht_n[0]));
  g->bht_lg = bht_lg;
  g->bht_size = 1 << g->bht_lg;
  g->bht_mask = g->bht_size - 1;
  g->bht = (char*) calloc(g->bht_size,1);
  g->tag_size = 6;
  g->tag_mask = ( 1 << ( g->tag_size + 2 ) ) - 1;
  const int pht_capacity = g->pht_size * ( 2 + g->tag_size );
  const int bht_capacity = g->bht_size * 2;
  g->capacity_bits = pht_capacity * 2 + bht_capacity;
  bp_yags_reset(g);
  return g;
}

void
bp_yags_reset(BP_yags *g)
{
  memset(g->pht_t, 0, g->pht_size * sizeof(g->pht_t[0]) );
  memset(g->pht_n, 0, g->pht_size * sizeof(g->pht_t[0]) );
  memset(g->bht, 0, g->bht_size * sizeof(g->bht[0]) );
  g->ghr = 0;
  g->accuracy = -1;
}

char*
bp_yags_config(BP_yags *g)
{
  static char buffer[100];
  sprintf(buffer,"Y%d/%d/%d",
          g->history_length,g->bht_lg,g->tag_size);
  return buffer;
}

int
bp_yags_predict(BP_yags *g, unsigned int pc, int outcome)
{
  char* const bht_entry = &g->bht[ ( pc >> 2 ) & g->pht_mask ];
  const int pred_bm = bht_entry[0] > 1;
  const int pht_idx = ( g->ghr ^ ( pc >> 2 ) ) & g->pht_mask;
  BP_yags_pht_elt* const pht_elt = ( pred_bm ? g->pht_t : g->pht_n ) + pht_idx;
  const int tag = pc & g->tag_mask;
  const int hit = pht_elt->tag == tag;
  const int prediction = hit ? pht_elt->counter > 1 : pred_bm;
  const int bm_correct = pred_bm == outcome;
  const int yg_correct = prediction == outcome;

  if( bm_correct || !yg_correct )
    {
      if( outcome ) { SINC2( *bht_entry ); } else { SDEC2( *bht_entry ); }
    }

  if( !bm_correct && !hit )
    {
      pht_elt->tag = tag;
      pht_elt->counter = outcome ? 3 : 0;
    }
  else if ( hit )
    {
      if( outcome ) { SINC2( pht_elt->counter ); }
      else { SDEC2( pht_elt->counter ); }
    }

  if( yg_correct ) g->correct++; else g->wrong++;
  g->ghr = ( ( g->ghr << 1 ) | outcome ) & g->pht_mask;
  return yg_correct;
}

typedef struct
{
  int correct, wrong;
  double accuracy;              /* Set after call to bp_gshare_accuracy. */
  unsigned int ghr;
  char *pht;
  int history_length;
  int pht_size;
  int pht_mask;
  int capacity_bits;
}
BP_gshare;

void bp_gshare_reset(BP_gshare *g);

/*
 * Create and return a new branch predictor.
 */
BP_gshare*
bp_gshare_new(int history_length)
{
  BP_gshare* const g = (BP_gshare*) malloc(sizeof(BP_gshare));
  g->history_length = history_length;
  g->pht_size = 1 << history_length;
  g->pht_mask = g->pht_size - 1;
  g->pht = (char*) malloc(g->pht_size);
  g->capacity_bits = g->pht_size * 2;
  bp_gshare_reset(g);
  return g;
}

/*
 * Reset branch predictor tables and statistics.
 */
void
bp_gshare_reset(BP_gshare *g)
{
  g->ghr = 0;
  g->correct = g->wrong = 0;
  g->accuracy = -1;
  bzero(g->pht,g->pht_size);
}

/*
 * Predict outcome of branch and update GHR using actual outcome.
 *
 * Method specifies how to update GHR: BPU_Include for gshare, BPU_Exclude
 * for bimodal, and BPU_Pad for ??.
 * Return 1 if prediction correct and 0 if prediction wrong.
 */
int
bp_gshare_predict
(BP_gshare *g, unsigned int pc, int outcome, BP_GHR_Update_Method method)
{
  char* const pht_entry = &g->pht[ ( g->ghr ^ ( pc >> 2 ) ) & g->pht_mask ];
  const int prediction = pht_entry[0] > 1;
  if( prediction == outcome ) g->correct++; else g->wrong++;
  if(  outcome && pht_entry[0] < 3 ) pht_entry[0]++;
  if( !outcome && pht_entry[0] > 0 ) pht_entry[0]--;
  if( method == BPU_Include )
    g->ghr = ( ( g->ghr << 1 ) | outcome ) & g->pht_mask;
  else if ( method == BPU_Pad )
    g->ghr = ( g->ghr << 1 ) & g->pht_mask;
  return prediction == outcome;
}

/*
 * Return accuracy of predictor. Once this is called the accuracy is
 * available in g->accuracy.
 */
double
bp_gshare_accuracy(BP_gshare *g)
{
  if( g->accuracy < 0 )
    {
      const double exec = g->correct + g->wrong;
      g->accuracy = exec ? g->correct / exec : 0;
    }
  return g->accuracy;
}

typedef struct
{
  int correct, wrong;
  double accuracy;              /* Set after call to bp_gshare_accuracy. */

  unsigned int ghr;
  unsigned int *gpr;  // Global path register: Holds addresses of recent br.
  int gpr_idx;
  char** weight_tables;
  char** weight_entries;  // Entries used to make a particular prediction.

  int weight_bits;
  int weight_max, weight_min;

  char *bt; // Bias table.
  int bt_lg, bt_size, bt_mask;

  int spec_wt_per_pred;  // Weights per prediction.
  int spec_b_per_wt;     // Branches per weight.
  int spec_a;            // If non-zero, use predicted branch pc for wt idx.
  int spec_p;
  int spec_g_mask;

  int history_length;
  int wt_lg, wt_size, wt_mask;

  int capacity_bits;  // Storage needed to implement.

}
BP_perceptron;

BP_perceptron*
bp_perceptron_new(int history_lengthp, int bt_lg, int b_per_wt, int tight)
{
  BP_perceptron* const bp = (BP_perceptron*) malloc(sizeof(BP_perceptron));

  bp->weight_bits = 7;

  bp->spec_b_per_wt = b_per_wt;
  bp->spec_wt_per_pred = history_lengthp / bp->spec_b_per_wt;
  bp->spec_g_mask = ( 1 << bp->spec_b_per_wt ) - 1;
  bp->spec_p = 0;
  bp->spec_a = 1;

  bp->bt_lg = bt_lg;
  bp->bt_size = 1 << bt_lg;
  bp->bt_mask = bp->bt_size - 1;

  bp->wt_lg = tight || bt_lg < bp->spec_b_per_wt ? bp->spec_b_per_wt : bt_lg;
  bp->wt_size = 1 << bp->wt_lg;
  bp->wt_mask = bp->wt_size - 1;
  bp->history_length = bp->spec_wt_per_pred * bp->spec_b_per_wt;
  bp->weight_tables =
    (char**) malloc( ( 1 + bp->spec_wt_per_pred ) * sizeof(char*) );
  bp->weight_entries =
    (char**) malloc( ( 1 + bp->spec_wt_per_pred ) * sizeof(char*) );

  bp->capacity_bits =
    bp->wt_size * bp->spec_wt_per_pred * bp->weight_bits
    + bp->bt_size * bp->weight_bits;

  bp->weight_max = 1 << ( bp->weight_bits - 1 );
  bp->weight_min = - bp->weight_max;

  int i;
  for(i=0; i<bp->spec_wt_per_pred; i++)
    bp->weight_tables[i] = (char*) calloc( bp->wt_size, 1 );
  bp->weight_tables[i] = (char*) calloc( bp->bt_size, 1 );
  bp->gpr = (unsigned int*) calloc( bp->history_length, sizeof(int) );
  bp->gpr_idx = 0;

  bp->correct = bp->wrong = 0;
  bp->accuracy = -1;
  return bp;
}

char*
bp_perceptron_config(BP_perceptron *bp)
{
  static char buffer[100];
  snprintf(buffer,sizeof(buffer),"%s%s%s-%2d/%2d-%2d-%2d",
           bp->spec_a ? "A" : "_",
           bp->spec_p ? "P" : "_",
           bp->spec_g_mask ? "G" : "_",
           bp->history_length,
           bp->spec_wt_per_pred,
           bp->wt_lg,
           bp->bt_lg
           );
  return buffer;
}

#define INC(a) { if( (a) < g->weight_max ) (a)++; }
#define DEC(a) { if( (a) > g->weight_min ) (a)--; }

int
bp_perceptron_predict(BP_perceptron *g, unsigned int pc, int outcome)
{
  int ghr = g->ghr;
  int gpr_idx = g->gpr_idx;
  char **wt_ptrs = g->weight_entries;
  const int idx_init_val = g->spec_a ? pc >> 2 : 0;
  wt_ptrs[g->spec_wt_per_pred] =
    &g->weight_tables[g->spec_wt_per_pred][(pc>>2)&g->bt_mask];
  int i;
  for( i = 0; i < g->spec_wt_per_pred; i++)
    {
      int idx = idx_init_val;
      if( g->spec_p ) idx = idx ^ g->gpr[gpr_idx];
      if( g->spec_g_mask ) idx = idx ^ ( ghr & g->spec_g_mask );
      char* const wt = g->weight_tables[i];
      wt_ptrs[i] = &wt[ idx & g->wt_mask ];
      ghr >>= g->spec_b_per_wt;
      gpr_idx -= g->spec_b_per_wt;
      if( gpr_idx < 0 ) gpr_idx += g->history_length;
    }

  int score = 0;
  for( i = 0; i <= g->spec_wt_per_pred; i++) score += *wt_ptrs[i];

  const int pred = score >= 0;
  const int correct = pred == outcome;
  const int abs_score = score < 0 ? -score : score;
  if( correct ) g->correct++; else g->wrong++;
  if( !correct || abs_score < 10 )
    {
      if( outcome )
        for( i = 0; i <= g->spec_wt_per_pred; i++) { INC( *wt_ptrs[i] ); }
      else
        for( i = 0; i <= g->spec_wt_per_pred; i++) { DEC( *wt_ptrs[i] ); }
    }

  g->gpr_idx++;
  if( g->gpr_idx == g->history_length ) g->gpr_idx = 0;
  g->ghr = ( g->ghr << 1 ) | outcome;
  g->gpr[g->gpr_idx] = pc >> 2;

  return correct;
}

double
bp_perceptron_accuracy(BP_perceptron *g)
{
  if( g->accuracy < 0 )
    {
      const double exec = g->correct + g->wrong;
      g->accuracy = exec ? g->correct / exec : 0;
    }
  return g->accuracy;
}


/* ************************************************************************* */
/*
 * Branch Information Table (BIT)
 *
 * Holds information about each branch instruction in program.
 */

typedef struct
{
  unsigned int pc; /* Address of branch. */
  int rank;        /* Rank in number of executions: 0 most frequent, etc. */
  int round;

  int correct_p1, wrong_p1;  /* Prediction results for phase-one gshare. */
  int correct_bm, wrong_bm;  /* Prediction results for phase-one bimodal. */
  int correct_yg, wrong_yg;
  int correct_pr, wrong_pr;
  int icount;                /* Number of time this branch executes. */
  double icount_inv;
  double icount_frac;        /* icount / executions of all branches. */

  double scaled_change;
}
BIT_Elt;

typedef struct
{
  int icount;   /* Number of instructions.   */
  int icount_branch;  /* Number of branches. */
  char *bm_command;
  int print_amt_max;            /* Maximum number of insn to print. */
  int history_length;           /* Length of ghr. */

  /* Branch information table size variables. */
  int bit_lg;    /* Log_2 size. (Input or constant.) */
  int bit_size;  /* Computed. */
  int bit_mask;  /* Computed. */
  int bit_occ;   /* Number of branches in table. */

  BP_yags *yg_phase_one;
  BP_perceptron *pr_phase_one;  /* perceptron */
  BP_perceptron *pr_array[20];
  BP_gshare *gs_phase_one;      /* gshare */
  BP_gshare *bm_phase_one;      /* bimodal */

  BIT_Elt *bit;          /* Branch information table. */
  BIT_Elt **bit_sorted;  /* Branch information table sorted by executions. */
}
Sim_Info;

Sim_Info sim_info;

/*
 * Return branch information table entry for PC.
 *
 * If there is a collision (two branches map to same element) table
 * entry is reset.  This should happen rarely, if it does increase
 * sim_info.bit_lg.
 */
BIT_Elt*
bit_get(Sim_Info *si, unsigned int pc)
{
  BIT_Elt* const bi =
    &si->bit[ si->bit_mask &
              ( ( pc ^ ( pc >> si->bit_lg ) ^ ( pc >> 8 ) ) >> 2 )];
  if( bi->pc != pc )
    {
      bi->pc = pc;
      bi->correct_p1 = bi->wrong_p1 = 0;
      bi->correct_bm = bi->wrong_bm = 0;
    }
  return bi;
}

/*
 * Return ordering of two BIT entries (for qsort).
 *
 * When assigned, sort order is number of times branch executes.
 */
int
comp_mispred(const void *ap, const void *bp)
{
  BIT_Elt* const a = *((BIT_Elt**)ap);
  BIT_Elt* const b = *((BIT_Elt**)bp);
  const int order = b->correct_p1 + b->wrong_p1 - a->wrong_p1 - a->correct_p1;
  /* If it's a tie put them in prog order. */
  if( order == 0 ) return a->pc - b->pc;
  return order;
}


/* ************************************************************************* */
/*
 * Simulation Routines
 *
 */

void
sim_init(int argv, char **argc, char *bm_command)
{
  int i;

  if( argv < 2 )
    {
      fprintf(stderr,"Usage: %s HISTORY_LEN [WT_LG] [BHT_LG]\n",argc[0]);
      exit(1);
    }

  /* Tune this for preference. (Number of insn to print info about.) */
  sim_info.print_amt_max = 20;

  /* Make this large enough so branches don't share entries. */
  sim_info.bit_lg = 19; // Size of branch information table.

  sim_info.history_length = atoi(argc[1]);
  int wt_lg = argv > 2 ? atoi(argc[2]) : sim_info.history_length;
  int bht_lg = argv > 3 ? atoi(argc[3]) : sim_info.history_length;

  sim_info.bit_size = 1 << sim_info.bit_lg;
  sim_info.bit_mask = sim_info.bit_size - 1;
  sim_info.bit = (BIT_Elt*) calloc(sim_info.bit_size,sizeof(sim_info.bit[0]));
  sim_info.bit_sorted = (BIT_Elt**) malloc(sim_info.bit_size*sizeof(void*));
  for(i=0; i<sim_info.bit_size; i++) sim_info.bit_sorted[i] = &sim_info.bit[i];
  sim_info.gs_phase_one = bp_gshare_new(sim_info.history_length);
  sim_info.bm_phase_one = bp_gshare_new(sim_info.history_length);
  sim_info.yg_phase_one = bp_yags_new(sim_info.history_length,bht_lg);
  sim_info.pr_phase_one =
    bp_perceptron_new(32,sim_info.history_length-3,4,0);
  BP_perceptron **bpptr = sim_info.pr_array;
  for(i=0; i < 6; i++)
    {
      const int wt_table_lg = sim_info.history_length - i;
      const int b_per_wt = sim_info.history_length >> i;
      if( b_per_wt )
        *bpptr++ =
          bp_perceptron_new(sim_info.history_length,wt_table_lg,b_per_wt,0);
      const int b_per_wt_32 = 32 >> i;
      if( b_per_wt_32 && b_per_wt_32 < 32 )
        *bpptr++ =
          bp_perceptron_new(32,wt_table_lg,b_per_wt_32,0);
    }


  sim_info.icount_branch = 0;
  sim_info.bm_command = bm_command;
}

/*
 * Phase One
 *
 * Compute gshare and bimodal prediction accuracies.
 * Record per-instruction prediction accuracies.
 */
void
sim_analyzer_phase_one()
{
  Trace *trace_entry;

  while( ( trace_entry = shade_step() ) )
    {
      /* One iteration per (traced) instruction.  */

      /* ih: Instruction hash, sort of an ideal opcode. */
      const int ih = trace_entry->tr_ih;
      sim_info.icount++;
      if( !ih_isbranch(ih) ) continue; // Continue if not an integer branch.
      sim_info.icount_branch++;

      int i;
      for(i=0; i<sim_info.history_length && sim_info.pr_array[i]; i++)
        bp_perceptron_predict
          (sim_info.pr_array[i], trace_entry->tr_pc, trace_entry->tr_taken);

      const int pr_correct = bp_perceptron_predict
        (sim_info.pr_phase_one, trace_entry->tr_pc, trace_entry->tr_taken);

      /* Bimodal Prediction */
      const int bm_correct = bp_gshare_predict
        (sim_info.bm_phase_one,
         trace_entry->tr_pc, trace_entry->tr_taken, BPU_Exclude);

      /* gshare Prediction */
      const int gs_correct = bp_gshare_predict
        (sim_info.gs_phase_one,
         trace_entry->tr_pc, trace_entry->tr_taken, BPU_Include);

      /* yags Prediction */
      const int yg_correct = bp_yags_predict
        (sim_info.yg_phase_one, trace_entry->tr_pc, trace_entry->tr_taken);

      /* Record outcome for this branch. */
      BIT_Elt* const bi = bit_get(&sim_info,trace_entry->tr_pc);
      if( gs_correct ) bi->correct_p1++; else bi->wrong_p1++;
      if( bm_correct ) bi->correct_bm++; else bi->wrong_bm++;
      if( pr_correct ) bi->correct_pr++; else bi->wrong_pr++;
      if( yg_correct ) bi->correct_yg++; else bi->wrong_yg++;
    }
}

void
sim_after_phase_one()
{
  BP_ACCURACY(sim_info.gs_phase_one);
  BP_ACCURACY(sim_info.bm_phase_one);
  BP_ACCURACY(sim_info.pr_phase_one);
  BP_ACCURACY(sim_info.yg_phase_one);

  /* Prepare sorted list of BIT entries. */
  qsort(sim_info.bit_sorted,sim_info.bit_size,
        sizeof(sim_info.bit_sorted[0]),comp_mispred);

  const double icount_all_inv = 1.0 / sim_info.icount_branch;

  int i;
  for(i = 0; i < sim_info.bit_size; i++)
    {
      BIT_Elt* const bi = sim_info.bit_sorted[i];
      bi->icount = bi->correct_p1 + bi->wrong_p1;
      if( bi->icount == 0 ) break;
      bi->rank = i;
      bi->icount_frac = bi->icount * icount_all_inv;
      bi->icount_inv = 1.0 / bi->icount;
    }
  sim_info.bit_occ = i;

  sim_results_phase_one_write();
}

/* ************************************************************************* */
/*
 * Write Results
 *
 */

void
sim_results_phase_one_write()
{
  BP_gshare* const gs = sim_info.gs_phase_one;
  BP_gshare* const bm = sim_info.bm_phase_one;
  BP_perceptron* const pr = sim_info.pr_phase_one;
  BP_yags* const yg = sim_info.yg_phase_one;
  int i;

  printf("Found %d instructions of which %d were branches.\n",
         sim_info.icount, sim_info.icount_branch);

  for(i=0; i<sim_info.history_length && sim_info.pr_array[i]; i++)
    printf("Percep %s  acc %.4f  size %d\n",
           bp_perceptron_config(sim_info.pr_array[i]),
           BP_ACCURACY(sim_info.pr_array[i]),
           sim_info.pr_array[i]->capacity_bits
           );

  printf("Perceptron config: %s, size %d bits\n",
         bp_perceptron_config(pr), pr->capacity_bits);
  printf("YAGS config %s, size %d bits\n",
         bp_yags_config(yg),yg->capacity_bits);
  printf("gshare size %d bits,  bm size %d bits.\n",
         gs->capacity_bits, bm->capacity_bits);
  printf("For ghr len %d: ", sim_info.history_length);
  printf("Bimodal %.4f,  gshare %.4f,  yags %.4f,  percep %.4f\n",
         bm->accuracy, gs->accuracy, yg->accuracy, pr->accuracy);
  printf("Rnk Insn Addr  bimodal  GS:Corr    Wrong  gshare  yags    percep\n");

  for(i=0; i<sim_info.print_amt_max; i++)
    {
      BIT_Elt* const bi = sim_info.bit_sorted[i];
      const int c = bi->correct_p1;
      const int w = bi->wrong_p1;
      const double cb = bi->correct_bm;
      if( c + w == 0 ) break;
      printf("%3d 0x%08x  %.4f  %7d  %7d  %.4f  %.4f  %.4f\n",
             bi->rank, bi->pc, cb/(c+w),
             c, w, c/((double)(c+w)),
             ((double)bi->correct_yg) / (c+w),
             ((double)bi->correct_pr) / (c+w)
             );
    }
}