/// LSU EE 4720 -- Spring 2002 -- Computer Architecture
//
/// Pipelined Hardwired Control MIPS, Dynamically Scheduled
//
// Implements a small subset of MIPS32 instructions and features.
//
// Time-stamp: <21 April 2002, 14:16:27 CDT, koppel@sol>
//

/// Dependencies

 /// To compile and run this description the following are also needed:
//
// mipspipetb.v   The testbench.  It is included in here for convenience.
//
// mips_memory.v  Memory system interface.  Also included for convenience.
//
// An "assembled" program, for example dstest.v.  The name of the
//  file is specified in the MIPS_PROG macro, the testbench uses the
//  macro to include the file.
//
// ped            A Perl script used to format pipeline execution diagrams.
//                It is invoked by the testbench.

/// Under Construction

/// Limitations
//
//  This code will probably never implement all of MIPS32, but the
//  limitations below will be addressed at some time.
//
//  No bypassing, so there is large latency between instructions.
//  No load or store instructions.
//  No floating-point operations.


 /// Specify Program to Load
//
// A quick-and-dirty method of loading a program is used. (For now.)
// Therefore the description must be re-compiled each time the program
// is changed.  (The program is assembled by SPIM and some perl code.)
//
`define MIPS_PROG "dstest.v"

 /// Testbench Settings
//

// Maximum number of cycles to simulate.
//
`define CYCLE_LIMIT 50000

// Maximum number of cycles to print register and instruction data.
//
`define WATCH_LIMIT 100

// Maximum number of instruction states to trace. (Roughly product of
// number of cycles times average number of in-flight instructions.)

`define PTRACE_LIMIT 100000

 /// Unexpected Macro
//
// Used in the default case item of case statements when the default
// item should not be executed.  Used in case statements that assign
// the same register in each item.  If the default item is executed
// (indicating a programming bug) simulation is stopped and hopefully
// the bug is fixed.  For synthesis, the unexpected macro would assign
// the variable, avoiding the synthesis of a latch.  Alas, Leonardo
// Spectrum does not recognize macros with parameters so a task is
// used to assign the variable.  That task must be included in each
// module. (If Leonardo understood hierarchical references, the task
// could be put in a special utility module, but it can't.)

`ifdef exemplar
 `define UNEXPECTED unexpected
`else
 `define UNEXPECTED(var,ctrl) begin var = 0; if( ctrl[0] !== 1'bx ) $stop; end 0:
`endif


///
/// MIPS Processor
///

module cpu_p1(exc,data_out_2, addr_1,addr_2,size_2,we_2,
           data_in_1,data_in_2, mem_error_in_1,mem_error_in_2,reset,clk);
   input [31:0] data_in_1, data_in_2;
   input [2:0]  mem_error_in_1, mem_error_in_2;
   input reset,clk;
   output [7:0] exc;
   output [31:0] data_out_2, addr_1, addr_2;
   output [1:0]  size_2;
   output        we_2;

   ///
   /// MODULE CONFIGURATION CONSTANTS
   ///

   /// Debugging
   //

   // Skip ready instruction if $random & SCHED_RDELAY_MASK != 0.
   //
   parameter SCHED_RDELAY_MASK = 7;


   /// Reorder Buffer
   //
   parameter ROB_lg = 3;
   parameter ROB_sze = (1<<ROB_lg);
   `define ROB_idxr ROB_lg-1:0
   `define ROB_szer 0:ROB_sze-1

   /// Physical Register File
   //
   parameter PRF_sze = ROB_sze + 32;
   parameter PRF_lg = ROB_sze >= 32 ? ROB_lg + 1 : 6;
   `define PRF_idxr PRF_lg-1:0

   // Register Map Vector (Used to store 32-element register map in one vector.)
   `define RMV_r (32*PRF_lg-1):0

   /// Integer (so far) Instruction Queue
   //
   parameter IQI_sze = 8; // KEEP idxr UPDATED!!  Yes, YOU, it's NOT automatic!
   `define IQI_idxr 3:0
   `define IQI_1r IQI_sze-1:0
   `define IQI_2r IQI_sze*2-1:0

   /// Branch Queue
   //
   parameter BQ_lg = 2;  // >0 for now
   parameter BQ_sze = 1 << BQ_lg;
   `define BQ_idxr BQ_lg-1:0
   // Prediction Mask
   `define BQ_pmr BQ_sze-1:0


   ///
   /// MIPS CONSTANTS
   ///
   //
   // Defined by the ISA

   /// "opcode" Field Values
   //
   parameter  O_rfmt = 6'h0;
   parameter  O_j    = 6'h2;
   parameter  O_beq  = 6'h4;
   parameter  O_bne  = 6'h5;
   parameter  O_addi = 6'h8;
   parameter  O_slti = 6'ha;
   parameter  O_andi = 6'hc;
   parameter  O_ori  = 6'hd;
   parameter  O_lui  = 6'hf;
   parameter  O_lw   = 6'h23;
   parameter  O_lbu  = 6'h24;
   parameter  O_sw   = 6'h2b;
   parameter  O_sb   = 6'h28;

   /// "function" Field Values
   //
   parameter  F_sll = 6'h0;
   parameter  F_srl = 6'h2;
   parameter  F_sys = 6'hc;
   parameter  F_add = 6'h20;
   parameter  F_sub = 6'h22;
   parameter  F_and = 6'h24;
   parameter  F_or  = 6'h25;
   parameter  F_xor = 6'h26;


   ///
   /// IMPLEMENTATION CONSTANTS
   ///
   //
   // Defined for this implementation, not standardized.  These codes
   // are used by other modules in this implementation, so they should
   // be changed consistently.

   /// Processor Exception Codes
   //
   parameter   EXC_none   = 8'd0;
   parameter   EXC_if_bus = 8'd1; // Bus Error (Mis-aligned address.)
   parameter   EXC_if_seg = 8'd2; // Bad Address
   parameter   EXC_id_ins = 8'd3; // Illegal (Reserved) Instruction
   parameter   EXC_id_sys = 8'd5; // Syscall Instruction
   parameter   EXC_me_bus = 8'd6; // Bus Error (Mis-aligned address.)
   parameter   EXC_me_seg = 8'd7; // Bad Address
   //
   // The exception codes above are not MIPS codes, though they are
   // similar.  (For one thing, MIPS does not use the bus exception
   // for a misaligned address.)  For the real codes see Table 6-17 in
   //http://www.ece.lsu.edu/ee4720/mips32v3.pdf


   /// ALU Operations + Extra Bit
   //
   // The lower 5 bits of the codes below are used by the ALU, the
   // sixth bit is used for special cases and is removed in the ID
   // stage.
   //
   parameter  OP_xxx = 6'h0;  // Don't care.
   parameter  OP_add = 6'h0;
   parameter  OP_sll = 6'h1;
   parameter  OP_srl = 6'h2;
   parameter  OP_xor = 6'h3;
   parameter  OP_sub = 6'h4;
   parameter  OP_or  = 6'h5;
   parameter  OP_and = 6'h6;
   parameter  OP_slt = 6'h7;
   parameter  OP_seq = 6'h8;
   parameter  OP_b   = 6'h9;
   parameter  OP_sne = 6'ha;
   parameter  OP_ill = 6'h20;  // Illegal Instruction
   parameter  OP_sys = 6'h21;  // Syscall

   /// Memory Exception Codes
   //
   parameter   MEM_ERR_none = 0;
   parameter   MEM_ERR_bus  = 1;   // Bad alignment.
   parameter   MEM_ERR_seg  = 2;   // Bad address.


   ///
   /// MODULE CONSTANTS
   ///
   //
   // Defined for this module, intended to improve readability of code.

   /// PC Mux
   //
   parameter  PC_npc = 2'd0;
   //  parameter  PC_dsp = 2'd1;  // Displacement (Branches)
   parameter  PC_alt = 2'd1;  // Alternate Path (for misprediction recovery)
   parameter  PC_rgn = 2'd2;  // Region (Jumps)
   parameter  PC_rs  = 2'd3;
   parameter  PC_bp  = 2'd3;  // Branch Predictor

   /// ALU Muxen
   //
   parameter  SRC_xx = 3'd6; // Won't be used, avoids stalls on alu_a and alu_b.

   parameter  SRC_00 = 3'd6; // Use zero, or doesn't use ALU.

   parameter  SRC_me = 3'd1;
   parameter  SRC_wb = 3'd2;

   parameter  SRC_rs = 3'd3;
   parameter  SRC_sa = 3'd4;

   parameter  SRC_rt = 3'd3;
   parameter  SRC_im = 3'd4;
   parameter  SRC_np = 3'd5;


   /// Register Number to Write Back
   //
   parameter  WB_00 = 3'd0;
   parameter  WB_rd = 3'd1;
   parameter  WB_rt = 3'd2;

   /// Immediate Formatting
   //
   parameter  IMM_x = 3'd0; // Don't care.
   parameter  IMM_s = 3'd0;
   parameter  IMM_u = 3'd1;
   parameter  IMM_l = 3'd2;

   /// Memory Access Size
   //
   parameter  ME_SIZE_0 = 2'd0;
   parameter  ME_SIZE_1 = 2'd1;
   parameter  ME_SIZE_2 = 2'd2;
   parameter  ME_SIZE_4 = 2'd3;
   parameter  ME_CONTROL_nop = { ME_SIZE_0, 1'd0 };

   ///
   /// DECLARATIONS
   ///

   /// Instruction Fields
   //
   wire [4:0]  rs, rt, rd, sa;
   wire [5:0]  opcode, func;
   wire [25:0] ii;
   wire [15:0] immed;

   /// Some ALU Connections
   //
   wire [31:0] alu_out;
   reg [31:0]  alu_a, alu_b;


   /// Register Files and Maps
   //

   // Physical Register File
   reg [31:0]      prf_value[0:PRF_sze-1];
   reg             prf_avail[0:PRF_sze-1];  // If 1, value available.

   reg [`PRF_idxr] reg_id_map[0:31];  // ID:  architected reg -> physical reg
   reg [`PRF_idxr] reg_co_map[0:31];  // Commit:  architected reg -> physical reg
   // Backups of id map. Entire id map stored in a single element.
   reg [`RMV_r]    reg_id_map_backup [0:BQ_sze-1];


   ///
   /// INSTRUCTION HANDLING UNITS
   ///

   ///
   /// Reorder Buffer Declarations
   ///

   // ROB Main Element Size
   parameter ROB_melt_sze = 32 + 32 + 1 + 1 + 5 + PRF_lg;

   // ROB Divided into three sections, each section written at a
   // different time.  All sections are indexed by the same head and
   // tail pointers.
   reg [ROB_melt_sze-1:0] rob_main [0:ROB_sze-1];  // Main, write at ID
   reg [8:0]              rob_dexc [0:ROB_sze-1];  // Done/Exception, write at WB
   reg [`PRF_idxr]        rob_dstp [0:ROB_sze-1];  // Dest, write at commit.
   reg [`ROB_idxr]        rob_head;                // Next element to commit.
   reg [`ROB_idxr]        rob_tail;                // First empty element.
   // Note: ROB element at tail should never be used.
   wire             rob_empty = rob_head == rob_tail;
   wire             rob_full;

   // ROB Tail Symbols
   //
   // Main, written at ID.
   //
   wire [31:0] rob_t_pc;
   wire [31:0] tb_rob_t_din;   // Dynamic instruction number (for testbench).
   wire        tb_rob_t_idest; // If 1, instruction writes a register (for tb).
   wire        rob_t_done_1;   // Done when rob_t_done_1 == rob_t_done_2.
   reg [4:0]   rob_t_dest_a;   // Architected destination register.
   // Incumbent: Physical register previously mapped to destination
   // register.  Will be added to free list (put in dest_p field) if
   // instruction commits.
   reg [`PRF_idxr]  rob_t_incumb;
   //
   // Done/Exception, written at writeback (complete).
   //
   reg [7:0]        rob_t_exc;      // Exception code raised by instruction.
   wire             rob_t_done_2;   // Set to done_1 when instruction completes.
   //
   // Physical Destination (now part of free list), written at commit.
   //
   // For a ROB element in use holds physical register for destination
   // of instruction, for a ROB elt not in use, holds a free physical
   // register.
   wire [`PRF_idxr] rob_t_dest_p;

   // ROB Head Symbols
   //
   wire [31:0]     rob_h_pc;
   wire [31:0]     tb_rob_h_din;
   wire            tb_rob_h_idest;
   wire            rob_h_done_1;  // Done when rob_h_done_1 == rob_h_done_2.
   wire [4:0]      rob_h_dest_a;
   wire [`PRF_idxr] rob_h_dest_p;
   wire [`PRF_idxr] rob_h_incumb;

   wire [7:0]     rob_h_exc;
   wire           rob_h_done_2;


   assign rob_t_done_2 = rob_dexc[rob_tail];

   assign {rob_h_pc,
           tb_rob_h_din,
           tb_rob_h_idest,
           rob_h_done_1,
           rob_h_dest_a,
           rob_h_incumb } = rob_main[rob_head];

   assign {rob_h_exc, rob_h_done_2} = rob_dexc[rob_head];


   ///
   /// Instruction Queues, Scheduler
   ///
   parameter IQ_elt_sze = 2*PRF_lg + 5 + 32 + 2*3 + 5 + 8 + 1
                          + PRF_lg + ROB_lg + BQ_lg + 1 + 1 + 1 + 2*32;

   `define IQI_eltr IQ_elt_sze-1:0

   // Instruction queue divided into four sections.
   //
   // iqi:       Main storage.
   // iqi_occ:   Occupied bit, used to find empty and ready entries.
   // iqi_await: Waiting bits, when both zero instruction ready.
   // iqi_bmask: B-mask, used to find instructions that need to be squashed.
   //
   reg [IQ_elt_sze-1:0] iqi [0:IQI_sze-1];
   reg [`IQI_1r]        iqi_occ;
   reg [`IQI_2r]        iqi_await;
   reg [`BQ_pmr]        iqi_bmask[0:IQI_sze-1];

   // For each physical register, mask indicating waiting iqi elements.
   reg [`IQI_2r]  pr_to_iqi_await [0:PRF_sze-1];


   ///
   /// Branch Queue
   ///

   parameter      BQE_sze = 32 + 32;

   // Branch queue has one element per in-flight branch. Element
   // holds the path not taken, to be used for recovery.  Element
   // also holds PC of branch, for debugging.
   //
   reg [BQE_sze-1:0] bq [0:BQ_sze-1];

   wire [31:0] bqe_res_alt_path;
   wire [31:0] tb_bqe_res_pc;

   reg [`BQ_idxr] bq_tail;
   wire [`BQ_idxr] bq_tail_soon;

   // Used to determine when bmask_ins should be updated and
   // when to backup the id map.
   //
   reg [1:0] backup_id_map_sr;  // Shift Register

   // Branch masks have one bit per branch queue entry, when 1 that
   // entry is active.  A copy of bmask_ins is carried by in-flight
   // instructions to determine whether the instruction should be
   // squashed when a branch is mispredicted.  Mask bmask_reg is used
   // to determine whether the id register map was backed up before a
   // misprediction. (If not, the current id map is valid.)
   //
   reg [`BQ_pmr]         bmask_ins;  // Assigned to instructions.
   reg [`BQ_pmr]         bmask_reg;  // Assigned to reg_id_map.

   // Matched against bmask, used to detect instructions to squash
   // and whether id map backed up.
   wire [`BQ_pmr]        wb_bp_mask_bad, wb_bp_mask_good;

   wire [31:0] id_alt_path;
   wire [31:0] id_pred_path, id_taken_path;

   wire        wb_bp_good, wb_bp_bad;
   reg [31:0]  id_bq_alt_path;
   reg [31:0]  id_bq_pc;


   ///
   /// PIPELINE LATCHES
   ///

   /// IF:  Instruction Fetch
   //
   reg [31:0] if_pc, next_if_pc;

   /// ID:  Instruction Decode
   //
   reg [31:0] if_id_ir;
   reg [31:0] if_id_npc, if_id_pc;
   reg [7:0]  if_id_exc, next_if_id_exc;
   reg        if_id_occ;
   reg [31:0] tb_if_din, tb_if_id_din;

   /// QU:   Instruction Queue
   //
   // All of the group below are put in an (so far, the) issue queue.
   reg [`PRF_idxr] id_qu_rs;
   reg [`PRF_idxr] id_qu_rt;
   reg [4:0]       id_qu_sa;
   reg [31:0]      id_qu_imm, next_id_qu_imm;
   reg [2:0]       id_qu_alu_a_src, next_id_qu_alu_a_src;
   reg [2:0]       id_qu_alu_b_src, next_id_qu_alu_b_src;
   reg [4:0]       id_qu_alu_op, next_id_qu_alu_op;
   reg [7:0]       id_qu_exc;
   reg             id_qu_idest;
   reg [`PRF_idxr] id_qu_dest_p;
   reg [`ROB_idxr] id_qu_rob_idx;
   reg [`BQ_idxr]  id_qu_bq_idx;
   reg             id_qu_bp;
   reg             id_qu_bp_taken;
   reg             id_qu_done_1;
   reg [31:0]      tb_id_qu_pc;
   reg [31:0]      tb_id_qu_din;

   // The items below are put in the issue queue but in separate memories
   // for coding ease and assumed efficient synthesis.
   reg [`BQ_pmr]   id_qu_bmask;
   reg             id_qu_occ;

   /// RR:  Register Read
   //
   parameter       IQ_rr_pac_sze = IQ_elt_sze - 2*PRF_lg - 2*32;
   reg [`PRF_idxr] qu_rr_rs;
   reg [`PRF_idxr] qu_rr_rt;
   reg [`BQ_pmr]   qu_rr_bmask;
   reg [31:0]      tb_qu_rr_pc;
   reg [31:0]      tb_qu_rr_din;
   reg [IQ_rr_pac_sze-1:0] qu_rr_elt;

   reg             qu_rr_occ;

   /// EX:  Execute
   //
   parameter   IQ_ex_pac_sze = IQ_rr_pac_sze - 5 - 32 - 2*3 - 5 - 8;
   reg [4:0]   rr_ex_sa;
   reg [31:0]  rr_ex_imm;
   reg [2:0]   rr_ex_alu_a_src;
   reg [2:0]   rr_ex_alu_b_src;
   reg [4:0]   rr_ex_alu_op;
   reg [7:0]   rr_ex_exc;
   reg [31:0]  tb_rr_ex_pc;
   reg [31:0]  tb_rr_ex_din;
   reg [IQ_ex_pac_sze-1:0] rr_ex_elt;

   reg [31:0]    rr_ex_rs_val;
   reg [31:0]    rr_ex_rt_val;
   reg [`BQ_pmr] rr_ex_bmask;
   reg           rr_ex_occ;

   /// WB:  Writeback
   //
   reg [31:0]    ex_wb_result;
   reg [7:0]     ex_wb_exc;
   reg           ex_wb_occ;

   reg [`ROB_idxr] ex_wb_rob_idx;
   reg             ex_wb_done_1;
   reg             ex_wb_idest;
   reg [`PRF_idxr] ex_wb_dest_p;
   reg [`BQ_idxr]  ex_wb_bq_idx;
   reg             ex_wb_bp;
   reg             ex_wb_bp_taken;

   reg [31:0]      tb_ex_wb_pc;
   reg [31:0]      tb_ex_wb_din;


   ///
   /// INTERSTAGE SIGNALS
   ///
   // Others are declared elsewhere.

   reg        branch_in_id;
   reg        freeze_id;
   reg [1:0]  pc_src;
   wire       iqi_full;

   ///
   /// INSTANTIATION
   ///

   alu our_alu(alu_out, alu_a, alu_b, rr_ex_alu_op);


   ///
   /// FUNCTIONS
   ///

   task unexpected;
      inout [31:0] var;
      input [10:0] control;
      var = 0;
   endtask

   function [`IQI_2r] iqi_mask_2;
      input [1:0] bits;
      input [`IQI_idxr] pos;
      iqi_mask_2 = bits << {pos,1'b0};
   endfunction

   /// Some Memory Connections
   //
   assign     addr_1     = if_pc;
   `ifdef XXX
   assign     addr_2     = ex_me_alu;
   assign     we_2       = ex_me_we;
   assign     size_2     = ex_me_size;
   assign     data_out_2 = ex_me_rt_val;
   `endif


   ///
   ///  Pipeline Flow Control
   ///

   wire squash_if = wb_bp_bad;
   wire squash_id = ex_wb_occ && | ( wb_bp_mask_bad & bmask_ins );
   wire squash_qu = ex_wb_occ && | ( wb_bp_mask_bad & id_qu_bmask );

   wire stall_bq_full = if_id_occ && branch_in_id && bmask_ins[bq_tail_soon];
   wire stall_rob_full = if_id_occ && rob_full;
   wire stall = stall_rob_full || stall_bq_full || freeze_id || iqi_full;

   wire stall_if = stall;
   wire stall_id = stall;
   wire stall_qu = iqi_full;

   wire id_live = if_id_occ && ~squash_id && ~stall_id;
   wire qu_live = id_qu_occ && ~squash_qu && ~stall_qu;


   ///
   /// PIPELINE STARTS HERE
   ///

   ///
   /// IF:  Instruction Fetch
   ///

   wire [31:0] if_npc = if_pc + 4;

   always @( pc_src or if_id_npc or id_pred_path
             or ii or bqe_res_alt_path or if_npc )
     case( pc_src )
       PC_npc  : next_if_pc = if_npc;
       PC_rgn  : next_if_pc = { if_id_npc[31:28], ii, 2'b0 };
       PC_alt  : next_if_pc = bqe_res_alt_path;
       PC_bp   : next_if_pc = id_pred_path;
       //  PC_rs   : next_if_pc = 32'd0; // Update!
       default : `UNEXPECTED(next_if_pc,pc_src);
     endcase

   always @( mem_error_in_1 )
     case( mem_error_in_1 )
       MEM_ERR_none : next_if_id_exc = 0;
       MEM_ERR_seg  : next_if_id_exc = EXC_if_seg;
       MEM_ERR_bus  : next_if_id_exc = EXC_if_bus;
       default      : `UNEXPECTED(next_if_id_exc, mem_error_in_1);
     endcase

   /// Registers Below

   always @( posedge clk )
     if( reset ) begin

        // The value below is the usual entry point for SPIM-compiled
        // code.  Real MIPS processors reset PC to 'hbfc00000.
        if_pc     <= 'h400000;
        tb_if_din <= 1;

     end else if( squash_if || ~stall_if ) begin

        if_pc        <= next_if_pc;
        tb_if_din    <= tb_if_din + 1;

     end


   always @( posedge clk )
     if( reset ) begin

        if_id_occ <= 0;
        if_id_exc <= 0;

     end else if ( stall_id && squash_id ) begin

        if_id_occ <= 0;

     end else if( ~stall_id ) begin

        if_id_pc     <= if_pc;
        if_id_ir     <= data_in_1;
        if_id_npc    <= next_if_pc;
        if_id_exc    <= next_if_id_exc;
        if_id_occ    <= ~squash_if;
        tb_if_id_din <= tb_if_din;

     end


   ///
   /// ID: Instruction Decode
   ///

   // Stage-Local Declarations
   //
   reg [17:0] d_a_op_b;  // Dest <- operand_a op operand_b, immed_fmt
   reg [2:0]  size_we;   // Memory control bits.
   reg [2:0]  immed_fmt; // Formating to apply to immediate.
   reg [2:0]  dest_field;
   reg        extra_op_bit;
   reg [2:0]  alu_a_src_maybe, alu_b_src_maybe;
   reg [7:0]  id_exc;

   assign {opcode,rs,rt,rd,sa,func} = if_id_ir;
   assign ii                        = if_id_ir[25:0];
   assign immed                     = if_id_ir[15:0];

   wire [31:0] rs_p = reg_id_map[rs];
   wire [31:0] rt_p = reg_id_map[rt];
   wire [31:0] rd_p = reg_id_map[rd];

   always @( if_id_ir or if_id_occ or if_id_npc
             or opcode or func or rs or rt or rd or immed )
     begin

        // Note: Case statements below could be synthesized as a memory
        // which is why only constants appear on the RHS of the assignments.
        case( opcode )

          O_rfmt:

            // R-Format Instructions

            case( func )
              F_sll   : d_a_op_b = {WB_rd, SRC_sa, OP_sll, SRC_rt, IMM_x};
              F_srl   : d_a_op_b = {WB_rd, SRC_sa, OP_srl, SRC_rt, IMM_x};
              F_sys   : d_a_op_b = {WB_00, SRC_00, OP_sys, SRC_00, IMM_x};
              F_add   : d_a_op_b = {WB_rd, SRC_rs, OP_add, SRC_rt, IMM_x};
              F_sub   : d_a_op_b = {WB_rd, SRC_rs, OP_sub, SRC_rt, IMM_x};
              F_and   : d_a_op_b = {WB_rd, SRC_rs, OP_and, SRC_rt, IMM_x};
              F_or    : d_a_op_b = {WB_rd, SRC_rs, OP_or,  SRC_rt, IMM_x};
              F_xor   : d_a_op_b = {WB_rd, SRC_rs, OP_xor, SRC_rt, IMM_x};
              default : d_a_op_b = {WB_00, SRC_00, OP_ill, SRC_00, IMM_x};
            endcase

          // I- and J-Format Instructions

          O_lw, O_lbu
                    : d_a_op_b = {WB_rt, SRC_rs, OP_add, SRC_im, IMM_s};
          O_sb      : d_a_op_b = {WB_00, SRC_rs, OP_add, SRC_im, IMM_s};
          O_lui     : d_a_op_b = {WB_rt, SRC_rs, OP_or,  SRC_im, IMM_l};
          O_addi    : d_a_op_b = {WB_rt, SRC_rs, OP_add, SRC_im, IMM_s};
          O_andi    : d_a_op_b = {WB_rt, SRC_rs, OP_and, SRC_im, IMM_u};
          O_ori     : d_a_op_b = {WB_rt, SRC_rs, OP_or,  SRC_im, IMM_u};
          O_slti    : d_a_op_b = {WB_rt, SRC_rs, OP_slt, SRC_im, IMM_s};
          O_j       : d_a_op_b = {WB_00, SRC_00, OP_xxx, SRC_00, IMM_x};
          O_bne     : d_a_op_b = {WB_00, SRC_rs, OP_sne, SRC_rt, IMM_x};
          O_beq     : d_a_op_b = {WB_00, SRC_rs, OP_seq, SRC_rt, IMM_x};
          default   : d_a_op_b = {WB_00, SRC_00, OP_ill, SRC_00, IMM_s};

        endcase

        {  dest_field,
           alu_a_src_maybe,
           extra_op_bit,
           next_id_qu_alu_op,
           alu_b_src_maybe,
           immed_fmt
           }                    = d_a_op_b;

        next_id_qu_alu_a_src = alu_a_src_maybe == SRC_rs && !rs
                               ? SRC_00 : alu_a_src_maybe;

        next_id_qu_alu_b_src = alu_b_src_maybe == SRC_rt && !rt
                               ? SRC_00 : alu_b_src_maybe;

        case( opcode )
          O_lbu   : size_we = {ME_SIZE_1, 1'b0};
          O_lw    : size_we = {ME_SIZE_4, 1'b0};
          O_sb    : size_we = {ME_SIZE_1, 1'b1};
          default : size_we = {ME_SIZE_0, 1'b0};
        endcase

        case( {extra_op_bit,next_id_qu_alu_op} )
          OP_sys  : id_exc = EXC_id_sys;
          OP_ill  : id_exc = EXC_id_ins;
          default : id_exc = EXC_none;
        endcase

        case( opcode )
          O_bne, O_beq: branch_in_id = 1;
          default:      branch_in_id = 0;
        endcase

        case( immed_fmt )
          IMM_s: next_id_qu_imm = { immed[15] ? 16'hffff : 16'h0, immed };
          IMM_l: next_id_qu_imm = { immed, 16'h0 };
          IMM_u: next_id_qu_imm = { 16'h0, immed };
          default: `UNEXPECTED(next_id_qu_imm,immed_fmt);
        endcase

     end

   wire [7:0] next_id_qu_exc = if_id_exc ? if_id_exc : id_exc;

   // exemplar full_case
   always @( dest_field or rd or rt or rd_p or rt_p or rob_t_dest_p )
     case ( dest_field )
       WB_00: begin rob_t_dest_a = 0;  rob_t_incumb = rob_t_dest_p; end
       WB_rd: begin rob_t_dest_a = rd; rob_t_incumb = rd_p;         end
       WB_rt: begin rob_t_dest_a = rt; rob_t_incumb = rt_p;         end
       default: `UNEXPECTED(rob_t_incumb,dest_field);
     endcase

   assign rob_t_dest_p = rob_dstp[rob_tail];

   wire next_id_qu_idest = rob_t_dest_a !== 0;

   assign rob_t_pc       = if_id_pc;
   assign tb_rob_t_din   = tb_if_id_din;
   assign tb_rob_t_idest = next_id_qu_idest;
   assign rob_t_done_1   = ~rob_t_done_2;

   //  next_id_qu_me = size_we;

   reg       id_pred_taken;

   always @( posedge clk ) id_pred_taken <= $random;
   //  always @( posedge clk ) id_pred_taken <= 0;

   always @( opcode or if_id_occ or wb_bp_bad ) begin

      case( 1 )
        wb_bp_bad   : pc_src = PC_alt;
        !if_id_occ  : pc_src = PC_npc;
        default     :
          case( opcode )
            O_bne,
            O_beq   : pc_src = PC_bp;
            O_j     : pc_src = PC_rgn;
            default : pc_src = PC_npc;
          endcase
      endcase

   end

   assign id_taken_path = if_id_npc + {immed[15]?14'h3fff:14'h0,immed,2'b0};

   assign { id_pred_path,  id_alt_path } = id_pred_taken ?
          { id_taken_path, if_npc } :
          { if_npc,        id_taken_path };


   /// Branch Misprediction Recovery
   //

   reg [`BQ_pmr]  next_bmask_ins, next_bmask_reg;

   wire   cdb_mispredict_maybe = ex_wb_bp_taken ^ ex_wb_result[0];

   assign wb_bp_good = ex_wb_occ && ex_wb_bp && !cdb_mispredict_maybe;
   assign wb_bp_bad  = ex_wb_occ && ex_wb_bp &&  cdb_mispredict_maybe;

   assign bq_tail_soon = bq_tail + backup_id_map_sr[0];

   wire   cdb_before_bq = ~bmask_reg[ex_wb_bq_idx];

   assign {bqe_res_alt_path,tb_bqe_res_pc} =
          cdb_before_bq ? {id_bq_alt_path,id_bq_pc} : bq[ex_wb_bq_idx];

   always @( bmask_ins or bmask_reg or ex_wb_occ or ex_wb_bp or ex_wb_bq_idx
             or wb_bp_bad or id_live or backup_id_map_sr[0] or bq_tail_soon
             or bq_tail or qu_live or id_qu_bp or backup_id_map_sr[1] ) begin

      next_bmask_ins = bmask_ins;
      next_bmask_reg = bmask_reg;

      if( id_live && backup_id_map_sr[1] ) next_bmask_ins[bq_tail] = 1;
      if( id_live && backup_id_map_sr[0] ) next_bmask_reg[bq_tail] = 1;

      // Probably won't synthesize well, if at all.
      if( ex_wb_occ & ex_wb_bp )
        begin
           next_bmask_ins[ex_wb_bq_idx] = 0;
           next_bmask_reg[ex_wb_bq_idx] = 0;
        end

      if( wb_bp_bad ) begin:NB

         integer i;
         reg           set_to_zero;

         set_to_zero = ex_wb_bq_idx > bq_tail_soon;

         for(i=0; i<BQ_sze; i=i+1) begin

            if( i == ex_wb_bq_idx + 1 ) set_to_zero = ~ set_to_zero;
            if( i == bq_tail_soon )     set_to_zero = ~ set_to_zero;

            if( set_to_zero ) begin
               next_bmask_ins[i] = 0;
               next_bmask_reg[i] = 0;
            end

         end

      end

   end


   always @( posedge clk )
     if( reset ) begin

        bq_tail <= 0;
        bmask_ins <= 0;
        bmask_reg <= 0;

     end else begin

        if( wb_bp_bad ) begin

           bq_tail <= ex_wb_bq_idx;

        end else if( id_live && backup_id_map_sr[1] ) begin

           bq[bq_tail_soon] <= { id_bq_alt_path, id_bq_pc };

        end else if( id_live && backup_id_map_sr[0] ) begin

           bq_tail <= bq_tail + 1;

        end

        if( id_live && branch_in_id ) begin
           id_bq_alt_path <= id_alt_path;
           id_bq_pc <= if_id_pc;
        end

        bmask_ins <= next_bmask_ins;
        bmask_reg <= next_bmask_reg;

     end


   // ID Register Map

   always @( posedge clk ) begin:IDM

      integer i;

      if( reset ) begin

         for(i=0; i<32; i=i+1) reg_id_map[i] <= i;
         backup_id_map_sr <= 0;

      end else begin:IDME

         reg [`RMV_r] id_map_vec;
         reg          id_map_update, bit_1, good_and_fast;

         good_and_fast = wb_bp_good && cdb_before_bq;
         bit_1 = good_and_fast ? 0 : backup_id_map_sr[1];

         if ( wb_bp_bad ) begin

            backup_id_map_sr <= 0;

         end else if( id_live ) begin

            backup_id_map_sr <= {branch_in_id,bit_1};

         end else if( good_and_fast ) begin

            backup_id_map_sr <= 0;

         end

         if ( backup_id_map_sr[0]
              && id_live 
              && !wb_bp_bad && !good_and_fast 
              ) begin

            // Not checked for synthesis efficiency.
            for(i=0; i<32; i=i+1) id_map_vec = { id_map_vec, reg_id_map[i] };
            reg_id_map_backup[bq_tail] = id_map_vec;

         end

         id_map_update = id_live && next_id_qu_idest;

         if( wb_bp_bad && !cdb_before_bq ) begin

            id_map_vec = reg_id_map_backup[ex_wb_bq_idx];

            // Not checked for synthesis efficiency.
            for(i=0; i<32; i=i+1) begin

               reg_id_map[31-i] <= id_map_vec;
               id_map_vec = id_map_vec >> PRF_lg;

            end
         end else if( id_map_update )
           reg_id_map[rob_t_dest_a] <= rob_t_dest_p;
      end
   end


   // ROB ID
   //

   wire [`ROB_idxr] next_rob_tail = rob_tail + 1;
   assign           rob_full = next_rob_tail == rob_head;

   always @( posedge clk )
     if( reset ) begin:RID
        integer i;

        rob_tail <= 0;

        // Replace with something that doesn't require a complete sweep?
        for(i=0; i<ROB_sze; i=i+1) rob_main[i] <= 32 + i;

     end else begin

        if( wb_bp_bad ) begin:RT

           reg [`ROB_idxr] next_idx;

           next_idx = ex_wb_rob_idx + 1;

           if( rob_tail == next_idx ) begin

              if( id_live ) rob_tail <= next_rob_tail;

           end else begin

             rob_tail <= ex_wb_rob_idx + 2;

           end

        end else begin

           if( id_live ) rob_tail <= next_rob_tail;

        end

        if( id_live ) begin

           rob_main[rob_tail] <= {rob_t_pc, tb_rob_t_din, tb_rob_t_idest,
                                  rob_t_done_1,
                                  rob_t_dest_a,rob_t_incumb};
        end

     end


   always @( posedge clk )
     if( reset || squash_qu ) begin

        id_qu_occ <= 0;
        tb_id_qu_pc <= 0;
        freeze_id <= 0;

     end else if( ~stall_qu ) begin

        freeze_id       <= id_qu_occ && id_qu_exc !== 0;
        id_qu_rs        <= rs_p;
        id_qu_rt        <= rt_p;
        id_qu_sa        <= sa;
        id_qu_imm       <= next_id_qu_imm;
        id_qu_alu_a_src <= next_id_qu_alu_a_src;
        id_qu_alu_b_src <= next_id_qu_alu_b_src;
        id_qu_alu_op    <= next_id_qu_alu_op;
        id_qu_exc       <= next_id_qu_exc;
        id_qu_idest     <= next_id_qu_idest;
        id_qu_dest_p    <= rob_t_dest_p;
        id_qu_rob_idx   <= rob_tail;
        id_qu_bq_idx    <= bq_tail_soon;
        id_qu_bp        <= branch_in_id;
        id_qu_bp_taken  <= id_pred_taken;
        id_qu_done_1    <= rob_t_done_1;
        tb_id_qu_pc     <= if_id_pc;
        tb_id_qu_din    <= tb_if_id_din;

        id_qu_bmask     <= bmask_ins & ~wb_bp_mask_good;
        id_qu_occ       <= id_live;

     end else begin

        id_qu_bmask     <= id_qu_bmask & ~wb_bp_mask_good;

     end


   ///
   /// Q: Issue / Instruction Queue, Schedule,
   /// DS: Dispatch
   ///

   assign iqi_full = &iqi_occ;
   reg  iqi_free_idx_assigned;
   reg [`IQI_idxr] iqi_free_idx;

   always @( iqi_occ ) begin:IQIO

      integer i;

      iqi_free_idx_assigned = 0;
      iqi_free_idx = 0; // Avoid latch.

      for(i=0; i<IQI_sze; i=i+1)
        if( !iqi_free_idx_assigned && !iqi_occ[i] ) begin
           iqi_free_idx_assigned = 1;
           iqi_free_idx = i;
        end

   end

   wire qu_need_rs = id_qu_alu_a_src === SRC_rs;
   wire qu_need_rt = id_qu_alu_b_src === SRC_rt;

   wire [`IQI_2r] iqi_await_update_maybe = pr_to_iqi_await[ex_wb_dest_p];

   wire [`IQI_2r] iqi_await_update
                  = ex_wb_occ && ex_wb_idest ? iqi_await_update_maybe : 0;

   wire [`IQI_2r] iqi_await_rs = pr_to_iqi_await[id_qu_rs];
   wire [`IQI_2r] iqi_await_rt = pr_to_iqi_await[id_qu_rt];

   wire           qu_rs_avail  = prf_avail[id_qu_rs]
                  || ex_wb_occ && ex_wb_idest && ex_wb_dest_p == id_qu_rs;
   wire           qu_rt_avail  = prf_avail[id_qu_rt]
                  || ex_wb_occ && ex_wb_idest && ex_wb_dest_p == id_qu_rt;

   wire [`IQI_2r] await_new_rs
                  = iqi_mask_2( {qu_need_rs & ~qu_rs_avail,1'b0}, iqi_free_idx);
   wire [`IQI_2r] await_new_rt
                  = iqi_mask_2( qu_need_rt & ~qu_rt_avail, iqi_free_idx);
   wire [`IQI_2r] await_new = await_new_rs | await_new_rt;

   // Mask of await bits to remove from every pr_to_iqi_await entry.
   // It is computed in a separate combinational block.
   reg [`IQI_2r]  pr_to_iqi_flush_mask;

   // Maintain pr_to_iqi_await: for each physical register, the await bits
   // of the queue slots waiting on that register.
   always @( posedge clk ) begin:IQIF
      integer i;
      
      if( qu_live ) begin

         // The destination register of the entering instruction starts
         // with no waiters.
         if( id_qu_idest ) pr_to_iqi_await[id_qu_dest_p] <= 0;
         
         // Add the new slot's rs await bit (zero if it is not waiting).
         pr_to_iqi_await[id_qu_rs] <= iqi_await_rs | await_new_rs;

         // The rt entry is updated only when rt names a different
         // register, so this does not overwrite the rs update above.
         if( id_qu_rs != id_qu_rt )
           pr_to_iqi_await[id_qu_rt] <= iqi_await_rt | await_new_rt;

      end

      // Remove flushed await bits from every entry.  For simulation only,
      // the loop is skipped unless wb_bp_bad is set; the guard lies between
      // the translate_off/on pragmas.
      // NOTE(review): this loop uses blocking assignments while the updates
      // above are nonblocking, so any nonblocking update scheduled above is
      // applied after, and replaces, the masked value for that entry.
      // Confirm that this ordering is intended.
      // exemplar translate_off
      if( wb_bp_bad )  // For faster simulation.
      // exemplar translate_on
      for(i=0; i<PRF_sze; i=i+1)
        pr_to_iqi_await[i] = pr_to_iqi_await[i] & ~ pr_to_iqi_flush_mask;
      
   end

   // Mask that clears the await bits of the slot being filled this cycle.
   // When qu_live is 0 the argument is zero.
   // NOTE(review): iqi_mask_2 is defined outside this section; presumably
   // the result then clears nothing -- confirm.
   wire [`IQI_2r] iqi_await_clr_mask
                  = ~iqi_mask_2({qu_live,qu_live},iqi_free_idx);

   // Next value of the per-slot await bits:
   //   reset      -> all zeros;
   //   otherwise  -> current bits, minus those satisfied by the register
   //                 written back this cycle (iqi_await_update), minus the
   //                 stale bits of the slot being refilled, plus the await
   //                 bits of the entering instruction when qu_live.
   wire [`IQI_2r] iqi_await_next = reset
                      ? 0
                      : iqi_await & ~iqi_await_update & iqi_await_clr_mask
                        | ( qu_live ? await_new : 0 );

   always @( posedge clk ) iqi_await <= iqi_await_next;

   // Result of ready-instruction selection: whether any slot was chosen,
   // and which one.
   reg iqi_something_ready;
   reg [`IQI_idxr] iqi_ready_idx;

   // Simulation-only initial values for the two index registers.
   // exemplar translate_off
   initial begin iqi_ready_idx = 0;  iqi_free_idx = 0; end
   // exemplar translate_on

   // Toggles on every clock edge.  It appears in the sensitivity list
   // below, so the selection block is re-evaluated every cycle even when
   // iqi_occ and iqi_await do not change.
   reg another_cycle_kludge;
   initial another_cycle_kludge = 0;
   always @( posedge clk ) another_cycle_kludge <= !another_cycle_kludge;

   // Select a slot to dispatch.
   always @( another_cycle_kludge or iqi_occ or iqi_await ) begin:IQIR

      integer i;

      iqi_something_ready = 0;
      iqi_ready_idx = 0; // Avoid latch.

      // The scan runs from the highest slot down and a later match
      // overwrites an earlier one, so the lowest-numbered qualifying slot
      // is the one selected.
      for(i=IQI_sze-1; i >= 0; i=i-1) begin

         // A slot qualifies when the random skip does not fire, the slot is
         // occupied, and no bit of iqi_await under iqi_mask_2(3,i) is set.
         // NOTE(review): iqi_mask_2(3,i) presumably selects both await bits
         // of slot i -- confirm against its definition.
         if( !( $random & SCHED_RDELAY_MASK ) // Randomly skip to shake out bugs.
             && iqi_occ[i]
             && ! ( iqi_await & iqi_mask_2(3,i) ) )

           begin
              iqi_ready_idx = i;
              iqi_something_ready = 1;
           end

      end

   end

   // Branch resolution masks: wb_bp_bad / wb_bp_good shifted to the bit
   // position given by the branch-queue index of the instruction in WB.
   // A shift amount of 0 is used when WB is not occupied.
   assign wb_bp_mask_bad  = wb_bp_bad <<  ( ex_wb_occ ? ex_wb_bq_idx : 0 );
   assign wb_bp_mask_good = wb_bp_good << ( ex_wb_occ ? ex_wb_bq_idx : 0 );

   // Per-slot masks: the slot being dispatched this cycle (if any) and
   // the slot being filled this cycle (if qu_live).
   wire [`IQI_1r] iqi_remove_mask = iqi_something_ready << iqi_ready_idx;
   wire [`IQI_1r] iqi_insert_mask = qu_live << iqi_free_idx;

   // iqi_flush_mask[i] is set when slot i's branch mask shares a set bit
   // with wb_bp_mask_bad.
   reg [`IQI_1r]  iqi_flush_mask;
   // Toggled every clock by the queue write block.  It is listed in the
   // sensitivity list below so that this block is re-evaluated each cycle;
   // iqi_bmask, which the block reads, is not in the list.
   reg            iqi_bmask_update; // A kludge.

   always @( wb_bp_mask_bad or iqi_bmask_update ) begin:FM

      integer i;
      reg     f;

      for(i=0; i<IQI_sze; i=i+1) begin

         // f: slot i has a set bit in common with wb_bp_mask_bad.
         f = |( iqi_bmask[i] & wb_bp_mask_bad );
         iqi_flush_mask[i] = f;
         // Bits 2i and 2i+1 of the two-bits-per-slot mask get the same flag.
         pr_to_iqi_flush_mask[i<<1] = f;
         pr_to_iqi_flush_mask[(i<<1)+1] = f;

      end

   end

   // Instruction queue occupancy bits.
   //
   // Reset empties the queue.  Otherwise a slot stays occupied unless it
   // is being dispatched (iqi_remove_mask) or flushed (iqi_flush_mask),
   // and the slot being filled (iqi_insert_mask) becomes occupied.
   //
   always @( posedge clk ) begin
      if( reset ) begin
         iqi_occ <= 0;
      end else begin
         iqi_occ <= iqi_insert_mask
                    | iqi_occ & ~( iqi_remove_mask | iqi_flush_mask );
      end
   end

   // Queue element selected for dispatch.  Its low 64 bits are the PC and
   // instruction word, exposed for the testbench.
   wire [`IQI_eltr] iqi_ready_elt = iqi[iqi_ready_idx];
   wire [31:0] tb_ds_pc, tb_ds_din;
   assign {tb_ds_pc, tb_ds_din } = iqi_ready_elt;

   // Write the instruction queue and maintain the per-slot branch masks.
   always @( posedge clk ) begin:IQIPM

      integer i;

      // Store the Q-stage instruction in the free slot.  The field order
      // of this concatenation is the element layout; code that unpacks
      // elements by shifting depends on it.
      if( qu_live )
        iqi[iqi_free_idx] <= { id_qu_rs, id_qu_rt, id_qu_sa, id_qu_imm,
                               id_qu_alu_a_src, id_qu_alu_b_src,
                               id_qu_alu_op, id_qu_exc,
                               id_qu_idest, id_qu_dest_p,
                               id_qu_rob_idx,
                               id_qu_bq_idx, id_qu_bp, id_qu_bp_taken,
                               id_qu_done_1, tb_id_qu_pc, tb_id_qu_din };

      // Toggle (x or 1 -> 0, 0 -> 1) so that the flush-mask block, which
      // is sensitive to this signal, is re-evaluated every cycle.
      iqi_bmask_update <= iqi_bmask_update === 0;

      // Every slot drops the bits in wb_bp_mask_good; the slot being filled
      // takes the entering instruction's mask instead of its old one.
      for(i=0; i<IQI_sze; i=i+1) begin
         if( qu_live && i == iqi_free_idx )
           iqi_bmask[i] <= id_qu_bmask & ~wb_bp_mask_good;
         else
           iqi_bmask[i] <= iqi_bmask[i] & ~wb_bp_mask_good;
      end

   end


   // Pipeline latch between the queue (dispatch) and Register Read.
   always @( posedge clk )
     if( reset ) begin

        qu_rr_occ <= 0;
        tb_qu_rr_pc <= 0;

     end else begin

        // Split the selected element: source registers, the fields
        // passed on as qu_rr_elt, and the testbench PC / instruction word.
        { qu_rr_rs, qu_rr_rt,
          qu_rr_elt,
          tb_qu_rr_pc, tb_qu_rr_din } <= iqi_ready_elt;

        // Carry the slot's branch mask forward, minus the bits in
        // wb_bp_mask_good.
        qu_rr_bmask <= iqi_bmask[iqi_ready_idx] & ~wb_bp_mask_good;
        // The stage is occupied only if something was selected and its
        // branch mask has no bit in common with wb_bp_mask_bad.
        qu_rr_occ <= iqi_something_ready
                     && ! ( iqi_bmask[iqi_ready_idx] & wb_bp_mask_bad );

   end


   ///
   /// Register Read
   ///

   // Operand values read from the physical register file.
   wire [31:0] pr_rt_val = prf_value[qu_rr_rt];
   wire [31:0] pr_rs_val = prf_value[qu_rr_rs];

   // Set when the RR-stage branch mask shares a bit with wb_bp_mask_bad.
   wire        rr_stage_squashed = |( qu_rr_bmask & wb_bp_mask_bad );

   // Pipeline latch between Register Read and Execute.
   always @( posedge clk ) begin
      if( reset ) begin

         tb_rr_ex_pc <= 0;
         rr_ex_occ   <= 0;

      end else begin

         // Occupancy and branch mask.
         rr_ex_occ    <= qu_rr_occ && !rr_stage_squashed;
         rr_ex_bmask  <= qu_rr_bmask & ~wb_bp_mask_good;

         // Operand values.
         rr_ex_rt_val <= pr_rt_val;
         rr_ex_rs_val <= pr_rs_val;

         // Unpack the fields used by Execute; the remainder travels on
         // as rr_ex_elt.
         { rr_ex_sa, rr_ex_imm,
           rr_ex_alu_a_src, rr_ex_alu_b_src,
           rr_ex_alu_op, rr_ex_exc,
           rr_ex_elt } <= qu_rr_elt;

         // Testbench information.
         tb_rr_ex_din <= tb_qu_rr_din;
         tb_rr_ex_pc  <= tb_qu_rr_pc;

      end
   end


   ///
   /// Execute
   ///

   // ALU operand A: rs value, shift amount (sa), or zero, as selected
   // by rr_ex_alu_a_src.  Any other selector value invokes the UNEXPECTED
   // macro.
   always @( rr_ex_alu_a_src or rr_ex_rs_val or rr_ex_sa  )
     case( rr_ex_alu_a_src )
       SRC_rs: alu_a = rr_ex_rs_val;
       SRC_sa: alu_a = rr_ex_sa;
       SRC_00: alu_a = 0;
       default: `UNEXPECTED(alu_a,rr_ex_alu_a_src);
     endcase

   // ALU operand B: rt value, immediate, or zero, as selected by
   // rr_ex_alu_b_src.  Any other selector value invokes the UNEXPECTED
   // macro.
   always @( rr_ex_alu_b_src or rr_ex_rt_val or rr_ex_imm  )
     case( rr_ex_alu_b_src )
       SRC_rt: alu_b = rr_ex_rt_val;
       SRC_im: alu_b = rr_ex_imm;
       SRC_00: alu_b = 0;
       default: `UNEXPECTED(alu_b,rr_ex_alu_b_src);
     endcase

   // Set when the EX-stage branch mask shares a bit with wb_bp_mask_bad.
   wire        ex_stage_squashed = |( rr_ex_bmask & wb_bp_mask_bad );

   // Pipeline latch between Execute and Writeback.
   always @( posedge clk ) begin
      if( reset ) begin

         tb_ex_wb_pc <= 0;
         ex_wb_occ   <= 0;

      end else begin

         // Occupancy.
         ex_wb_occ <= rr_ex_occ && !ex_stage_squashed;

         // Unpack the fields that accompany the instruction into WB.
         { ex_wb_idest, ex_wb_dest_p,
           ex_wb_rob_idx,
           ex_wb_bq_idx, ex_wb_bp, ex_wb_bp_taken,
           ex_wb_done_1 } <= rr_ex_elt;

         // ALU result and exception code.
         ex_wb_exc    <= rr_ex_exc;
         ex_wb_result <= alu_out;

         // Testbench information.
         tb_ex_wb_din <= tb_rr_ex_din;
         tb_ex_wb_pc  <= tb_rr_ex_pc;

      end
   end


   ///
   /// Writeback
   ///

   // ROB Complete
   //
   // Simulation only: start with every rob_dexc entry at zero.
   // exemplar translate_off
   integer i;
   initial for(i=0;i<ROB_sze;i=i+1) rob_dexc[i] = 0;
   // exemplar translate_on

   // When an instruction is in WB, record its exception code and its
   // done_1 bit in the ROB entry it names.
   always @( posedge clk )
     if( ex_wb_occ ) rob_dexc[ex_wb_rob_idx] <= {ex_wb_exc,ex_wb_done_1};

   // Physical register file: values and availability bits.
   always @( posedge clk )
     if( reset ) begin:PRF

        integer i;

        // Only entries 0 to 31 are marked available at reset.
        for(i=0; i<32; i=i+1) prf_avail[i] <= 1;

     end else begin

        // Writeback: store the result and mark the destination available.
        if( ex_wb_occ && ex_wb_idest ) begin

           prf_value[ex_wb_dest_p] <= ex_wb_result;
           prf_avail[ex_wb_dest_p] <= 1;

        end

        // The destination of the instruction in the Q stage is marked
        // unavailable.
        if( id_qu_occ && id_qu_idest ) prf_avail[id_qu_dest_p] <= 0;

   end


   ///
   /// Commit
   ///

   // The ROB head can commit when the ROB is not empty and the head's two
   // done bits are equal.
   wire rob_h_complete = !rob_empty && rob_h_done_1 == rob_h_done_2;
   // Physical register recorded in the ROB head entry.
   assign rob_h_dest_p = rob_dstp[rob_head];

   // Commit map and ROB head pointer.
   always @( posedge clk )
     if( reset ) begin:CM

        integer i;

        // Identity mapping at reset: entry i of the commit map holds i.
        for(i=0; i<32; i=i+1) reg_co_map[i] <= i;

        rob_head <= 0;

     end else if( rob_h_complete ) begin

        // Advance the head.  The commit map is updated unless the head's
        // destination register number (rob_h_dest_a) is zero.
        rob_head <= rob_head + 1;
        if( rob_h_dest_a ) reg_co_map[rob_h_dest_a] <= rob_h_dest_p;

     end

   // ROB Commit
   //
   // rob_dstp holds one physical register number per ROB entry.
   always @( posedge clk )
     if( reset ) begin:ROBD

        integer i;

        // At reset, entry i holds physical register 32 + i.
        for(i=0; i<ROB_sze; i=i+1) rob_dstp[i] <= 32 + i;

     end else if( rob_h_complete ) begin

        // On commit the head entry takes rob_h_incumb.
        // NOTE(review): presumably the physical register displaced by the
        // committing instruction, making it reusable -- confirm.
        rob_dstp[rob_head] <= rob_h_incumb;

     end

   // Exception output: the head's exception code when it commits,
   // EXC_none otherwise, with a zero bit prepended.
   assign exc = {1'b0,rob_h_complete ? rob_h_exc : EXC_none};


   ///
   /// END OF HARDWARE DESCRIPTION
   ///


   // exemplar translate_off

   ///
   /// CONFIGURATION VALIDITY CHECK
   ///

   // Check that a register declared with the IQI_idxr range can hold the
   // largest queue index, IQI_sze - 1.  The value is written to such a
   // register and read back; a mismatch stops the simulation.
   initial begin:I1
      reg [`IQI_idxr] top_idx;
      integer         want;

      want    = IQI_sze - 1;
      top_idx = want;

      if( top_idx !== want ) begin
         $display("Macro IQI_idxr does not match IQI_sze.");
         $stop;
      end
   end

   ///
   /// TESTBENCH INTERFACE CODE
   ///

   // Registered copies of the commit signals: tbi_inst_done follows
   // rob_h_complete and tbi_done_pc follows rob_h_pc, each one clock later.
   reg [31:0] tbi_done_pc;
   reg        tbi_inst_done;

   always @( posedge clk ) tbi_inst_done <= rob_h_complete;
   always @( posedge clk ) tbi_done_pc <= rob_h_pc;

   // Write val to register r through the ID-stage register map.
   //
   //   r    Register number.
   //   val  Value to store.
   //
   // If the XOR reduction of the map entry for r is x, the value is written
   // directly to prf_value[r]; otherwise it is written to the physical
   // register the map names.
   task tbi_poke_gpr;
      input [5:0] r;
      input [31:0] val;
      if( ^reg_id_map[r] === 1'bx )
        prf_value[r] = val;
      else
        prf_value[reg_id_map[r]] = val;
   endtask

   // Return the value of register r as seen through the commit map
   // (reg_co_map).
   function [31:0] tbi_peek_gpr;
      input [5:0] r;
      tbi_peek_gpr = prf_value[reg_co_map[r]];
   endfunction

   // Iterator over in-flight instructions.
   //
   // Each call returns one instruction with valid = 1; a call that returns
   // valid = 0 ends the sequence and resets the iterator.  The locals
   // `stage' and `rob_idx' carry the position from one call to the next.
   //
   //   valid  1 when the other outputs describe an instruction.
   //   name   Two-character stage name, or two spaces for an instruction
   //          taken from the ROB scan.
   //   pc     Instruction address.
   //   din    Instruction word.
   //   exc    Exception code.
   //   occ    Occupancy of the stage reported.
   //
   task tbi_iterate_pipeline_segments;
      output valid;
      output [15:0] name;
      output [31:0] pc;
      output [31:0] din;
      output [7:0] exc;
      output occ;

      // Packed {name,pc,din,exc,occ}: 16 + 32 + 32 + 8 + 1 = 89 bits.
      reg [88:0] info;
      integer stage;
      reg [`ROB_idxr] rob_idx;

      begin
         // stage is x before the first call and after a sequence ends;
         // start a new sequence.
         if( stage === 32'bx ) begin stage = 0; rob_idx = rob_head; end

         // Phase 1: step through the eight stages, stopping at the first
         // one that is occupied.
         for(valid = 0; stage < 8 && !valid;  valid = occ == 1 ) begin

            // NOTE(review): the unpacking below relies on each right-hand
            // side being 89 bits, i.e. on the exception fields being 8 bits
            // wide.  Stage 6 uses an explicit 8'b0; confirm that EXC_none
            // and the *_exc signals are also 8 bits.
            case( stage )
              0: info = {"IF",if_pc,   tb_if_din,   next_if_id_exc,1'd1};
              1: info = {"ID",if_id_pc,tb_if_id_din,next_id_qu_exc,if_id_occ};
              2: info = {"Q ",tb_id_qu_pc,tb_id_qu_din,EXC_none,id_qu_occ};
              3: info = {"DS",tb_ds_pc,tb_ds_din,EXC_none,iqi_something_ready};
              4: info = {"RR",tb_qu_rr_pc,tb_qu_rr_din,EXC_none,qu_rr_occ};
              5: info = {"EX",tb_rr_ex_pc,tb_rr_ex_din,EXC_none,rr_ex_occ};
              6: info = {"WB",tb_ex_wb_pc,tb_ex_wb_din,8'b0, ex_wb_occ};
              7: info = {"C ",rob_h_pc,tb_rob_h_din, rob_h_exc, rob_h_complete};
              default `UNEXPECTED(info,stage);
            endcase

            {name,pc,din,exc,occ} = info;
            stage = stage + 1;

         end

         // Phase 2: walk the ROB from where the previous call stopped up
         // to the tail.  An entry is reported (valid = 1) only when its
         // instruction word matches none of the stages reported in phase 1.
         for(valid = valid;
             rob_idx != rob_tail && !valid;
             valid =
             ~ ( din == tb_if_din
                 || din == tb_if_id_din && if_id_occ
                 || din == tb_id_qu_din && id_qu_occ
                 || din == tb_ds_din    && iqi_something_ready
                 || din == tb_qu_rr_din && qu_rr_occ
                 || din == tb_rr_ex_din && rr_ex_occ
                 || din == tb_ex_wb_din && ex_wb_occ
                 || din == tb_rob_h_din && rob_h_complete ) ) begin

            name = "  ";  occ = 1;  exc = 0;
            // pc and din sit above the low PRF_lg + 5 + 1 + 1 bits of the
            // ROB entry.
            {pc,din} = rob_main[rob_idx] >> PRF_lg + 5 + 1 + 1;
            rob_idx = rob_idx + 1;

         end

         // Nothing left to report: return stage to x so the next call
         // starts over.
         if( !valid ) stage = 32'bx;

      end

   endtask


   ///
   /// SANITY CHECKS
   ///

   // Number of occupied instruction queue slots, recomputed every clock
   // by the check block below.
   integer tb_iqi_count;

   // Simulation-only consistency checks, run on every clock edge once
   // tb_if_din is no longer all x.  A failed check stops the simulation
   // with $stop.
   always @( posedge clk ) if( tb_if_din !== 'bx ) begin:SSC

      integer i;
      reg [0:PRF_sze-1] pr_pending;
      reg [0:PRF_sze-1] pr_in_flight;
      reg [0:PRF_sze-1] pr_ref;
      reg [`ROB_idxr]   pr_ref_who [0:PRF_sze-1];
      reg [`ROB_idxr]   rob_idx;
      reg [`PRF_idxr]   dst, dest_qu_rr, dest_rr_ex;
      reg               done_1,done_2, pending;
      reg [4:0]         dest_a;
      reg [`PRF_idxr]   incumb;

      integer           new_pr_idx;
      reg               avail, idest;
      reg [`IQI_idxr]   rob_to_iq_idx [0:ROB_sze-1];
      reg [`IQI_idxr]   iq_idx;

      reg [`BQ_pmr]     shadow_bmask, compare_bmask;
      reg               qu_rr_pb, rr_ex_pb, pb;
      reg [`BQ_idxr]    qu_rr_bq_idx, rr_ex_bq_idx, bq_idx;

      reg [0:PRF_sze-1]            pr_in_id, pr_in_co;

      // Destination register of the instruction in the Q stage, or PRF_sze
      // (which matches no register) if there is none.
      new_pr_idx = id_qu_occ && id_qu_idest ? id_qu_dest_p : PRF_sze;

      pr_pending = 0;
      pr_in_flight = 0;

      for(i=0; i<ROB_sze; i=i+1) rob_to_iq_idx[i] = 'bx;

      tb_iqi_count = 0;

      // Build a ROB-index -> queue-slot table for the occupied slots and
      // count them.  The shift discards the element fields below rob_idx.
      for(i=0; i<IQI_sze; i=i+1) begin
         rob_idx = iqi[i] >> BQ_lg + 1 + 1 + 1 + 2 * 32;
         if( iqi_occ[i] ) begin
            rob_to_iq_idx[rob_idx] = i;
            tb_iqi_count = tb_iqi_count + 1;
         end
      end

      // Walk the ROB from head to tail.
      for( rob_idx = rob_head;
           rob_idx != rob_tail;
           rob_idx = rob_idx + 1 ) begin

         dst = rob_dstp[rob_idx];
         {idest,done_1,dest_a,incumb} = rob_main[rob_idx];
         done_2 = rob_dexc[rob_idx];

         // An entry is pending while its two done bits differ.
         pr_in_flight[dst] = idest;
         pending = done_1 ^ done_2;
         pr_pending[dst] = pending && idest;

         // The register being allocated in the Q stage is treated as
         // unavailable.
         avail = dst == new_pr_idx ? 0 : prf_avail[dst];

         // A destination register must be available exactly when its
         // instruction is no longer pending.
         if( idest && pending === avail ) $stop;

         iq_idx = rob_to_iq_idx[rob_idx];

         // A pending instruction that is not in the queue must be found
         // in one of the Q, RR, EX, or WB stages.
         if( pending && iqi_occ[iq_idx] !== 1 ) begin

            dest_qu_rr = qu_rr_elt >> ROB_lg + BQ_lg + 1 + 1 + 1;
            dest_rr_ex = rr_ex_elt >> ROB_lg + BQ_lg + 1 + 1 + 1;
            if( ( !id_qu_occ || id_qu_dest_p !== dst )
                && ( ! qu_rr_occ || dest_qu_rr !== dst )
                && ( ! rr_ex_occ || dest_rr_ex !== dst )
                && ( ! ex_wb_occ || ex_wb_dest_p !== dst ) ) $stop;

         end

      end

      // Branch mask check: shadow_bmask collects the branch-queue bits of
      // the branches found in flight and is compared with compare_bmask
      // (built from bmask_ins) at the end.
      shadow_bmask = 0;
      compare_bmask = bmask_ins;
      if( backup_id_map_sr[1] ) begin
         compare_bmask[bq_tail] = 1;
         if( bmask_ins[bq_tail] ) $stop;
      end


      // Per-slot checks on occupied instruction queue entries.
      for(i=0; i<IQI_sze; i=i+1) if( iqi_occ[i] ) begin:SSC1

         reg await_rs, await_rt, rs_avail, rt_avail;
         reg should_await_rs, should_await_rt;
         reg [`PRF_idxr] rs, rt;
         reg [2:0]       alu_a_src, alu_b_src;

         // rs and rt are the top two fields of the element; the operand
         // source selectors lie 43 bits further down.
         {rs, rt} = iqi[i] >> IQ_elt_sze - 2 * PRF_lg;
         {alu_a_src, alu_b_src} = iqi[i] >> IQ_elt_sze - 2*PRF_lg - 43;

         // Slot i's await bits are bits 2i+1 (rs) and 2i (rt) of iqi_await.
         {await_rs,await_rt} = iqi_await >> {i,1'b0};

         rs_avail = prf_avail[rs];
         rt_avail = prf_avail[rt];

         // A slot must not be waiting on a register that is available.
         if( await_rs && rs_avail ) $stop;
         if( await_rt && rt_avail ) $stop;

         // An await bit must be set exactly when the operand is needed and
         // its register is not available.
         should_await_rs = alu_a_src === SRC_rs && !rs_avail;
         if( should_await_rs !== await_rs ) $stop;
         should_await_rt = alu_b_src === SRC_rt && !rt_avail;
         if( should_await_rt !== await_rt ) $stop;

         // A register being waited on must belong to a pending ROB entry.
         if( await_rs && !pr_in_flight[rs] ) $stop;
         if( await_rs && !pr_pending[rs] ) $stop;
         if( await_rt && !pr_in_flight[rt] ) $stop;
         if( await_rt && !pr_pending[rt] ) $stop;

         // A predicted branch must own a branch-queue bit that nothing
         // else has claimed and that is set in compare_bmask.
         {bq_idx,pb} = iqi[i] >> 1 + 1 + 2 * 32;
         if( pb && shadow_bmask[bq_idx] ) $stop;
         if( pb ) shadow_bmask[bq_idx] = 1;
         if( pb && compare_bmask[bq_idx] !== 1 ) $stop;

      end

      // More Branch Queue

      // Extract the branch-queue index and predicted-branch bit of the
      // instructions in RR and EX.
      {qu_rr_bq_idx,qu_rr_pb} = qu_rr_elt >> 2;
      {rr_ex_bq_idx,rr_ex_pb} = rr_ex_elt >> 2;

      // CHECK applies the same three branch-queue tests to a pipeline stage.
   `define CHECK(occ,bp,idx) if((occ)&&(bp))begin \
      if(shadow_bmask[idx])$stop;\
         if(compare_bmask[idx]!==1)$stop;shadow_bmask[idx]=1; end

      //`CHECK(id_qu_occ,id_qu_bp,id_qu_bq_idx)
      `CHECK(qu_rr_occ,qu_rr_pb,qu_rr_bq_idx)
      `CHECK(rr_ex_occ,rr_ex_pb,rr_ex_bq_idx)
      `CHECK(ex_wb_occ,ex_wb_bp,ex_wb_bq_idx)

   `undef CHECK

      if( backup_id_map_sr[1] ) shadow_bmask[bq_tail] = 1;

      // Every bit of compare_bmask must have been claimed exactly once.
      if( shadow_bmask !== compare_bmask ) $stop;

      // Register Maps

      pr_in_id = 0;
      pr_in_co = 0;
      pr_ref = 0;

      // No physical register may appear twice in the ID map or twice in
      // the commit map.
      for(i=0; i<32; i=i+1) begin
         if( pr_in_id[reg_id_map[i]] ) $stop;
         pr_in_id[reg_id_map[i]] = 1;
         if( pr_in_co[reg_co_map[i]] ) $stop;
         pr_in_co[reg_co_map[i]] = 1;
      end

      // ROB entries from head to tail: each names a distinct physical
      // register that is not in the commit map.
      for( rob_idx = rob_head;
           rob_idx != rob_tail;
           rob_idx = rob_idx + 1 ) begin

         dst    = rob_dstp[rob_idx];
         {idest,done_1,dest_a,incumb} = rob_main[rob_idx];

         if( pr_ref[dst] ) $stop;
         pr_ref[dst] = 1;
         pr_ref_who[dst] = rob_idx;

         if( pr_in_co[dst] ) $stop;
         if( !idest && pr_in_id[dst] ) $stop;

         if( idest && pr_in_id[incumb] ) $stop;

      end

      // Remaining ROB entries, from the tail around to the head: each
      // names a distinct physical register that is in neither map.
      for( rob_idx = rob_idx; // [sic]
           rob_idx != rob_head;
           rob_idx = rob_idx + 1 ) begin

         dst = rob_dstp[rob_idx];
         if( pr_ref[dst] ) begin
            $display("That's who:%d",pr_ref_who[dst]); $stop;
         end
         pr_ref[dst] = 1;
         pr_ref_who[dst] = rob_idx;

         if( {pr_in_id[dst],pr_in_co[dst]} !== 'b00 ) $stop;

      end

   end

   // Simulation-only check on the instruction selected for dispatch: every
   // source register it actually reads must be known, marked available,
   // and hold a value that is not all x.
   always @( posedge clk ) if( iqi_something_ready ) begin:DB1

      reg [`PRF_idxr] rs, rt;
      reg [2:0] alu_a_src, alu_b_src;

      // rs and rt are the top two fields of the element; the operand
      // source selectors lie 43 bits further down.
      {rs, rt} = iqi[iqi_ready_idx] >> IQ_elt_sze - 2 * PRF_lg;
      {alu_a_src, alu_b_src} = iqi[iqi_ready_idx] >> IQ_elt_sze - 2*PRF_lg - 43;

      if( alu_a_src === SRC_rs ) begin
         if( rs === 'bx ) $stop;
         if( prf_avail[rs] !== 1 ) $stop;
         if( prf_value[rs] === 'bx ) $stop;
      end
      if( alu_b_src === SRC_rt ) begin
         if( rt === 'bx ) $stop;
         if( prf_avail[rt] !== 1 ) $stop;
         if( prf_value[rt] === 'bx ) $stop;
      end
   end


   // exemplar translate_on

endmodule


// Register file with two read ports and one write port; 32 registers of
// 32 bits each.
//
//   data_out_1, data_out_2   Read data for addr_1 and addr_2.
//   addr_1, addr_2           Read addresses.
//   addr_3, data_in_3        Write address and write data.
//   clk                      Writes take effect on the positive edge.
//
// A read of the register currently addressed by the write port returns
// data_in_3 directly, except for register 0.  Register 0 is never
// written.
//
module reg_file(data_out_1, data_out_2, addr_1, addr_2,
                addr_3, data_in_3, clk);
   output [31:0] data_out_1, data_out_2;
   input [4:0]   addr_1, addr_2, addr_3;
   input [31:0]  data_in_3;
   input         clk;

   reg [31:0]    storage [0:31];

   // A read port bypasses the array when it names the non-zero register
   // that is being written.
   wire          bypass_1 = addr_1 && addr_1 == addr_3;
   wire          bypass_2 = addr_2 && addr_2 == addr_3;

   assign data_out_1 = bypass_1 ? data_in_3 : storage[addr_1];
   assign data_out_2 = bypass_2 ? data_in_3 : storage[addr_2];

   // Write port.  Writes to register 0 are ignored.
   always @( posedge clk ) begin
      if( addr_3 ) storage[addr_3] <= data_in_3;
   end

endmodule


// Combinational ALU.
//
//   alu_out   32-bit result.
//   alu_a     Operand A.  For the shift operations its low five bits are
//             the shift amount.
//   alu_b     Operand B.  For the shift operations it is the value shifted;
//             for OP_b it is passed through unchanged.
//   alu_op    Operation selector; see the OP_ parameters below.  Any other
//             value invokes the UNEXPECTED macro.
//
// The comparison operations (slt, seq, sne) produce 0 or 1.
//
module alu(alu_out,alu_a,alu_b,alu_op);
   output [31:0] alu_out;
   input [31:0]  alu_a, alu_b;
   input [4:0]   alu_op;

   reg [31:0]    alu_out;

   // Sets var to zero.
   // NOTE(review): presumably here for the UNEXPECTED macro, which is
   // defined elsewhere in the file -- confirm before removing.
   task unexpected;
      inout [31:0] var;
      input [10:0] control;
      var = 0;
   endtask

   // Control Signal Value Names
   parameter  OP_xxx = 5'h0;  // Don't care.
   parameter  OP_add = 5'h0;
   parameter  OP_sll = 5'h1;
   parameter  OP_srl = 5'h2;
   parameter  OP_xor = 5'h3;
   parameter  OP_sub = 5'h4;
   parameter  OP_or  = 5'h5;
   parameter  OP_and = 5'h6;
   parameter  OP_slt = 5'h7;
   parameter  OP_seq = 5'h8;
   parameter  OP_b   = 5'h9;
   parameter  OP_sne = 5'ha;

   always @( alu_a or alu_b or alu_op )
     case( alu_op )
       OP_add  : alu_out = alu_a + alu_b;
       OP_and  : alu_out = alu_a & alu_b;
       OP_or   : alu_out = alu_a | alu_b;
       OP_xor  : alu_out = alu_a ^ alu_b;
       OP_sub  : alu_out = alu_a - alu_b;
       // Signed less-than.  The relational operator compares these
       // operands as unsigned values, so the sign bit of each operand is
       // inverted first: that maps two's-complement order onto unsigned
       // order (negative values sort below non-negative ones).  Merely
       // replicating the sign bit would leave the unsigned order unchanged.
       OP_slt  : alu_out = {~alu_a[31],alu_a[30:0]} < {~alu_b[31],alu_b[30:0]};
       OP_sll  : alu_out = alu_b << alu_a[4:0];
       OP_srl  : alu_out = alu_b >> alu_a[4:0];
       OP_seq  : alu_out = alu_a == alu_b;
       OP_sne  : alu_out = alu_a != alu_b;
       OP_b    : alu_out = alu_b;
       default : `UNEXPECTED(alu_out,alu_op);
     endcase

endmodule

// exemplar translate_off

// Top-level system: one cpu_p1 instance connected to one memory_2p
// instance.
//
//   exc    Exception output, driven by the CPU.
//   reset  Reset input, passed to the CPU.
//   clk    Clock, passed to the CPU and the memory.
//
module system_p1(exc,reset,clk);
   input reset,clk;
   output [7:0] exc;

   // Signals between the CPU and the memory.
   // NOTE(review): going by the names, the *_1 signals form a read-only
   // port and the *_2 signals a port with size and write enable; the port
   // declarations of cpu_p1 and memory_2p are not visible here -- confirm.
   wire [31:0] cpu_data_out_2, addr_1, addr_2, mem_data_out_1, mem_data_out_2;
   wire [2:0]  mem_err_out_1, mem_err_out_2;
   wire [1:0]  size_2;
   wire        we_2;


   cpu_p1 cpu1(exc,
               cpu_data_out_2, addr_1, addr_2, size_2, we_2,
               mem_data_out_1, mem_data_out_2,
               mem_err_out_1,mem_err_out_2,
               reset,clk);

   memory_2p m1( mem_data_out_1, mem_err_out_1, addr_1,
                 mem_data_out_2 ,mem_err_out_2, addr_2, size_2, we_2,
                 cpu_data_out_2,
                 clk);

endmodule

// A makefile would be better...
`include "mips_memory.v"

`include "mipspipetb.v"