/// LSU EE 4720 -- Spring 2002 -- Computer Architecture
//
/// Pipelined Hardwired Control MIPS with Bypassing and FP
//
// Implements a small subset of MIPS32 instructions and features.

//
// Time-stamp: <18 March 2002, 10:33:03 CST, koppel@sol>
//

/// Limitations
//
//  This code will probably never implement all of MIPS32, but the
//  limitations below may be addressed at some time.
//
//  Omits division and other FP instructions.
//  Does not handle double-precision numbers.
//  Does not perform format conversions.


 /// Specify Program to Load
//
// A quick-and-dirty method of loading a program is used. (For now.)
// Therefore the description must be re-compiled each time the program
// is changed.  (The program is assembled by SPIM and some perl code.)
//
`define MIPS_PROG "fptest.v"



 /// Unexpected Macro
//
// Used in the default case item of case statements when the default
// item should not be executed.  Used in case statements that assign
// the same register in each item.  If the default item is executed
// (indicating a programming bug) simulation is stopped and hopefully
// the bug is fixed.  For synthesis, the unexpected macro would assign
// the variable, avoiding the synthesis of a latch.  Alas, Leonardo
// Spectrum does not recognize macros with parameters so a task is
// used to assign the variable.  That task must be included in each
// module. (If Leonardo understood hierarchical references, the task
// could be put in a special utility module, but it can't.)

// Two expansions of UNEXPECTED are provided.
//
// When exemplar is defined, UNEXPECTED is just the name of the
// per-module task "unexpected", so a use such as UNEXPECTED(v,c);
// becomes a task call that clears v.
//
// Otherwise UNEXPECTED is a macro with parameters: it clears var and
// stops the simulation unless bit 0 of ctrl is x.  The expansion ends
// with "0:" so that the semicolon written at the use site completes an
// extra, empty case item; the macro is therefore only usable as the
// statement of a case item.  Note that a selector value of 0 that is
// not matched by an earlier item will match this empty item.
`ifdef exemplar
 `define UNEXPECTED unexpected
`else
 `define UNEXPECTED(var,ctrl) begin var = 0; if( ctrl[0] !== 1'bx ) $stop; end 0:
`endif

   
/// 
/// MIPS Processor 
///
   
module cpu_p1(exc,data_out_2, addr_1,addr_2,size_2,we_2,
           data_in_1,data_in_2, mem_error_in_1,mem_error_in_2,reset,clk);
   input [31:0] data_in_1, data_in_2;
   input [2:0]  mem_error_in_1, mem_error_in_2;
   input reset,clk;
   output [7:0] exc;
   output [31:0] data_out_2, addr_1, addr_2;
   output [1:0]  size_2;
   output        we_2;

   // Stand-in used when the UNEXPECTED macro expands to a task call:
   // clears the variable passed as the first argument.  The second
   // argument is accepted so call sites have the same shape as the
   // macro form; it is not examined.
   task unexpected;
      inout [31:0] var;
      input [10:0] control;
      begin
         var = 32'd0;
      end
   endtask

   ///
   /// MIPS CONSTANTS
   ///
   //
   // Defined by the ISA

   /// "opcode" Field Values
   //
   parameter  O_rfmt = 6'h0;
   parameter  O_j    = 6'h2;
   parameter  O_beq  = 6'h4;
   parameter  O_bne  = 6'h5;
   parameter  O_addi = 6'h8;
   parameter  O_slti = 6'ha;
   parameter  O_andi = 6'hc;
   parameter  O_ori  = 6'hd;
   parameter  O_lui  = 6'hf;
   parameter  O_cop1 = 6'h11;
   parameter  O_lw   = 6'h23;
   parameter  O_lbu  = 6'h24;
   parameter  O_sw   = 6'h2b;
   parameter  O_sb   = 6'h28;
   parameter  O_lwc1 = 6'h31;

   /// Not-So-Special Special function Field Values
   //
   parameter  F_sll = 6'h0;
   parameter  F_srl = 6'h2;
   parameter  F_sys = 6'hc;
   parameter  F_add = 6'h20;
   parameter  F_sub = 6'h22;
   parameter  F_and = 6'h24;
   parameter  F_or  = 6'h25;
   parameter  F_xor = 6'h26;

   /// Coprocessor 1 (FP) function Field Values
   //
   parameter  C1_add = 6'h0;
   parameter  C1_sub = 6'h1;
   parameter  C1_mul = 6'h2;

   /// Coprocessor 1 (FP) fmt Field Values
   //
   parameter  FMT_S = 5'o20;
   parameter  FMT_D = 5'o21;
   parameter  FMT_W = 5'o24;
   

   ///
   /// IMPLEMENTATION CONSTANTS
   ///
   //
   // Defined for this implementation, not standardized.  These codes
   // are used by other modules in this implementation, so they should
   // be changed consistently.

   /// Processor Exception Codes
   //
   parameter   EXC_none   = 8'd0;
   parameter   EXC_if_bus = 8'd1; // Bus Error (Mis-aligned address.)
   parameter   EXC_if_seg = 8'd2; // Bad Address
   parameter   EXC_id_ins = 8'd3; // Illegal (Reserved) Instruction
   parameter   EXC_id_sys = 8'd5; // Syscall Instruction
   parameter   EXC_me_bus = 8'd6; // Bus Error (Mis-aligned address.)
   parameter   EXC_me_seg = 8'd7; // Bad Address
   //
   // The exception codes above are not MIPS codes, though they are
   // similar.  (For one thing, MIPS does not use the bus exception
   // for a misaligned address.)  For the real codes see Table 6-17 in
   //http://www.ece.lsu.edu/ee4720/mips32v3.pdf

   
   /// ALU Operations + Extra Bit
   //
   // The lower 5 bits of the codes below are used by the ALU, the
   // sixth bit is used for special cases and is removed in the ID
   // stage.
   //
   parameter  OP_xxx = 6'h0;  // Don't care.
   parameter  OP_add = 6'h0;
   parameter  OP_sll = 6'h1;
   parameter  OP_srl = 6'h2;
   parameter  OP_xor = 6'h3;
   parameter  OP_sub = 6'h4;
   parameter  OP_or  = 6'h5;
   parameter  OP_and = 6'h6;
   parameter  OP_slt = 6'h7;
   parameter  OP_seq = 6'h8;
   parameter  OP_b   = 6'h9;
   parameter  OP_ill = 6'h20;  // Illegal Instruction
   parameter  OP_sys = 6'h21;  // Syscall

   /// Memory Exception Codes
   //
   parameter   MEM_ERR_none = 0;
   parameter   MEM_ERR_bus  = 1;   // Bad alignment.
   parameter   MEM_ERR_seg  = 2;   // Bad address.


   ///
   /// MODULE CONSTANTS
   ///
   //
   // Defined for this module, intended to improve readability of code.

   /// PC Mux
   //
   parameter  PC_npc = 2'd0;
   parameter  PC_dsp = 2'd1;  // Displacement (Branches)
   parameter  PC_rgn = 2'd2;  // Region (Jumps)
   parameter  PC_rs  = 2'd3;

   /// ALU Muxen
   //
   parameter  SRC_xx = 3'd4; // Won't be used, avoids stalls on alu_a and alu_b.

   parameter  SRC_me = 3'd1;
   parameter  SRC_wb = 3'd2;

   parameter  SRC_rs = 3'd3;
   parameter  SRC_sa = 3'd4;
   
   parameter  SRC_rt = 3'd3;
   parameter  SRC_im = 3'd4;
   parameter  SRC_np = 3'd5;


   /// Register Number to Write Back
   //
   parameter  WB_00 = 3'd0;
   parameter  WB_rd = 3'd1;
   parameter  WB_rt = 3'd2;
   parameter  WB_ft = 3'd3;

   /// Immediate Formatting
   //
   parameter  IMM_x = 3'd0; // Don't care.
   parameter  IMM_s = 3'd0;
   parameter  IMM_u = 3'd1;
   parameter  IMM_l = 3'd2;

   /// Memory Access Size
   //
   parameter  ME_SIZE_0 = 2'd0;
   parameter  ME_SIZE_1 = 2'd1;
   parameter  ME_SIZE_2 = 2'd2;
   parameter  ME_SIZE_4 = 2'd3;
   parameter  ME_CONTROL_nop = { ME_SIZE_0, 1'd0 };

   /// FP Stuff
   //
   parameter INT_stages = 2;
   parameter AFU_stages = 4;
   parameter MFU_stages = 6;
   //                          FU  WB  DST Ver PC   DIN
   parameter FPRR_entry_size = 3 + 1 + 5 + 3 + 32 + 32;
`define FPRR_range FPRR_entry_size*(MFU_stages+2)-1:0
`define FPRR_add  FPRR_entry_size*(AFU_stages+2)-1:FPRR_entry_size*(AFU_stages+1)
`define FPRR_mul  FPRR_entry_size*(MFU_stages+2)-1:FPRR_entry_size*(MFU_stages+1)
`define FPRR_int  FPRR_entry_size*(INT_stages+2)-1:FPRR_entry_size*(INT_stages+1)
`define FPRR_neck FPRR_entry_size*2-1:FPRR_entry_size
`define FPRR_head FPRR_entry_size-1:0


   // Reservation Register Entries.
   //
   // Specifies which functional unit will be writing which register file.
   // FP functional units always write FP registers, so the register
   // file is omitted.  (Only the last few entries explicitly specify
   // a register file.)
   //
   parameter FP_no  = 3'd0;  // No instruction writing back.
   parameter FP_add = 3'd1;
   parameter FP_mul = 3'd2;
   parameter FP_div = 3'd3;
   parameter FP_itf = 3'd4;  // Integer pipeline to FP register.
   parameter FP_iti = 3'd5;  // Integer pipeline to integer register.

   parameter FOP_xxx = 6'd0;
   parameter FOP_add = 6'd0;
   parameter FOP_sub = 6'd2;
   parameter FOP_mul = 6'd3;

   parameter FWB_00  = 2'd0; // No fp register written
   parameter FWB_fd  = 2'd1;
   parameter FWB_rt  = 2'd2;

   parameter FSRC_xxx   = 2'd0;
   parameter FSRC_fpr   = 2'd0;
   parameter FSRC_wb    = 2'd1;
   parameter FSRC_stall = 2'd2;


   ///
   /// DECLARATIONS
   ///

   /// Pipeline Latches and Some Pipeline-Latch Inputs
   //
   reg [31:0] if_pc, next_if_pc;
   reg [31:0] if_id_ir;
   reg [31:0] if_id_npc, if_id_pc;
   reg [7:0]  if_id_exc, next_if_id_exc;
   reg        if_id_occ;
   reg [31:0] tb_if_din, tb_if_id_din;

   reg [31:0] id_ex_npc, id_ex_pc;
   reg [31:0] id_ex_rs_val, id_ex_fs_val;
   reg [4:0]  id_ex_sa,        next_id_ex_sa;
   reg [31:0] id_ex_imm,       next_id_ex_imm;
   reg [4:0]  id_ex_dst,       next_id_ex_dst;
   reg [2:0]  id_ex_alu_a_src, next_id_ex_alu_a_src;
   reg [4:0]  id_ex_alu_op,    next_id_ex_alu_op;
   reg [2:0]  id_ex_alu_b_src, next_id_ex_alu_b_src;
   reg [2:0]  id_ex_me,        next_id_ex_me;
   reg [7:0]  id_ex_exc,       next_id_ex_exc;
   reg [31:0] id_ex_rt_val, id_ex_ft_val;
   reg [2:0]  id_fu_needed;
   reg [5:0]  id_ex_fp_op,     next_id_ex_fp_op;
   reg [1:0]  id_ex_fs_src,    next_id_ex_fs_src;
   reg [1:0]  id_ex_ft_src,    next_id_ex_ft_src;
   reg [2:0]  id_next_ver;
   reg        id_ex_occ;
   reg        id_fp_op_wb;
   reg [31:0] tb_id_ex_din;
   reg [`FPRR_range] fp_res_reg;

   reg [31:0] ex_me_npc, ex_me_pc;
   reg [31:0] ex_me_alu;
   reg [31:0] ex_me_rt_val; 
   reg [1:0]  ex_me_size;
   reg        ex_me_we;
   reg [4:0]  ex_me_dst;
   reg [7:0]  ex_me_exc;
   wire [7:0] next_ex_me_exc;
   reg [31:0] tb_ex_me_din;
   reg        ex_me_occ;

   reg [31:0] me_wb_pc;
   reg [31:0] me_wb_npc;
   reg [31:0] me_wb_alu;
   reg [31:0] me_wb_md;
   reg [4:0]  me_wb_dst;
   reg [31:0] tb_me_wb_din;
   reg        me_wb_occ;
   reg [7:0]  me_wb_exc, next_me_wb_exc;
   reg        me_wb_from_mem;

   /// Interstage Signals
   //
   reg        hold_id;
   wire       squash_if_id, squash_id;
   reg [1:0]  pc_src;
   wire       load_in_ex;

   /// Instruction Fields
   //
   reg [4:0]  rs, rt, rd, sa, fs, ft, fd, fmt;
   reg [5:0]  opcode, func;
   reg [25:0] ii;
   reg [15:0] immed;

   /// Some ALU AND GPR Connections
   //
   wire [31:0] alu_out;
   reg [31:0]  alu_a, alu_b;
   wire [31:0] gpr_rs_val, gpr_rt_val, gpr_write_val;
   
   /// Some Memory Connections
   //
   assign     addr_1     = if_pc;
   assign     addr_2     = ex_me_alu;
   assign     we_2       = ex_me_we;
   assign     size_2     = ex_me_size;
   assign     data_out_2 = ex_me_rt_val;


   ///
   /// INSTANTIATIONS
   ///
   
   alu our_alu(alu_out, alu_a, alu_b, id_ex_alu_op);
   reg_file gpr(gpr_rs_val, gpr_rt_val, rs, rt, me_wb_dst, gpr_write_val, clk);

   reg [31:0] fpr [0:31];
   reg [2:0]  fpr_id_ver [0:31];
   reg [2:0]  fpr_wb_ver [0:31];

   wire        wn_fpr_write;
   
   reg [31:0]  fpr_write_val;
   reg         wb_fpr_write;

   wire [2:0]  wb_unit, wb_ver;
   wire [4:0]  wb_fp_dst;
   wire [31:0] wb_fp_pc, tb_wb_din;
   wire        wb_fpr_writer;
   assign {wb_unit,wb_fpr_writer,wb_fp_dst,wb_ver,wb_fp_pc,tb_wb_din}
          = fp_res_reg[`FPRR_head];

   wire [2:0]  wn_unit, wn_ver;  // Write Next (or neck)
   wire [4:0]  wn_fp_dst;
   wire [31:0] wn_fp_pc, tb_wn_din;
   wire        wn_fpr_writer;
   assign {wn_unit,wn_fpr_writer,wn_fp_dst,wn_ver,wn_fp_pc,tb_wn_din}
          = fp_res_reg[`FPRR_neck];

   wire [`FPRR_head] fp_res_pos_add = fp_res_reg[`FPRR_add];
   wire [`FPRR_head] fp_res_pos_mul = fp_res_reg[`FPRR_mul];
   wire [`FPRR_head] fp_res_pos_int = fp_res_reg[`FPRR_int];

   wire [31:0] fpr_ft_val 
               = wb_fpr_write && wb_fp_dst == ft ? fpr_write_val : fpr[ft];
   wire [31:0] fpr_fs_val 
               = wb_fpr_write && wb_fp_dst == fs ? fpr_write_val : fpr[fs];
   

   ///
   /// PIPELINE STARTS HERE
   ///

   ///   
   /// Instruction Fetch
   ///
   // Next-PC multiplexer (combinational).  pc_src chooses among the
   // rs register value, a jump within the current 256 MiB region, a
   // branch displacement added to if_id_npc, and sequential fetch.
   always @( pc_src or if_pc or if_id_npc or immed or ii
             or gpr_rs_val or ex_me_alu )
     case( pc_src )
       PC_rs   : next_if_pc = gpr_rs_val;
       PC_rgn  : next_if_pc = { if_id_npc[31:28], ii, 2'b0 };
       // Sign-extended word displacement.
       PC_dsp  : next_if_pc = if_id_npc + { {14{immed[15]}}, immed, 2'b0 };
       PC_npc  : next_if_pc = if_pc + 32'd4;
       default : `UNEXPECTED(next_if_pc,pc_src);
     endcase

   // Translate the instruction-port memory error code into the
   // processor exception code carried down the pipeline.
   always @( mem_error_in_1 )
     case( mem_error_in_1 )
       MEM_ERR_bus  : next_if_id_exc = EXC_if_bus;
       MEM_ERR_seg  : next_if_id_exc = EXC_if_seg;
       MEM_ERR_none : next_if_id_exc = EXC_none;
       default      : `UNEXPECTED(next_if_id_exc, mem_error_in_1);
     endcase

   // Program counter and IF/ID pipeline latch.
   //
   // On reset the PC is loaded with the entry point and the IF/ID latch
   // is marked unoccupied.  Otherwise, unless ID is holding, the fetched
   // instruction and its bookkeeping advance into IF/ID.
   always @( posedge clk )
     if( reset )
       begin
          if_id_occ <= 0;
          // The value below is the usual entry point for SPIM-compiled
          // code.  Real MIPS processors reset PC to 'hbfc00000.
          if_pc     <= 'h400000;
          tb_if_din <= 1;
       end
     else if( ~hold_id || squash_if_id )
       begin
          if_id_pc  <= if_pc;
          if_pc     <= next_if_pc;
          if_id_ir  <= data_in_1;
          if_id_npc <= next_if_pc;
          // Latch only the translated EXC_ code.  Latching the raw
          // memory error bits as well would override this assignment
          // (the last non-blocking assignment wins) and let undefined
          // error codes alias onto unrelated exception codes.
          if_id_exc <= next_if_id_exc;
          if_id_occ <= ~squash_if_id;
          // Testbench bookkeeping: dynamic instruction number.
          tb_if_id_din <= tb_if_din;
          tb_if_din <= tb_if_din + 1;

       end


   ///
   /// Instruction Decode
   ///

   // Stage-Local Declarations
   //
   // These are assigned by the combinational decode block that follows
   // (except for the version-lookup wires at the end).
   //
   reg [17:0] d_a_op_b;  // Dest <- operand_a op operand_b, immed_fmt
   reg [2:0]  size_we;   // Memory control bits.
   reg [2:0]  immed_fmt; // Formatting to apply to immediate.
   reg [2:0]  dest_field;   // WB_xx code: which field names the GPR destination.
   reg [1:0]  fdest_field;  // FWB_xx code: which field names the FP destination.
   reg        extra_op_bit; // Sixth op bit; set for OP_ill and OP_sys.
   reg        op_rs, op_rt; // Instruction reads rs / rt.
   reg        cant_bypass;  // Operands can't be bypassed; must wait for them.
   reg        branch_in_id;
   reg [2:0]  alu_a_src_maybe, alu_b_src_maybe; // ALU sources before bypass selection.
   // Packed as: fdest_field, id_fu_needed, next_id_ex_fp_op,
   //            id_fp_op_wb, fp_op_fs, fp_op_ft.
   reg [2+3+6+3-1:0] fp_info;
   // Fields of the reservation register entry at the position this
   // instruction would occupy.
   reg [2:0]  runit, rver;
   reg        rwb;
   reg [31:0] rpc, rdin;
   reg [4:0]  rdst, id_fp_dst;
   reg        fp_hold_id;         // FP-related reason to hold the instruction in ID.
   reg        fp_op_fs, fp_op_ft; // Instruction reads fs / ft.
   reg [2:0]  cur_ver;
   reg        illegal_format;     // FP add/mul with a fmt other than FMT_S.

   // Version numbers of the FP source and destination registers.
   wire [2:0] fp_op_fs_ver    = fpr_id_ver[fs];
   wire [2:0] fp_op_ft_ver    = fpr_id_ver[ft];
   wire [2:0] fp_op_fs_wb_ver = fpr_wb_ver[fs];
   wire [2:0] fp_op_ft_wb_ver = fpr_wb_ver[ft];
   wire [2:0] id_incumb_ver   = fpr_id_ver[id_fp_dst];

   // Instruction decode (combinational).
   //
   // Splits if_id_ir into fields, looks up the control bundle for the
   // opcode, chooses FP and integer operand sources (register file or
   // bypass), and computes hold_id, which keeps the instruction in ID.
   always @( if_id_ir or id_ex_dst or ex_me_dst or me_wb_dst
             or squash_id or if_id_occ or if_id_npc or load_in_ex
             or fp_op_fs_ver or fp_op_ft_ver
             or fp_op_fs_wb_ver or fp_op_ft_wb_ver
             or fp_res_pos_add or fp_res_pos_mul or fp_res_pos_int
             or wn_ver or wn_fp_dst or id_incumb_ver )

     begin

        // Integer-format fields; the FP field names alias the same bits.
        {opcode,rs,rt,rd,sa,func} = if_id_ir;
        {fmt,ft,fs,fd}            = {rs,rt,rd,sa};
        ii                        = if_id_ir[25:0];
        immed                     = if_id_ir[15:0];

        // Note: Case statements below could be synthesized as a memory
        // which is why only constants appear on the RHS of the assignments.
        case( opcode )

          O_rfmt:

            // R-Format Instructions

            case( func )
              F_sll   : d_a_op_b = {WB_rd, SRC_sa, OP_sll, SRC_rt, IMM_x};
              F_sys   : d_a_op_b = {WB_00, SRC_xx, OP_sys, SRC_xx, IMM_x};
              F_add   : d_a_op_b = {WB_rd, SRC_rs, OP_add, SRC_rt, IMM_x};
              F_sub   : d_a_op_b = {WB_rd, SRC_rs, OP_sub, SRC_rt, IMM_x};
              F_and   : d_a_op_b = {WB_rd, SRC_rs, OP_and, SRC_rt, IMM_x};
              F_or    : d_a_op_b = {WB_rd, SRC_rs, OP_or,  SRC_rt, IMM_x};
              F_xor   : d_a_op_b = {WB_rd, SRC_rs, OP_xor, SRC_rt, IMM_x};
              default : d_a_op_b = {WB_00, SRC_rs, OP_ill, SRC_rt, IMM_x};
            endcase

          O_cop1      : d_a_op_b = {WB_00, SRC_xx, OP_xxx, SRC_xx, IMM_x};

          // I- and J-Format Instructions

          O_lw, O_lbu 
                    : d_a_op_b = {WB_rt, SRC_rs, OP_add, SRC_im, IMM_s};
          O_lwc1    : d_a_op_b = {WB_ft, SRC_rs, OP_add, SRC_im, IMM_s};
          O_sb      : d_a_op_b = {WB_00, SRC_rs, OP_add, SRC_im, IMM_s};
          O_lui     : d_a_op_b = {WB_rt, SRC_rs, OP_or,  SRC_im, IMM_l};
          O_addi    : d_a_op_b = {WB_rt, SRC_rs, OP_add, SRC_im, IMM_s};
          O_andi    : d_a_op_b = {WB_rt, SRC_rs, OP_and, SRC_im, IMM_u};
          O_ori     : d_a_op_b = {WB_rt, SRC_rs, OP_or,  SRC_im, IMM_u};
          O_slti    : d_a_op_b = {WB_rt, SRC_rs, OP_slt, SRC_im, IMM_s};
          O_j       : d_a_op_b = {WB_00, SRC_xx, OP_xxx, SRC_xx, IMM_x};
          O_bne, O_beq 
                    : d_a_op_b = {WB_00, SRC_xx, OP_xxx, SRC_xx, IMM_x};
          default   : d_a_op_b = {WB_00, SRC_rs, OP_ill, SRC_im, IMM_s};

        endcase

        // Unpack the control bundle.  The six-bit OP_ code splits into
        // extra_op_bit and the five-bit code passed on to the ALU.
        {  dest_field,
           alu_a_src_maybe,
           extra_op_bit,
           next_id_ex_alu_op,
           alu_b_src_maybe,
           immed_fmt 
           }                    = d_a_op_b;

        // FP control bundle.  The last three bits are, in order:
        // writes an FP register, reads fs, reads ft.
        if( opcode == O_cop1 ) begin
           case( func )
             C1_add: fp_info = { FWB_fd, FP_add, FOP_add, 3'b111 };
             C1_sub: fp_info = { FWB_fd, FP_add, FOP_sub, 3'b111 };
             C1_mul: fp_info = { FWB_fd, FP_mul, FOP_mul, 3'b111 };
             default: `UNEXPECTED(fp_info, func);
           endcase
        end else if( dest_field == WB_ft )
          fp_info = { FWB_rt, FP_itf, FOP_xxx, 3'b100 };
        else
          fp_info = { FWB_00, FP_iti, FOP_xxx, 3'b000 };

        {fdest_field, id_fu_needed, next_id_ex_fp_op, id_fp_op_wb,
         fp_op_fs, fp_op_ft} = fp_info;

        case( fdest_field )
          FWB_fd: id_fp_dst = fd;
          FWB_rt: id_fp_dst = rt;
          default: id_fp_dst = rt; // Dummy
        endcase

        // Only the single-precision format is accepted by the FP units.
        case( id_fu_needed )
          FP_add, FP_mul: illegal_format = fmt != FMT_S;
          default:        illegal_format = 0;
        endcase
        
        // Read the reservation register entry at the position the needed
        // unit would claim; a non-empty entry forces a hold (see
        // fp_hold_id below).
        case( id_fu_needed )
          FP_add : {runit,rwb,rdst,rver,rpc,rdin} = fp_res_pos_add;
          FP_mul : {runit,rwb,rdst,rver,rpc,rdin} = fp_res_pos_mul;
          FP_iti,
          FP_itf : {runit,rwb,rdst,rver,rpc,rdin} = fp_res_pos_int;
          FP_no  : {runit,rwb,rdst,rver,rpc,rdin} = 0;
          default : begin $stop; {runit,rwb,rdst,rver,rpc,rdin} = 0;  end
        endcase

        // FP source selection for fs.  Items are tried in order: operand
        // not used; register file is up to date (ID and WB versions
        // agree); the value is the one at the neck of the reservation
        // register; otherwise stall.
        case( 1 )
          !fp_op_fs:      next_id_ex_fs_src = FSRC_xxx;
          fp_op_fs_ver == fp_op_fs_wb_ver:
                          next_id_ex_fs_src = FSRC_fpr;
          {fs,fp_op_fs_ver} == {wn_fp_dst,wn_ver}:
                          next_id_ex_fs_src = FSRC_wb;
          1:              next_id_ex_fs_src = FSRC_stall;
        endcase

        // Same selection for ft.
        case( 1 )
          !fp_op_ft:      next_id_ex_ft_src = FSRC_xxx;
          fp_op_ft_ver == fp_op_ft_wb_ver:
                          next_id_ex_ft_src = FSRC_fpr;
          {ft,fp_op_ft_ver} == {wn_fp_dst,wn_ver}:
                          next_id_ex_ft_src = FSRC_wb;
          1:              next_id_ex_ft_src = FSRC_stall;
        endcase

        id_next_ver = id_incumb_ver + 1;

        fp_hold_id = runit != FP_no
                     || next_id_ex_fs_src == FSRC_stall
                     || next_id_ex_ft_src == FSRC_stall;

        // Memory control: access size and write enable.
        case( opcode )
          O_lbu        : size_we = {ME_SIZE_1, 1'b0};
          O_lw, O_lwc1 : size_we = {ME_SIZE_4, 1'b0};
          O_sb         : size_we = {ME_SIZE_1, 1'b1};
          default      : size_we = {ME_SIZE_0, 1'b0};
        endcase

        case( {extra_op_bit,next_id_ex_alu_op} )
          OP_sys  : next_id_ex_exc = EXC_id_sys;
          OP_ill  : next_id_ex_exc = EXC_id_ins;
          default : next_id_ex_exc = illegal_format ? EXC_id_ins : EXC_none;
        endcase

        // Which integer source registers the instruction reads.
        case( opcode )
          O_bne, O_beq: {op_rs, op_rt} = 2'd3;
          O_sb:         {op_rs, op_rt} = 2'd3;
          default:      {op_rs, op_rt} = {alu_a_src_maybe == SRC_rs,
                                          alu_b_src_maybe == SRC_rt};
        endcase

        case( opcode )
          O_bne, O_beq: branch_in_id = 1;
          default:      branch_in_id = 0;
        endcase

        // Bypass selection for ALU operand a: a match against the
        // destination of the instruction now in EX selects SRC_me, a
        // match against the one now in ME selects SRC_wb.
        case( 1 )
          !rs || alu_a_src_maybe != SRC_rs : 
                            next_id_ex_alu_a_src = alu_a_src_maybe;
          rs == id_ex_dst : next_id_ex_alu_a_src = SRC_me;
          rs == ex_me_dst : next_id_ex_alu_a_src = SRC_wb;
          default :         next_id_ex_alu_a_src = alu_a_src_maybe;
        endcase

        // Bypass selection for ALU operand b.
        if( !rt || alu_b_src_maybe != SRC_rt )
          next_id_ex_alu_b_src = alu_b_src_maybe;
        else if( rt == id_ex_dst )
          next_id_ex_alu_b_src = SRC_me;
        else if( rt == ex_me_dst )
          next_id_ex_alu_b_src = SRC_wb;
        else
          next_id_ex_alu_b_src = alu_b_src_maybe;
        
        //Check for:  Store          Branch
        cant_bypass = size_we & 1 || branch_in_id
        //    Load
           || load_in_ex && (    next_id_ex_alu_a_src == SRC_me
                              || next_id_ex_alu_b_src == SRC_me );

        // Hold for an FP reason, or because a needed integer source is
        // the destination of an instruction in EX or ME and cannot be
        // bypassed.
        hold_id = fp_hold_id 
                  || if_id_occ && ~squash_id && cant_bypass
                  && ( ( op_rs && rs
                         && ( rs == id_ex_dst || rs == ex_me_dst) )
                       ||
                       ( op_rt && rt
                         && ( rt == id_ex_dst || rt == ex_me_dst) ) );

        // Immediate formatting: sign-extend, place in upper half, or
        // zero-extend.
        case( immed_fmt )
          IMM_s: next_id_ex_imm = { immed[15] ? 16'hffff : 16'h0, immed };
          IMM_l: next_id_ex_imm = { immed, 16'h0 };
          IMM_u: next_id_ex_imm = { 16'h0, immed };
          default: `UNEXPECTED(next_id_ex_imm,immed_fmt);
        endcase

        // GPR destination; zero when no GPR is written.
        case ( dest_field )
          WB_ft,
          WB_00: next_id_ex_dst = 0;
          WB_rd: next_id_ex_dst = rd;
          WB_rt: next_id_ex_dst = rt;
          default: `UNEXPECTED(next_id_ex_dst,dest_field);
        endcase

        next_id_ex_me = size_we;
        next_id_ex_sa = sa;
        
     end

   assign squash_id = squash_if_id && id_ex_occ;

   wire id_live = if_id_occ && ~squash_id;
   reg [1:0] pc_src_maybe;
     
   // PC source selection.  Branches are resolved here using the values
   // read from the register file; an instruction that is not live in
   // ID always selects sequential fetch.
   always @( id_live or opcode or gpr_rt_val or gpr_rs_val ) begin

      // Tentative choice, before the liveness check.
      case( opcode )
        O_j     : pc_src_maybe = PC_rgn;
        O_beq   : pc_src_maybe = gpr_rs_val != gpr_rt_val ? PC_npc : PC_dsp;
        O_bne   : pc_src_maybe = gpr_rs_val == gpr_rt_val ? PC_npc : PC_dsp;
        default : pc_src_maybe = PC_npc;
      endcase

      pc_src = id_live ? pc_src_maybe : PC_npc;

   end

   assign squash_if_id = 0;

   reg [`FPRR_head] res;

   wire [2:0] next_id_ex_fu = hold_id || !id_live ? FP_no : id_fu_needed;

   reg [`FPRR_range] fp_res_reg_cpy;

   // Reservation register update and FP operand latching.
   //
   // Each cycle an entry describing the instruction leaving ID is
   // inserted at the position belonging to the unit it uses (no entry
   // for FP_no), and the whole register then shifts down by one entry
   // toward the head.  res and fp_res_reg_cpy are scratch values
   // written with blocking assignments; fp_res_reg itself is updated
   // with a non-blocking assignment and is cleared on reset.
   always @( posedge clk ) begin

      res = {next_id_ex_fu, id_fp_op_wb, id_fp_dst, id_next_ver,
             if_id_pc, tb_if_id_din};

      fp_res_reg_cpy = fp_res_reg;
      
      case( next_id_ex_fu )
        FP_add         : fp_res_reg_cpy[`FPRR_add] = res;
        FP_mul         : fp_res_reg_cpy[`FPRR_mul] = res;
        FP_iti, FP_itf : fp_res_reg_cpy[`FPRR_int] = res;
        FP_no          : ;
        default        : `UNEXPECTED(res,next_id_ex_fu);
      endcase

      fp_res_reg   <= reset ? 0 : fp_res_reg_cpy >> FPRR_entry_size;
      
      // FP operand values and their source selections are latched every
      // cycle, whether or not ID is holding.
      id_ex_ft_val <= fpr_ft_val;
      id_ex_fs_val <= fpr_fs_val;
      id_ex_fs_src <= next_id_ex_fs_src;
      id_ex_ft_src <= next_id_ex_ft_src;

   end


   // ID-side FP register version numbers.
   //
   // All versions are cleared on reset.  Otherwise, when an instruction
   // that writes an FP register leaves ID, the version of its
   // destination is advanced.  The update is qualified by id_live, the
   // same condition used when inserting the reservation register
   // entry: advancing the version for an unoccupied or squashed ID slot
   // would leave the register waiting for a writeback that was never
   // scheduled, stalling every later reader.
   always @( posedge clk )
     if( reset ) begin:ID_VER
        integer i;
        for( i = 0; i < 32; i = i + 1 ) fpr_id_ver[i] <= 0;
     end else begin

        if( !hold_id && id_live && id_fp_op_wb )
          fpr_id_ver[id_fp_dst] <= id_next_ver;

     end


   // ID/EX pipeline latch.
   //
   // While ID is holding, or during reset, a bubble is placed in EX by
   // clearing the destination, memory control and occupancy.  Otherwise
   // the decoded controls and operand values advance.
   always @( posedge clk )
     if( hold_id || reset ) begin

        id_ex_occ <= 0;
        id_ex_me  <= ME_CONTROL_nop;
        id_ex_dst <= 0;

     end else begin

        // Bookkeeping.
        id_ex_occ       <= id_live;
        id_ex_pc        <= if_id_pc;
        id_ex_npc       <= if_id_npc;
        tb_id_ex_din    <= tb_if_id_din;

        // Operand values.
        id_ex_rs_val    <= gpr_rs_val;
        id_ex_rt_val    <= gpr_rt_val;
        id_ex_imm       <= next_id_ex_imm;
        id_ex_sa        <= next_id_ex_sa;

        // Execute-stage controls.
        id_ex_alu_op    <= next_id_ex_alu_op;
        id_ex_alu_a_src <= next_id_ex_alu_a_src;
        id_ex_alu_b_src <= next_id_ex_alu_b_src;
        id_ex_fp_op     <= next_id_ex_fp_op;

        // Later-stage controls; neutralized if the instruction is not live.
        id_ex_dst       <= id_live ? next_id_ex_dst : 5'b0;
        id_ex_me        <= id_live ? next_id_ex_me  : ME_CONTROL_nop;

        // An exception carried from IF takes priority over one found in ID.
        id_ex_exc       <= if_id_exc ? if_id_exc : next_id_ex_exc;

     end

   
   ///
   /// FP Execute
   ///

   // Operand selection: a source marked FSRC_fpr uses the value read
   // from the FP register file in ID; any other marking takes
   // fpr_write_val.
   wire [31:0] op_a = id_ex_fs_src != FSRC_fpr ? fpr_write_val : id_ex_fs_val;
   wire [31:0] op_b = id_ex_ft_src != FSRC_fpr ? fpr_write_val : id_ex_ft_val;

   wire        afu_rdy, mfu_rdy;
   wire [31:0] afu_result, mfu_result;

   // Note: Initiation interval not yet coded and so rdy not yet used.

   // Both units are instances of add_fu; they differ in the number of
   // stages and receive the same operands and operation code.
   add_fu #(AFU_stages,2) afu( .ready_next_cycle(afu_rdy),
                               .result(afu_result),
                               .a(op_a), .b(op_b),
                               .op(id_ex_fp_op),
                               .clk(clk) );
   add_fu #(MFU_stages,2) mfu( .ready_next_cycle(mfu_rdy),
                               .result(mfu_result),
                               .a(op_a), .b(op_b),
                               .op(id_ex_fp_op),
                               .clk(clk) );


   ///
   /// FP Writeback
   ///

   // The entry at the neck of the reservation register writes the FP
   // register file only if it is marked as a writer and its version is
   // the one currently recorded in ID for its destination register.
   wire wn_matches = fpr_id_ver[wn_fp_dst] == wn_ver;

   assign wn_fpr_write = wn_matches && wn_fpr_writer;

   // The write enable is used one cycle later, when the entry is at
   // the head.
   always @( posedge clk )
     wb_fpr_write <= wn_fpr_write;


   // Select the value offered to the FP register file according to the
   // unit named by the head reservation register entry.
   always @( wb_unit or afu_result or mfu_result or gpr_write_val )
     case( wb_unit )
       FP_no,
       FP_iti: fpr_write_val = 0;
       FP_itf: fpr_write_val = gpr_write_val;
       FP_mul: fpr_write_val = mfu_result;
       FP_add: fpr_write_val = afu_result;
       default: `UNEXPECTED(fpr_write_val,wb_unit);
     endcase


   // FP register file write port.
   always @( posedge clk )
     if( wb_fpr_write )
       fpr[wb_fp_dst] <= fpr_write_val;

   // WB-side version numbers: cleared on reset, and set to the version
   // of the entry at the neck whenever that entry is going to write.
   always @( posedge clk )
     if( reset ) begin:WB_VER
        integer i;
        for( i = 31; i >= 0; i = i - 1 )
          fpr_wb_ver[i] <= 0;
     end
     else if( wn_fpr_write )
       fpr_wb_ver[wn_fp_dst] <= wn_ver;

   
   ///
   /// Execute
   ///

   // ALU operand a: register value, next PC, zero-extended shift
   // amount, or a value bypassed from ME or WB.
   always @( id_ex_alu_a_src or id_ex_rs_val or id_ex_npc or id_ex_sa
             or gpr_write_val or ex_me_alu )
     case( id_ex_alu_a_src )
       SRC_me: alu_a = ex_me_alu;
       SRC_wb: alu_a = gpr_write_val;
       SRC_rs: alu_a = id_ex_rs_val;
       SRC_sa: alu_a = {27'd0, id_ex_sa};
       SRC_np: alu_a = id_ex_npc;
       default: `UNEXPECTED(alu_a,id_ex_alu_a_src);
     endcase

   // ALU operand b: register value, formatted immediate, or a value
   // bypassed from ME or WB.
   always @( id_ex_alu_b_src or id_ex_imm or id_ex_rt_val
             or gpr_write_val or ex_me_alu )
     case( id_ex_alu_b_src )
       SRC_me: alu_b = ex_me_alu;
       SRC_wb: alu_b = gpr_write_val;
       SRC_im: alu_b = id_ex_imm;
       SRC_rt: alu_b = id_ex_rt_val;
       default: `UNEXPECTED(alu_b,id_ex_alu_b_src);
     endcase


   // Split the EX-stage memory control bundle into access size and
   // write enable.
   wire [1:0] id_ex_size;
   wire       id_ex_we;
   assign     {id_ex_size,id_ex_we} = id_ex_me;
   // A load is in EX when the stage is occupied and its memory control
   // requests a non-zero access size without write enable.
   assign     load_in_ex            = id_ex_occ && !id_ex_we && id_ex_size;
   // No exception is generated in the EX stage itself.
   assign     next_ex_me_exc        = 0;
        
   // EX/ME pipeline latch.  Memory control is forced to a no-op on
   // reset or when the instruction carries an exception; destination
   // and occupancy are cleared on reset.
   always @( posedge clk ) begin

      // Bookkeeping.
      ex_me_occ    <= ~reset & id_ex_occ;
      ex_me_pc     <= id_ex_pc;
      ex_me_npc    <= id_ex_npc;
      tb_ex_me_din <= tb_id_ex_din;

      // Data.
      ex_me_alu    <= alu_out;
      ex_me_rt_val <= id_ex_rt_val;

      // Controls.
      ex_me_dst    <= reset ? 5'd0 : id_ex_dst;
      ex_me_exc    <= id_ex_exc ? id_ex_exc : next_ex_me_exc;
      { ex_me_size,
        ex_me_we } <= id_ex_exc || reset ? ME_CONTROL_nop : id_ex_me;

   end

   /// Memory

   // Translate the data-port memory error code into the processor
   // exception code.
   always @( mem_error_in_2 )
     case( mem_error_in_2 )
       MEM_ERR_bus  : next_me_wb_exc = EXC_me_bus;
       MEM_ERR_seg  : next_me_wb_exc = EXC_me_seg;
       MEM_ERR_none : next_me_wb_exc = EXC_none;
       default      : `UNEXPECTED(next_me_wb_exc, mem_error_in_2);
     endcase


   // ME/WB pipeline latch.  The destination register is cleared on
   // reset or when the memory access reports an error, so that no
   // general-purpose register is written.
   always @( posedge clk ) begin

      // Bookkeeping.
      me_wb_occ      <= ~reset & ex_me_occ;
      me_wb_pc       <= ex_me_pc;
      me_wb_npc      <= ex_me_npc;
      tb_me_wb_din   <= tb_ex_me_din;

      // Candidate writeback values and the selector between them.
      me_wb_alu      <= ex_me_alu;
      me_wb_md       <= data_in_2;
      me_wb_from_mem <= ex_me_size != 0;

      // Controls.  An earlier exception takes priority over a memory error.
      me_wb_dst      <= next_me_wb_exc || reset ? 5'd0 : ex_me_dst;
      me_wb_exc      <= ex_me_exc ? ex_me_exc : next_me_wb_exc;

   end

   /// Writeback

   // Value written to the general-purpose register file: loaded data
   // for memory accesses, otherwise the ALU result.
   assign gpr_write_val = me_wb_from_mem ? me_wb_md : me_wb_alu;
   // Exception code reported by the processor; zero unless WB is
   // occupied.  me_wb_exc is already eight bits wide, so it drives the
   // port directly (padding it to nine bits only caused an implicit
   // truncation).
   assign exc           = me_wb_occ ? me_wb_exc : 8'd0;


   ///
   /// TESTBENCH INTERFACE CODE
   ///
   // exemplar translate_off

   // True when the head reservation register entry names a unit other
   // than FP_iti and FP_no.
   wire wb_fp_occ          = wb_unit != FP_iti && wb_unit != FP_no;
   reg  tbi_inst_done;
   reg [31:0] tbi_done_pc;

   // Copies of the WB occupancy and PC, delayed by one clock.
   // NOTE(review): presumably sampled by the testbench to detect
   // instruction completion -- confirm against mipspipetb.v.
   always @( posedge clk ) tbi_inst_done <= me_wb_occ;
   always @( posedge clk ) tbi_done_pc <= me_wb_pc;

   // Testbench access: store value directly into general-purpose
   // register r, bypassing the register file's write port.
   task tbi_poke_gpr;
      input [4:0] r;
      input [31:0] value;
      begin
         gpr.storage[r] = value;
      end
   endtask

   // Testbench access: return the stored contents of general-purpose
   // register r, read directly from the register file's storage.
   function [31:0] tbi_peek_gpr;
      input [4:0] r;
      begin
         tbi_peek_gpr = gpr.storage[r];
      end
   endfunction

   // Iterator over occupied pipeline segments, for the testbench.
   //
   // Each call reports the next occupied segment: first the five
   // integer stages in order (IF, ID, EX, ME, WB), then the entries
   // found by iterate_rr.  The position is kept in "stage" between
   // calls.  When nothing further is found, valid is returned as 0 and
   // the iteration restarts from IF on the following call.
   //
   // Outputs:
   //   valid  1 if the other outputs describe an occupied segment.
   //   name   Two-character segment name.
   //   pc     PC of the instruction in the segment.
   //   din    Instruction number carried in the tb_..._din registers.
   //   exc    Exception code input to the stage's latch (0 for WB).
   //   occ    Occupancy of the segment.
   task tbi_iterate_pipeline_segments;
      output valid;
      output [15:0] name;
      output [31:0] pc;
      output [31:0] din;
      output [7:0] exc;
      output occ;

      reg [88:0] info;
      integer stage;

      begin
         // First call: the iterator state has never been assigned.
         if( stage === 32'bx ) stage = 0;

         // Advance until an occupied stage is found or stages run out.
         for(valid = 0; stage < 5 && !valid;  valid = occ == 1 ) begin

            case( stage )
              0: info = {"IF",if_pc,   tb_if_din,   next_if_id_exc,1'd1};
              1: info = {"ID",if_id_pc,tb_if_id_din,next_id_ex_exc,if_id_occ};
              2: info = {"EX",id_ex_pc,tb_id_ex_din,next_ex_me_exc,id_ex_occ};
              3: info = {"ME",ex_me_pc,tb_ex_me_din,next_me_wb_exc,ex_me_occ};
              4: info = {"WB",me_wb_pc,tb_me_wb_din,8'b0,          me_wb_occ};
              default `UNEXPECTED(info,stage);
            endcase

            {name,pc,din,exc,occ} = info;
            stage = stage + 1;

         end

         // Integer stages exhausted: continue with the reservation register.
         if( !valid ) iterate_rr(valid,name,pc,din,exc,occ);
         if( !valid ) stage = 0;

      end

   endtask
   
   // Iterator over the reservation register, for the testbench.
   //
   // Each call reports the next entry, scanning from the head, that
   // belongs to the FP add or multiply unit.  The scan position (pos)
   // and the remaining entries (res_reg) are kept between calls; a
   // fresh copy of fp_res_reg is taken when pos is 0.  When no further
   // entry is found, valid is returned as 0 and pos is reset.
   //
   // Outputs have the same meaning as in tbi_iterate_pipeline_segments;
   // exc is always 0 here.
   task iterate_rr;
      output valid;
      output [15:0] name;
      output [31:0] pc;
      output [31:0] din;
      output [7:0] exc;
      output occ;

      integer pos;
      reg [2:0] unit, ver;
      reg [4:0] dst;
      reg [`FPRR_range] res_reg;
      reg wb;

      begin
         exc = 0;

         // First call: the iterator state has never been assigned.
         if( pos === 32'bx ) pos = 0;
         if( pos == 0 ) res_reg = fp_res_reg;

         // Stop at the first named entry, or when only empty entries remain.
         for( occ = 0;  res_reg && !occ;  occ = name != 0)  begin
            
            {unit,wb,dst,ver,pc,din} = res_reg[`FPRR_head];
            
            // The head position is reported as "WB".  Elsewhere the name
            // is "A1" or "M1" with the distance from the unit's first
            // stage added, so names count upward as pos decreases.
            case( unit )
              FP_add: name = pos == 0 ? "WB" : "A1"+AFU_stages-pos;
              FP_mul: name = pos == 0 ? "WB" : "M1"+MFU_stages-pos;
              FP_iti,
              FP_itf,
              FP_no:  name = 0;
              default: `UNEXPECTED(name,unit);
            endcase

            res_reg = res_reg >> FPRR_entry_size;
            pos = pos + 1;
         end

         valid = occ;
         if( !valid ) pos = 0;
         
      end

   endtask
   // exemplar translate_on

endmodule


// General-purpose register file: two combinational read ports and one
// write port clocked on the rising edge of clk.
//
//   data_out_1, data_out_2  Values read at addr_1 and addr_2.
//   addr_3, data_in_3       Write address and data; address 0 disables
//                           the write.
//
// Register 0 always reads as zero.  It is never written by this module,
// so returning its storage location would yield an undefined value
// unless something outside the module had initialized it.  A read of
// the register being written in the same cycle returns the new value.
module reg_file(data_out_1, data_out_2, addr_1, addr_2,
                addr_3, data_in_3, clk);
   input [4:0] addr_1, addr_2, addr_3;
   input [31:0] data_in_3;
   input        clk;
   output [31:0] data_out_1, data_out_2;

   reg [31:0]    storage [0:31];

   assign data_out_1 = addr_1 == 5'd0   ? 32'd0
                     : addr_1 == addr_3 ? data_in_3
                     :                    storage[addr_1];
   assign data_out_2 = addr_2 == 5'd0   ? 32'd0
                     : addr_2 == addr_3 ? data_in_3
                     :                    storage[addr_2];

   always @( posedge clk ) if( addr_3 ) storage[addr_3] <= data_in_3;

endmodule

// Simulation model of a pipelined FP functional unit.
//
// Every clock the operation selected by op is applied to a and b and
// the result is pushed into a shift register holding Stages entries of
// 64 bits each; result is the low 32 bits of the oldest entry.
//
// Parameters:
//   Stages  Number of entries in the shift register.
//   II      Not referenced in this module.
//
// ready_next_cycle is declared but not driven here, and busy is not
// used.  Everything between the translate_off/translate_on comments is
// excluded from synthesis.
module add_fu(ready_next_cycle,result,a,b,op,clk);
   input [31:0] a, b;
   input [5:0]  op;
   input        clk;
   output [31:0] result;
   output        ready_next_cycle;

   parameter     Stages = 1;
   parameter     II     = 1;
   // exemplar translate_off
   real          ar, br, resultr;
   reg [64*Stages-1:0] simulated_afu_really_long_shift_register;
   reg [1:0]           busy;

   assign              result = simulated_afu_really_long_shift_register[31:0];

   always @( posedge clk ) begin

      // NOTE(review): ftor and rtof are defined in test_proc, outside
      // this file; presumably they convert between the 32-bit FP
      // encoding and Verilog real values -- confirm.
      ar = test_proc.ftor(a);
      br = test_proc.ftor(b);

      case( op )

        cpu_p1.FOP_add: resultr = ar + br;
        cpu_p1.FOP_sub: resultr = ar - br;
        cpu_p1.FOP_mul: resultr = ar * br;
        default: `UNEXPECTED(resultr,op);

      endcase

      // Insert the new result at the top; older results move down one entry.
      simulated_afu_really_long_shift_register
        <= { 32'd0, test_proc.rtof(resultr),
             simulated_afu_really_long_shift_register[64*Stages-1:64] };
   end
   // exemplar translate_on

endmodule

// Integer ALU (combinational).
//
//   alu_out  Result of applying alu_op to the operands.
//   alu_a    First operand; for shifts, the low five bits give the
//            shift amount.
//   alu_b    Second operand; for shifts, the value being shifted.
//   alu_op   Operation code, one of the OP_ parameters below.
module alu(alu_out,alu_a,alu_b,alu_op);
   output [31:0] alu_out;
   input [31:0]  alu_a, alu_b;
   input [4:0]   alu_op;

   reg [31:0]    alu_out;

   // Stand-in used when the UNEXPECTED macro expands to a task call.
   task unexpected;
      inout [31:0] var;
      input [10:0] control;
      var = 0;
   endtask

   // Control Signal Value Names
   parameter  OP_xxx = 5'h0;  // Don't care.
   parameter  OP_add = 5'h0;
   parameter  OP_sll = 5'h1;
   parameter  OP_srl = 5'h2;
   parameter  OP_xor = 5'h3;
   parameter  OP_sub = 5'h4;
   parameter  OP_or  = 5'h5;
   parameter  OP_and = 5'h6;
   parameter  OP_slt = 5'h7;
   parameter  OP_seq = 5'h8;
   parameter  OP_b   = 5'h9;
   
   always @( alu_a or alu_b or alu_op )
     case( alu_op )
       OP_add  : alu_out = alu_a + alu_b;
       OP_and  : alu_out = alu_a & alu_b;
       OP_or   : alu_out = alu_a | alu_b;
       OP_xor  : alu_out = alu_a ^ alu_b;
       OP_sub  : alu_out = alu_a - alu_b;
       // Signed less-than.  Inverting the sign bits maps two's
       // complement order onto unsigned order, so the unsigned
       // comparison below gives the signed result.  (Prepending a copy
       // of the sign bit does not: that comparison is still unsigned.)
       OP_slt  : alu_out = {~alu_a[31],alu_a[30:0]} < {~alu_b[31],alu_b[30:0]};
       OP_sll  : alu_out = alu_b << alu_a[4:0];
       OP_srl  : alu_out = alu_b >> alu_a[4:0];
       OP_seq  : alu_out = alu_a == alu_b;
       OP_b    : alu_out = alu_b;
       default : `UNEXPECTED(alu_out,alu_op);
     endcase
   
endmodule

// exemplar translate_off

// Top-level system: the processor connected to a two-port memory.
// Port 1 carries instruction fetches and port 2 carries data accesses.
module system_p1(exc,reset,clk);
   input reset,clk;
   output [7:0] exc;

   // Processor-to-memory signals.
   wire [31:0] addr_1, addr_2, cpu_data_out_2;
   wire [1:0]  size_2;
   wire        we_2;

   // Memory-to-processor signals.
   wire [31:0] mem_data_out_1, mem_data_out_2;
   wire [2:0]  mem_err_out_1, mem_err_out_2;

   cpu_p1 cpu1( .exc(exc),
                .data_out_2(cpu_data_out_2),
                .addr_1(addr_1),
                .addr_2(addr_2),
                .size_2(size_2),
                .we_2(we_2),
                .data_in_1(mem_data_out_1),
                .data_in_2(mem_data_out_2),
                .mem_error_in_1(mem_err_out_1),
                .mem_error_in_2(mem_err_out_2),
                .reset(reset),
                .clk(clk) );

   memory_2p m1( mem_data_out_1, mem_err_out_1, addr_1,
                 mem_data_out_2, mem_err_out_2, addr_2, size_2, we_2,
                 cpu_data_out_2,
                 clk);

endmodule

`define FP_PROC
`include "mipspipetb.v"