// Address map used by the functional simulator's byte memory.
//
//   MEMBASE   lowest index of the memory array (and the initial PC value)
//   DATABASE  address that data references are measured from
//   TEXTSIZE  offset, within the memory array, at which data is stored
//   MEMSIZE   number of bytes in the memory array
`define MEMBASE  'h400000
`define DATABASE 'h10010000
`define TEXTSIZE 'h100
`define MEMSIZE  'h200

// Index range for declaring the memory array.
`define MEMRANGE `MEMBASE:`MEMBASE+`MEMSIZE-1

// A(addr):   translate a data address into an index of the memory array.
// MEM(addr): the byte of "mem" that holds data address addr.
`define A(addr) ((addr)-`DATABASE+`MEMBASE+`TEXTSIZE)
`define MEM(addr) mem[`A(addr)]

// Run-time limits.  Each keeps a value that was defined before this
// point and otherwise falls back to the default given here.

`ifndef CYCLE_LIMIT
 `define CYCLE_LIMIT 1000
`endif

`ifndef WATCH_LIMIT
 `define WATCH_LIMIT 500
`endif

`ifndef PTRACE_LIMIT
 `define PTRACE_LIMIT 10000
`endif

module test_proc();

   // When nonzero, every instruction completed by the DUT is also executed
   // on the functional simulator (p2) and the two results are compared.
   parameter shadow_trace = 1;
   // Highest index of the tr_*_a arrays declared below.
   parameter tr_size = 100;
   parameter cycle_delta = 2;  // Cycle count for first cpu cycle after reset.

   // Exception Codes
   //
   // Values compared against the DUT's exception output; they are also the
   // indices used for the excname table.
   parameter   EXC_none   = 8'd0;
   parameter   EXC_if_bus = 8'd1;
   parameter   EXC_if_seg = 8'd2;
   parameter   EXC_id_ins = 8'd3; // Illegal Instruction
   parameter   EXC_id_opr = 8'd4; // Illegal Operand
   parameter   EXC_id_sys = 8'd5;
   parameter   EXC_me_bus = 8'd6;
   parameter   EXC_me_seg = 8'd7;

   wire [7:0] exception;          // Exception code reported by the DUT.
   reg        reset, clk, clk2;   // clk2 is not driven in the visible source.
   wire       exc;                // Exception flag of the functional simulator.

   // Device under test and the functional (reference) simulator.
   system_p1 dut(exception,reset,clk);
   tb_proc p2(exc,clk2);
   
   // Shadow copies used to detect register changes:
   //   gpr_shadow / fpr_shadow    last values seen in the DUT,
   //   gpr_shadow2 / fpr_shadow2  last values seen in p2.
   reg [31:0] gpr_shadow [0:31];  // DUT
   reg [31:0] gpr_shadow2 [0:31];
   reg [31:0] fpr_shadow [0:31];
   reg [31:0] fpr_shadow2 [0:31];
   // Per FP register: PC of the p2 instruction that most recently changed it
   // (zero when no write-back is outstanding) and the number of p2 writes
   // that are still waiting for a matching DUT write-back.
   reg [31:0] fpr_expecting_wb [0:31];
   integer    fpr_expecting_wb_cnt [0:31];
   
   // Assembly source text (up to 40 characters) of each instruction,
   // indexed by word address (byte address >> 2).
   reg [319:0] isource[dut.m1.text_base>>2:dut.m1.text_base+dut.m1.text_size>>2];
   reg [31:0]  regname [0:31];    // Register names, up to four characters each.
   reg [19*8-1:0] excname [0:10]; // Exception names, up to 19 characters each.

   integer    i, reg_old, reg_new, reg_sim;
   real       freg_old, freg_new;
   reg        go;                 // Enables the clock generator below.
   real       cycle_count;        // Starts at -cycle_delta; +1 per loop iteration.
   integer    ptrace_count;       // Pipeline trace records written so far.
   integer    icount;             // Instructions completed by the DUT.
   integer    changed_reg_count;
   integer    changed_freg, changed_freg_count;
   reg [31:0] freg_wb_pc;
   // NOTE(review): tr_pc_a, tr_regv_a, tr_regno_a, tr_fd and tr_icount_end
   // are not referenced anywhere in the visible source.
   reg [31:0] tr_pc_a [0:tr_size];
   reg [31:0] tr_regv_a [0:tr_size];
   reg [4:0]  tr_regno_a [0:tr_size];
   reg [31:0] tr_pc;
   reg [31:0] tr_regv;
   reg [4:0]  tr_regno;
   reg [31:0] dut_pc;
   integer    tr_fd, tr_icount_end;
   integer    pt_fd;              // Descriptor of the pipeline trace file.

   // Clock generator: while go is set, clk is low for 8 time units and
   // high for 2 (period of 10 time units).
   always wait( go ) begin clk = 0; #8; clk = 1; #2; end

   // Convert IEEE 754 double-precision bits to single-precision bits.
   //
   // The sign is copied, the exponent is re-biased from 1023 to 127 and the
   // fraction is truncated (not rounded) to its 23 most significant bits.
   //
   // Special cases (a plain re-bias would wrap these into unrelated values,
   // for example 0.0 would become 2.0):
   //   zero or double denormal (exponent field 0)   -> signed zero
   //   magnitude below the single normal range      -> signed zero
   //   magnitude above the single range             -> signed infinity
   //   infinity / NaN (exponent field all ones)     -> infinity / NaN
   function [31:0] dtof;
      input [63:0] d;

      reg [10:0] dexp;   // Biased double exponent.
      integer    fexp;   // Re-biased exponent; may fall outside 1..254.

      begin
         dexp = d[62:52];
         fexp = dexp - 1023 + 127;

         if( dexp === 11'h7ff )
           // The top fraction bit is set for any NaN so that a payload held
           // only in the truncated low bits does not collapse to infinity.
           dtof = { d[63], 8'hff, ( |d[51:0] ), d[50:29] };
         else if( dexp == 11'd0 || fexp <= 0 )
           dtof = { d[63], 31'd0 };
         else if( fexp >= 255 )
           dtof = { d[63], 8'hff, 23'd0 };
         else
           dtof = { d[63], fexp[7:0], d[51:29] };
      end

   endfunction

   // Convert IEEE 754 single-precision bits to double-precision bits.
   //
   // The sign is copied, the exponent is re-biased from 127 to 1023 and the
   // fraction is padded with zeros on the right.
   //
   // Special cases:
   //   exponent field 0 (zero; denormals are flushed) -> signed zero
   //   exponent field all ones (infinity / NaN)       -> infinity / NaN
   function [63:0] ftod;
      input [31:0] f;

      reg [7:0]  fexp;   // Biased single exponent.
      reg [10:0] dexp;   // Re-biased exponent for a normal number.

      begin
         fexp = f[30:23];
         dexp = fexp - 127 + 1023;

         if( fexp === 8'hff )
           ftod = { f[31], 11'h7ff, f[22:0], 29'd0 };
         else if( fexp == 8'd0 )
           ftod = { f[31], 63'd0 };
         else
           ftod = { f[31], dexp, f[22:0], 29'd0 };
      end

   endfunction

   // Return, as a real, the value of the single-precision bit pattern f.
   // The pattern is widened with ftod and reinterpreted with $bitstoreal.
   function real ftor;
      input [31:0] f;

      reg [63:0] widened;

      begin
         widened = ftod(f);
         ftor    = $bitstoreal(widened);
      end

   endfunction

   // Return the single-precision bit pattern for real r.  The real is
   // turned into double-precision bits with $realtobits and then narrowed
   // with dtof.
   function [31:0] rtof;
      input r;
      real r;

      reg [63:0] as_double;

      begin
         as_double = $realtobits(r);
         rtof      = dtof(as_double);
      end

   endfunction

   // Return the five least significant bits of integer i.  Used to print
   // register numbers with a narrow field width.
   function [4:0] ito5;
      input i;
      integer i;
      begin
         ito5 = i[4:0];
      end
   endfunction

   // Return integer i converted to a real, so it can be printed with a
   // real format specifier.
   function real itor;
      input i;
      integer i;
      begin
         // Multiplying by 1.0 forces the integer-to-real conversion.
         itor = 1.0 * i;
      end
   endfunction

   // Load one instruction word into both simulators and record its source.
   //
   //   addr    byte address of the instruction
   //   text    32-bit instruction encoding
   //   source  assembly text (up to 40 characters) shown in traces
   //
   // The word is written to the DUT through dut.m1.tbi_poke_word; a nonzero
   // result is reported and stops the simulation.  The same word is stored
   // most-significant byte first in p2.mem at addr.  The source text is
   // shifted left, padding with spaces, until its first byte is nonzero and
   // is then saved in isource at the word index.
   task initmem;
      input [31:0] addr;
      input [31:0] text;
      input [319:0] source;

      reg [2:0] status;

      begin

         status = dut.m1.tbi_poke_word(addr,text);

         if( status ) begin
            $display("Error %d initializing text address 0x%h.",
                     status, addr);
            $stop;
         end

         p2.mem[addr]   = text[31:24];
         p2.mem[addr+1] = text[23:16];
         p2.mem[addr+2] = text[15:8];
         p2.mem[addr+3] = text[7:0];

         while( source[319:312] === 8'b0 )
           source = {source[311:0]," "};

         isource[addr>>2] = source;

      end

   endtask

   // Load one data word into both simulators.
   //
   //   addr  byte address of the word in the data address space
   //   word  32-bit value to store
   //
   // The word is written to the DUT through dut.m1.tbi_poke_word; a nonzero
   // result is reported and stops the simulation.  For p2 the address is
   // translated with the A macro and the word is stored most-significant
   // byte first.
   task initdmem;
      input [31:0] addr;
      input [31:0] word;

      reg [31:0] index;
      reg [2:0] status;

      begin

         status = dut.m1.tbi_poke_word(addr,word);

         if( status ) begin
            $display("*** Error %d initializing data address 0x%h. ***",
                     status, addr);
            $stop;
         end

         index = `A(addr);

         p2.mem[index]   = word[31:24];
         p2.mem[index+1] = word[23:16];
         p2.mem[index+2] = word[15:8];
         p2.mem[index+3] = word[7:0];

      end

   endtask

   // Execute one instruction on the functional simulator (p2) and report
   // what it did.
   //
   //   tr_pc     address of the instruction that was executed
   //   tr_regno  number of the general-purpose register that changed, or 0
   //   tr_regv   new value of that register, or 0
   //
   // If more than one register differs from gpr_shadow2, the highest
   // numbered one is reported; gpr_shadow2 is brought up to date for all of
   // them.  With FP_PROC defined, every FP register that changed has the
   // instruction address stored in fpr_expecting_wb, its pending count
   // incremented and fpr_shadow2 updated.
   //
   // Nothing is done, and no output is assigned, when shadow_trace is zero.
   task get_trace_record;
      output [31:0] tr_pc;
      output [5:0] tr_regno;
      output [31:0] tr_regv;

      // Local index so that the module-level loop variable is not disturbed.
      integer i;

      if ( shadow_trace ) begin

         tr_pc = p2.pc;
         p2.step;
         
         tr_regno = 0;
         tr_regv  = 0;

         for(i=0; i<32; i=i+1) if( gpr_shadow2[i] != p2.gpr[i] )
           begin
              tr_regno       = i;
              tr_regv        = p2.gpr[i];
              gpr_shadow2[i] = tr_regv;
           end

         `ifdef FP_PROC
         for(i=0; i<32; i=i+1) if( fpr_shadow2[i] !== p2.fpr[i] )
           begin
              fpr_expecting_wb[i] = tr_pc;
              fpr_expecting_wb_cnt[i] = fpr_expecting_wb_cnt[i] + 1;
              fpr_shadow2[i] = p2.fpr[i];
           end
         `endif

      end

   endtask

   
   // Append the pipeline segments of the current cycle to the trace file.
   //
   // Nothing is written once ptrace_count has reached PTRACE_LIMIT.
   // Otherwise dut.cpu1.tbi_iterate_pipeline_segments is called repeatedly
   // until it reports an invalid record; one line is written per valid
   // record, and the line of an "IF" segment also carries the instruction
   // address and source text.  A notice is displayed when the record that
   // reaches the limit is written.
   task write_segments;

      begin:SEG_LOOP
         reg        seg_valid;
         reg [15:0] seg_name;
         reg [31:0] seg_pc;
         reg [31:0] seg_din;
         reg [7:0]  seg_exc;
         reg        seg_occ;
         real       din_as_real;

         if( ptrace_count < `PTRACE_LIMIT ) forever begin

            dut.cpu1.tbi_iterate_pipeline_segments
              (seg_valid,seg_name,seg_pc,seg_din,seg_exc,seg_occ);

            // An invalid record ends this cycle's list.
            if( !seg_valid ) disable SEG_LOOP;

            ptrace_count = ptrace_count + 1;

            if( ptrace_count == `PTRACE_LIMIT )
              $display("*** Pipeline tracing stopping after cycle %.0f. ***",
                       cycle_count);

            // Modelsim bug workaround: Modelsim formats integers with
            // about 8 characters of space regardless of format specifier,
            // so the value is printed as a real instead.
            din_as_real = seg_din;

            $fwrite(pt_fd,"%2s %.0f %d %d",
                    seg_name, din_as_real, seg_occ, seg_exc != 0);

            if( seg_name == "IF" )
              $fwrite(pt_fd," 0x%h %-s\n", seg_pc, isource[seg_pc>>2]);
            else
              $fwrite(pt_fd,"\n");

         end

      end

   endtask

   integer rno;

   // Fill cnt consecutive entries of regname, starting at index rno, with
   // name, name+1, name+2, ...  Adding to the string value advances its
   // last character, so "t0" yields "t0", "t1", and so on.  rno is left
   // pointing just past the last entry written.
   task initregs;
      input [31:0] name;
      input [3:0] cnt;
      integer k;
      begin
         k = 0;
         while( k < cnt ) begin
            regname[rno] = name + k;
            rno = rno + 1;
            k = k + 1;
         end
      end
   endtask

   // Finish the run: close the pipeline trace file, report FP write-backs
   // that are still outstanding (FP_PROC only), run the external "ped end"
   // command to show the pipeline diagram, print the instruction count and
   // cycles per instruction, and stop the simulation.
   task cpi_and_stop;
      real    ratio;
      integer r;
      begin
         $fclose(pt_fd);

         `ifdef FP_PROC
         for(r=0; r<32; r=r+1)
           if( fpr_expecting_wb[r] )
             $display("FAIL: Expecting wb to f%d: %f (c) =? %f by 0x%h",
                      ito5(r),ftor(fpr_shadow2[r]),ftor(dut.cpu1.fpr[r]),
                      fpr_expecting_wb[r]);
         `endif

         $display("");
         $display("-----------------------------------------------------------------------------");

         $display("Pipeline execution diagram of last few cycles:\n");
         $system("ped end");
         $display("-----------------------------------------------------------------------------");

         // CPI is reported as zero when no instruction completed.
         if( icount )
           ratio = cycle_count / icount;
         else
           ratio = 0;

         $display("Executed %d instructions at %.2f CPI.", icount, ratio);
         $display("\nEnd of Testbench Run\n");
         $stop;
      end
   endtask

   // Main testbench sequence:
   //   1. initialise counters, the register-name and exception-name tables
   //      and open the pipeline trace file;
   //   2. load the program (included from the file named by MIPS_PROG);
   //   3. give the DUT, the functional simulator and the shadow copies the
   //      same initial register values;
   //   4. start the clock, release reset and print the banner;
   //   5. run the per-cycle checker (and, with FP_PROC, the FP register
   //      watcher) until the DUT raises an exception;
   //   6. report how the run ended and stop.
   initial begin

      go = 0;
      cycle_count = -cycle_delta;
      icount = 0;
      reset = 1;
      ptrace_count = 0;

      begin:INITNAMES
         integer i;

         // General-purpose register names; initregs fills runs of
         // consecutively numbered names starting at index rno.
         regname[0] = "zero";
         regname[1] = "at";
         rno = 2;
         initregs("v0",2);
         initregs("a0",4);
         initregs("t0",8);
         initregs("s0",8);
         initregs("t8",2);
         initregs("k0",2);
         regname[28] = "gp";
         regname[29] = "sp";
         regname[30] = "fp";
         regname[31] = "ra";

         excname[EXC_none] = "None";
         excname[EXC_if_bus] = "PC Bus";
         excname[EXC_if_seg] = "PC Segmentation";
         excname[EXC_id_ins] = "Illegal Instruction";
         excname[EXC_id_opr] = "Illegal Operand";
         excname[EXC_id_sys] = "System Call";
         excname[EXC_me_bus] = "L/S Bus";
         excname[EXC_me_seg] = "L/S Segmentation";

      end

      pt_fd = $fopen("pipetrace.txt");

      // The included file supplies the program under test.
      // NOTE(review): presumably a list of initmem / initdmem calls; confirm
      // against the generated program file.
`include `MIPS_PROG

      // Register i starts at i*10; FP register i starts at i*100 + i/10.
      for(i=0; i<32; i=i+1)
        begin:B
           reg [31:0] val, fval;
           val = i * 10;
           fval = rtof(i * 100.0 + i/10.0);
           dut.cpu1.tbi_poke_gpr(i,val);
`ifdef FP_PROC           
           dut.cpu1.fpr[i] = fval;
`endif
           p2.gpr[i] = val;
           p2.fpr[i] = fval;
           gpr_shadow[i] = val;
           gpr_shadow2[i] = val;
           fpr_shadow[i] = fval;
           fpr_shadow2[i] = fval;
           fpr_expecting_wb[i] = 0;
           fpr_expecting_wb_cnt[i] = 0;
        end

      // Functional simulator starts at the first text address.
      p2.pc = 'h400000;
      p2.npc = p2.pc + 4;

      icount = 0;
      go = 1;
      changed_freg = 0;
      changed_reg_count = 0;
      changed_freg_count = 0;

      // Hold reset through one full clock cycle, then release it one time
      // unit after the falling edge.
      wait( clk == 0 );
      @( posedge clk ); @( negedge clk );
      #1;
      reset = 0;

      $display("=============================================================================");
      $display("Pipelined Processor Testbench");
      $display("\nMIPS Program %s",`MIPS_PROG);
      $display("Cycle Limit %d. (See CYCLE_LIMIT)",`CYCLE_LIMIT);
      $display("Watch Limit %d. (See WATCH_LIMIT)",`WATCH_LIMIT);
      $display("Pipeline Trace Limit %d. (See PTRACE_LIMIT)",`PTRACE_LIMIT);

      $display("-----------------------------------------------------------------------------");

      // Two concurrent processes; the first disables the whole fork when
      // the DUT reports an exception.
      fork:MAIN
         
         // Per-cycle checker: counts cycles, writes the pipeline trace and
         // compares each completed DUT instruction with the functional
         // simulator.
         forever begin

            cycle_count = cycle_count + 1;

            if( cycle_count > `CYCLE_LIMIT ) begin

               $display("\n*** Cycle count limit reached. ***");
               cpi_and_stop;

            end

            if( cycle_count == `WATCH_LIMIT )
              $display("\n*** Watch limit reached, registers and instruction printing stopped. ***");

            write_segments;
            
            if( dut.cpu1.tbi_inst_done === 1 ) begin

               dut_pc = dut.cpu1.tbi_done_pc;

               if( cycle_count < `WATCH_LIMIT ) 
                 $display("  PC 0x%h:  %-s", dut_pc, isource[dut_pc>>2] );

               if( shadow_trace ) begin
               
                  // Step the functional simulator by one instruction.
                  get_trace_record(tr_pc,tr_regno,tr_regv);
                  
                  if( tr_pc !== dut_pc ) begin
                     $display("PC mismatch at instruction %d, 0x%h (correct) != %h",
                              icount, tr_pc, dut_pc );
                     cpi_and_stop;
                  end

                  // Report DUT registers that changed since the previous
                  // instruction and check every register against the
                  // functional simulator's value.
                  for(i=0; i<32; i=i+1) begin

                     reg_old = gpr_shadow[i];
                     reg_new = dut.cpu1.tbi_peek_gpr(i);
                     reg_sim = gpr_shadow2[i];
                     
                     if( reg_old !== reg_new ) begin

                        if( cycle_count < `WATCH_LIMIT )
                          $display("  Register $%2.0f (%s): 0x%h (%d) -> 0x%h (%d)",
                                   itor(i), regname[i],
                                   reg_old, reg_old, reg_new, reg_new);

                        gpr_shadow[i] = reg_new;

                     end

                     if( reg_sim !== reg_new ) begin
                        $display("FAIL: Wrong value in %d  0x%h (correct) != 0x%h",
                                 ito5(i), reg_sim, reg_new);
                        cpi_and_stop;
                       end

                    end

               end
               
               icount = icount + 1;

            end

            // An exception ends both forked processes.
            if( exception ) disable MAIN;
            @( negedge clk );

            // Memory is clocked on negative edge, give simulator time
            // for combinational logic driven by memory outputs.
            #1; 
            
            
         end

         // Watch FP regs
         //
         // Each cycle the DUT's FP registers are compared with fpr_shadow.
         // On the following pass the most recently changed register is
         // checked against what the functional simulator expects.
`ifdef FP_PROC         
         while( !exception ) begin

            if( shadow_trace ) begin

               #1; // Wait for loop above to step functional simulator.

               if( changed_freg_count ) begin

                  if( !fpr_expecting_wb[changed_freg] ) begin
                     $display("FAIL: Did not expect a writeback to f%d.",
                              ito5(changed_freg));
                     cpi_and_stop;
                  end

                  // The value is only checked when a single write to this
                  // register is outstanding.
                  if( fpr_shadow2[changed_freg] !== fpr_shadow[changed_freg]
                      && fpr_expecting_wb_cnt[changed_freg] < 2 )
                    begin
                       $display("FAIL: Wrong value written to f%d: %f (c) != %f",
                                ito5(changed_freg),
                                ftor(fpr_shadow2[changed_freg]),
                                freg_new);
                       cpi_and_stop;
                    end
                  
                  // A write-back by the expected instruction clears the
                  // pending state; otherwise one pending write is consumed,
                  // or the run fails if it was the only one.
                  if( fpr_expecting_wb[changed_freg] == freg_wb_pc )
                    begin
                       fpr_expecting_wb_cnt[changed_freg] = 0;
                       fpr_expecting_wb[changed_freg] = 0;
                    end
                  else if( fpr_expecting_wb_cnt[changed_freg] == 1 ) begin
                     $display("FAIL: Wrong instruction writing f%d, 0x%h(c) != 0x%h",
                              ito5(changed_freg),
                              fpr_expecting_wb[changed_freg], freg_wb_pc);
                     cpi_and_stop;
                  end else begin
                     fpr_expecting_wb_cnt[changed_freg]
                             = fpr_expecting_wb_cnt[changed_freg] - 1;
                     if( cycle_count < `WATCH_LIMIT )
                       $display("f%d sort of okay %d",changed_freg,
                                fpr_expecting_wb_cnt[changed_freg]);
                  end
               end
               changed_freg_count = 0;
               changed_freg = 0;
            end

            freg_wb_pc = dut.cpu1.wb_fp_pc;

            @( negedge clk );

            // Memory is clocked on negative edge, give simulator time
            // for combinational logic driven by memory outputs.
            #1; 

            // Record which DUT FP registers changed during this cycle; only
            // the highest-numbered one is kept in changed_freg.
            for(i=0; i<32; i=i+1)
              if( fpr_shadow[i] !== dut.cpu1.fpr[i] ) begin
                 freg_old = ftor(fpr_shadow[i]);
                 freg_new = ftor(dut.cpu1.fpr[i]);
                 changed_freg = i;
                 changed_freg_count = changed_freg_count + 1;
                 if( cycle_count < `WATCH_LIMIT )
                   $display(" Register f%d: %f -> %f",
                            ito5(i), freg_old, freg_new);
                 fpr_shadow[i] = dut.cpu1.fpr[i];
              end

         end
         
`endif
         
      join

      // The run ended: a syscall exception is the normal way to finish,
      // any other exception is reported by name.
      $display("");
      $display("-----------------------------------------------------------------------------");
      if( exception ) begin
         if( exception == EXC_id_sys ) begin
            $display("Ending normally at a syscall instruction.");
            if( shadow_trace && tr_pc !== dut_pc ) begin
               $display("PC mismatch at instruction %d, 0x%h (correct) != %h",
                        icount, tr_pc, dut_pc );
            end
         end else
           $display("%s exception for instruction at address 0x%h",
                    excname[exception],
                    dut.cpu1.tbi_done_pc);
      end

      cpi_and_stop;

   end

endmodule


module tb_proc(exc,clk);
   // Behavioural (functional) simulator for a subset of the MIPS
   // instruction set.  Instructions are executed one at a time by calling
   // the step task.
   //
   //   clk  not referenced in the visible source
   //   exc  set to 1 by step for an unrecognised opcode or function code;
   //        never cleared in the visible source
   input clk;
   output exc;

   reg    exc;

   // Byte-addressed memory holding both text and data.
   reg [7:0] mem [`MEMRANGE];

   // pc:   address of the next instruction to execute.
   // npc:  address of the instruction after that.
   // nnpc: value npc takes once the current instruction has executed.
   // ir:   instruction word being executed.
   reg [31:0] pc, npc, nnpc, ir;
   reg [31:0] gpr [0:31];
   // FP registers; the visible code only stores 32-bit values in them.
   reg [63:0] fpr[0:31];

   // Fields decoded from ir.
   reg [5:0]  opcode, funct;
   reg [4:0]  rs, rt, rd, sa, fs, ft, fd, fmt;
   reg [15:0] immed;
   reg [25:0] ii;
   reg [31:0] uimm16, simm16;   // immed zero-extended / sign-extended.
   reg [31:0] branch_target;

   real       fs_v, ft_v;       // FP source operands as reals.

   // Values for funct field.
   parameter  F_sll  = 6'h0;
   parameter  F_srl  = 6'h2;
   parameter  F_sllv = 6'h4;
   parameter  F_srlv = 6'h6;
   parameter  F_add  = 6'h20;
   parameter  F_sub  = 6'h22;
   parameter  F_and  = 6'h24;
   parameter  F_or   = 6'h25;
   parameter  F_xor  = 6'h26;

   // Values for opcode field.
   parameter  O_rfmt  = 6'h0;
   parameter  O_j     = 6'h2;
   parameter  O_beq   = 6'h4;
   parameter  O_bne   = 6'h5;
   parameter  O_addi  = 6'h8;
   parameter  O_slti  = 6'ha;
   parameter  O_sltiu = 6'hb;
   parameter  O_andi  = 6'hc;
   parameter  O_ori   = 6'hd;
   parameter  O_lui   = 6'hf;
   parameter  O_cop1  = 6'h11;
   parameter  O_lw    = 6'h23;
   parameter  O_lbu   = 6'h24;
   parameter  O_sw    = 6'h2b;
   parameter  O_sb    = 6'h28;
   parameter  O_lwc1  = 6'h31;

   /// Coprocessor 1 (FP) function Field Values
   //
   parameter  C1_add = 6'h0;
   parameter  C1_sub = 6'h1;
   parameter  C1_mul = 6'h2;

   // Power-up state; the testbench overrides pc and npc before running.
   initial begin
      exc = 0;
      pc = 0;
      npc = 4;
   end

   // Execute the instruction at pc and advance the program counters.
   //
   // The instruction word is read most-significant byte first from mem,
   // split into its fields and executed.  Branches and jumps change nnpc
   // rather than npc, so the instruction at npc executes before control
   // transfers.  Data accesses go through the MEM macro, which translates
   // the effective address into an index of mem.  An unrecognised opcode
   // or function code sets exc.  Register 0 is forced back to zero after
   // every instruction.
   task step;

      reg [31:0] ea;         // Effective address: gpr[rs] + simm16.
      integer    lhs, rhs;   // Signed operands for slti.

      begin

         // Fetch.
         ir = {mem[pc],mem[pc+1],mem[pc+2],mem[pc+3]};

         // Decode: R-format fields.
         opcode = ir[31:26];
         rs     = ir[25:21];
         rt     = ir[20:16];
         rd     = ir[15:11];
         sa     = ir[10:6];
         funct  = ir[5:0];

         // Coprocessor 1 names for the same fields.
         fmt = rs;
         ft  = rt;
         fs  = rd;
         fd  = sa;

         // I-format immediate and J-format target.
         immed = ir[15:0];
         ii    = ir[25:0];

         uimm16 = { 16'b0, immed };
         simm16 = { {16{immed[15]}}, immed };

         nnpc          = npc + 4; // May be reassigned below.
         branch_target = npc + ( simm16 << 2 );
         ea            = gpr[rs] + simm16;

         // Execute.
         case( opcode )

           // R-Format Instructions
           O_rfmt:
             case( funct )
               F_sll   : gpr[rd] = gpr[rt] << sa;
               F_srl   : gpr[rd] = gpr[rt] >> sa;
               F_sllv  : gpr[rd] = gpr[rt] << ( gpr[rs] & 32'h1f );
               F_srlv  : gpr[rd] = gpr[rt] >> ( gpr[rs] & 32'h1f );
               F_add   : gpr[rd] = gpr[rs] + gpr[rt];
               F_sub   : gpr[rd] = gpr[rs] - gpr[rt];
               F_and   : gpr[rd] = gpr[rs] & gpr[rt];
               F_or    : gpr[rd] = gpr[rs] | gpr[rt];
               F_xor   : gpr[rd] = gpr[rs] ^ gpr[rt];
               default : exc = 1;
             endcase

           // Floating-point arithmetic, performed on reals.
           O_cop1:
             begin
                fs_v = ftor( fpr[fs] );
                ft_v = ftor( fpr[ft] );

                case( funct )
                  C1_add  : fpr[fd] = rtof(fs_v + ft_v);
                  C1_sub  : fpr[fd] = rtof(fs_v - ft_v);
                  C1_mul  : fpr[fd] = rtof(fs_v * ft_v);
                  default : exc = 1;
                endcase
             end

           // Control transfers.
           O_j     : nnpc = {npc[31:28],ii,2'b0};
           O_beq   : if( gpr[rs] == gpr[rt] ) nnpc = branch_target;
           O_bne   : if( gpr[rs] != gpr[rt] ) nnpc = branch_target;

           // Immediate arithmetic and logic.
           O_addi  : gpr[rt] = gpr[rs] + simm16;
           O_andi  : gpr[rt] = gpr[rs] & uimm16;
           O_ori   : gpr[rt] = gpr[rs] | uimm16;
           O_lui   : gpr[rt] = { immed, 16'b0 };
           O_sltiu : gpr[rt] = gpr[rs] < simm16;
           O_slti  :
             begin
                // Integers make the comparison signed.
                lhs = gpr[rs];
                rhs = simm16;
                gpr[rt] = lhs < rhs;
             end

           // Loads.
           O_lbu   : gpr[rt] = { 24'b0, `MEM(ea) };
           O_lw    : gpr[rt] = {`MEM(ea),`MEM(ea+1),`MEM(ea+2),`MEM(ea+3)};
           O_lwc1  : fpr[rt] = {`MEM(ea),`MEM(ea+1),`MEM(ea+2),`MEM(ea+3)};

           // Stores.
           O_sb    : `MEM(ea) = gpr[rt];
           O_sw    : {`MEM(ea),`MEM(ea+1),`MEM(ea+2),`MEM(ea+3)} = gpr[rt];

           default : exc = 1;

         endcase

         // Register 0 always reads as zero.
         gpr[0] = 0;

         // Advance.
         pc  = npc;
         npc = nnpc;

      end

   endtask


endmodule