////////////////////////////////////////////////////////////////////////////////
///
/// Template for LSU EE 3755 Fall 2001 Homework 7
///

 /// Name:  Don't bother, the TA-bot knows who you are.

 /// Instructions:
  //
  // Copy this to a file named hw07.v to directory ~/hw in your
  // class account. (~ is your home directory.)  Use this
  // file for your solution.  Your entire solution should be in
  // this file.
  //
  // Do not rename the modules in this file and be sure to use the
  // directory and filename given above.
  //
  // Modify the same cpu module for Problems 1 and 2.

  // Assignment: http://www.ece.lsu.edu/ee3755/2001f/hw07.pdf

////////////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////////////
/// Problems 1 and 2

 /// General Instructions
//
// For clickable links visit an html version of this file:
// http://www.ece.lsu.edu/ee3755/2001f/hw07.html
//
// Put the solutions to Problems 1 and 2 in the cpu module below.
// Don't duplicate or rename it.
//
// The testbench for both problems is "test_proc".
//
//   It should issue PASS and FAIL messages for each problem,
//   and finish by displaying "End of testbench run."
//
//   By default it runs the test program:
//   Web:   http://www.ece.lsu.edu/ee3755/2001f/hw07test.html
//   Local: /home/classes/ee3755/com/v/hw07test.s
//
//   To use a different test program put its name in the macro below
//   (end it with a .v even though the assembler code is in a file
//   with a .s extension) and "assemble" the program by pressing
//   [S-f9] while in an Emacs buffer containing the program.
//
`define MIPS_PROG "/home/classes/ee3755/com/v/hw07test.v"
//
//   The testbench will not pass Problem 1 unless the test program
//   above is used.
//
// Additional states (st_if, st_id, etc.) can be added but st_id must
// be used for decode and its value must remain 2.
//
// Solutions must be reasonably efficient, fast, and synthesizable.
//
//   Do not use delays or do anything on the negative edge of the clock.
//


 /// Problem 1
//
// Use the shifter module to implement the sll, srl, sllv, and srlv
// instructions.
//
// See:  http://www.ece.lsu.edu/ee4720/mips32v2.pdf  for instruction info.
//
// DO NOT use the alu to implement these instructions.
//
// The shifter module is in this file and is complete.  Do not modify it.
//
// The shifter module is instantiated in cpu but types for the
// connections (shifter_out, shifter_in, etc.) are not declared.
// That's an easy thing to do first.
//
// There is a subtlety to sllv and srlv that might be overlooked.
// Read the documentation carefully to find it or if you're bold take
// a look at the testbench.


 /// Problem 2
//
// Modify cpu so that while one instruction is executing the next one
// is fetched.
//
// If done correctly, the CPI should be reduced by nearly
// 1. (E.g., from 3.11 to 2.11).
//
// The st_if state should only be used after a reset, after that
// instructions should "start" in st_id.  (That is, except for the
// reset code, lines such as "state = st_if" should be changed to
// "state = st_id".)
//
// Places in the code have lines such as: "ir = data_in".  Do not use
// the value written to ir until the next cycle. (Here's why: The
// value will be written to ir late in the cycle, so there's not much
// time to do anything with it.)
//
// If the rule above is broken the testbench may display a message such as:
//
//   "FAIL: wrong instruction in ir: 0x34130003 (correct) != 0x20100000"
//
// The code fragment below shows an incorrect and a correct way of
// assigning ir.

`ifdef SAMPLE

    // Example of what should not be done.
    st_id:
      begin
         ir = data_in;

         // The line below is NOT ALLOWED because ir was just written from memory.
         {opcode,rs,rt,rd,sa,func} = ir;  
         ii     = ir[25:0];
         immed  = ir[15:0];

         // more code here
         
      end

    // Example of what might be done.
    st_id:
      begin

         {opcode,rs,rt,rd,sa,func} = ir;  
         ii     = ir[25:0];
         immed  = ir[15:0];

         // More code below.

         // More code above.
         
         // Line below is okay because data from memory (data_in) is
         // not used in this cycle.
         ir = data_in;
         
      end

`endif


 /// Modify The Module Below
//

module cpu(exc,data_out,addr,size,we,data_in,mem_error_in,reset,clk);
   /// Multi-cycle processor implementing a subset of MIPS.
   //
   // Each instruction is fetched in st_if, decoded in st_id, and then
   // completed in one or more execute / memory states before control
   // returns to st_if.
   //
   // Shift instructions (sll, srl, sllv, srlv) use the shifter module,
   // all other arithmetic and logical instructions use the alu module.
   //
   // Ports:
   //   exc           Set to 1 when an unimplemented instruction (or an
   //                 unexpected state, in simulation) is encountered;
   //                 cleared only by reset.
   //   data_out      Value offered to memory for stores (always rt_val).
   //   addr,size,we  Memory address, access size, and write enable.
   //   data_in       Value returned by memory (instruction or load data).
   //   mem_error_in  Memory error code; not examined by this module.
   //   reset         Synchronous; forces state to st_if and clears exc.
   //   clk           All state changes occur on the positive edge.
   //
   // NOTE(review): reset does not assign pc or npc; presumably the
   // testbench initializes them -- confirm.

   input [31:0] data_in;
   input [2:0]  mem_error_in;
   input reset,clk;
   output [7:0] exc;
   output [31:0] data_out, addr;
   output [1:0]  size;
   output        we;

   reg [31:0]    data_out, addr;
   reg [1:0]     size;
   reg           we;
   reg [7:0]     exc;

   // MIPS Registers
   //
   reg [31:0] gpr [0:31];
   reg [31:0] pc, npc;
   reg [31:0] ir;

   // Instruction Fields
   //
   reg [4:0]  rs, rt, rd, sa;
   reg [5:0]  opcode, func;
   reg [25:0] ii;
   reg [15:0] immed;

   // Values Derived From Immediates and Read From Register File
   //
   reg [31:0] simmed, uimmed, limmed;
   reg [31:0] rs_val, rt_val;

   // ALU Connections
   //
   wire [31:0] alu_out;
   reg [31:0]  alu_a, alu_b;
   reg [5:0]   alu_op;

   // Shifter Connections
   //
   // These must be declared before the shifter is instantiated,
   // otherwise they become implicit one-bit nets.
   //
   wire [31:0] shifter_out;
   reg [31:0]  shifter_in;   // Value to be shifted.
   reg [4:0]   shift_amt;    // Shift distance, 0 to 31.
   reg         shift_dir;    // 1, shift left; 0, shift right.

   // Processor Control Logic and State
   //
   reg [2:0]  state;
   reg [4:0]  wb_rd;     // Register number to write.
   reg        wb_shift;  // In st_ex write: 1, shifter_out; 0, alu_out.
   reg        me_we;     // we value to use in state st_me
   reg [1:0]  me_size;   // size value to use in state st_me


   alu our_alu(alu_out, alu_a, alu_b, alu_op);
   shifter our_shifter(shifter_out, shifter_in, shift_amt, shift_dir);


   // Values for the MIPS funct field.
   //
   parameter  f_sll  = 6'h0;
   parameter  f_srl  = 6'h2;
   parameter  f_sllv = 6'h4;
   parameter  f_srlv = 6'h6;
   parameter  f_add  = 6'h20;
   parameter  f_sub  = 6'h22;
   parameter  f_or   = 6'h25;

   // Values for the MIPS opcode field.
   //
   parameter  o_rfmt = 6'h0;
   parameter  o_j    = 6'h2;
   parameter  o_beq  = 6'h4;
   parameter  o_bne  = 6'h5;
   parameter  o_addi = 6'h8;
   parameter  o_slti = 6'ha;
   parameter  o_andi = 6'hc;
   parameter  o_ori  = 6'hd;
   parameter  o_lui  = 6'hf;
   parameter  o_lw   = 6'h23;
   parameter  o_lbu  = 6'h24;
   parameter  o_sw   = 6'h2b;
   parameter  o_sb   = 6'h28;

   // Processor Control Logic States
   //
   parameter  st_if      = 1;
   parameter  st_id      = 2;
   parameter  st_ex      = 3;
   parameter  st_ex_addr = 5;
   parameter  st_ex_cond = 6;
   parameter  st_ex_targ = 7;
   parameter  st_me      = 4;

   // ALU Operations
   //
   // op_sll and op_srl are retained for compatibility but are no
   // longer issued: the alu module does not implement them.
   //
   parameter  op_nop = 0;
   parameter  op_sll = 1;
   parameter  op_srl = 2;
   parameter  op_add = 3;
   parameter  op_sub = 4;
   parameter  op_or  = 5;
   parameter  op_and = 6;
   parameter  op_slt = 7;
   parameter  op_seq = 8;

   // Shifter Directions (values of the shifter's direction input).
   //
   parameter  sh_right = 1'b0;
   parameter  sh_left  = 1'b1;


   /// Set Memory Connection Values: addr, we, and size.
   //
   // Combinational: memory sees the pc during fetch and the ALU output
   // (the effective address) during st_me.
   //
   always @( state or pc or alu_out or me_size or me_we )
     case( state )
       st_if   : begin addr = pc;       we = 0;      size = 3;       end
       st_me   : begin addr = alu_out;  we = me_we;  size = me_size; end
       default : begin addr = pc;       we = 0;      size = 0;       end
       // Note: addr is set for default case to simplify synthesized hardware.
     endcase


   /// Processor Control: One State Transition per Positive Clock Edge
   //
   always @( posedge clk )
     if( reset ) begin

        state = st_if;
        exc   = 0;

     end else
     case ( state )

       ///       Instruction Fetch
       st_if:
         begin
            ir    = data_in;
            state = st_id;
         end

       ///       Instruction Decode (and Register Read)
       st_id:
         begin

            {opcode,rs,rt,rd,sa,func} = ir;
            ii     = ir[25:0];
            immed  = ir[15:0];

            simmed = { immed[15] ? 16'hffff : 16'h0, immed };
            uimmed = { 16'h0, immed };
            limmed = { immed, 16'h0 };

            rs_val = gpr[rs];
            rt_val = gpr[rt];

            // By default st_ex writes back the ALU result; only the
            // shift instructions below override this.
            //
            wb_shift = 0;

            // Set alu_a, alu_b, alu_op, and wb_rd.
            // For shifts also set shifter_in, shift_amt, and shift_dir.
            //
            case( opcode )

              o_rfmt:
                // R-Format Instructions
                case ( func )
                  f_add   : begin alu_a = rs_val;   alu_op = op_add;
                                  alu_b = rt_val;   wb_rd  = rd;         end
                  f_sub   : begin alu_a = rs_val;   alu_op = op_sub;
                                  alu_b = rt_val;   wb_rd  = rd;         end

                  // Shifts: the ALU is idle (op_nop), the shifter
                  // operates on rt.  Immediate shifts take the distance
                  // from sa, variable shifts use only the low five bits
                  // of rs.
                  //
                  f_sll   : begin alu_a = rs_val;   alu_op = op_nop;
                                  alu_b = rt_val;   wb_rd  = rd;
                                  shifter_in = rt_val;
                                  shift_amt  = sa;
                                  shift_dir  = sh_left;
                                  wb_shift   = 1;                        end
                  f_srl   : begin alu_a = rs_val;   alu_op = op_nop;
                                  alu_b = rt_val;   wb_rd  = rd;
                                  shifter_in = rt_val;
                                  shift_amt  = sa;
                                  shift_dir  = sh_right;
                                  wb_shift   = 1;                        end
                  f_sllv  : begin alu_a = rs_val;   alu_op = op_nop;
                                  alu_b = rt_val;   wb_rd  = rd;
                                  shifter_in = rt_val;
                                  shift_amt  = rs_val[4:0];
                                  shift_dir  = sh_left;
                                  wb_shift   = 1;                        end
                  f_srlv  : begin alu_a = rs_val;   alu_op = op_nop;
                                  alu_b = rt_val;   wb_rd  = rd;
                                  shifter_in = rt_val;
                                  shift_amt  = rs_val[4:0];
                                  shift_dir  = sh_right;
                                  wb_shift   = 1;                        end

                  default : begin alu_a = rs_val;   alu_op = op_nop;
                                  alu_b = rt_val;   wb_rd  = 0; exc = 1; end
                endcase

              // I- and J-Format Instructions
              o_lbu:  begin alu_a = rs_val;   alu_op = op_add;
                            alu_b = simmed;   wb_rd  = rt;         end
              o_sb:   begin alu_a = rs_val;   alu_op = op_add;
                            alu_b = simmed;   wb_rd  = 0;          end
              o_lui:  begin alu_a = rs_val;   alu_op = op_or;
                            alu_b = limmed;   wb_rd  = rt;         end
              o_addi: begin alu_a = rs_val;   alu_op = op_add;
                            alu_b = simmed;   wb_rd  = rt;         end
              o_andi: begin alu_a = rs_val;   alu_op = op_and;
                            alu_b = uimmed;   wb_rd  = rt;         end
              o_ori:  begin alu_a = rs_val;   alu_op = op_or;
                            alu_b = uimmed;   wb_rd  = rt;         end
              o_slti: begin alu_a = rs_val;   alu_op = op_slt;
                            alu_b = simmed;   wb_rd  = rt;         end
              o_j:    begin alu_a = rs_val;   alu_op = op_nop;
                            alu_b = simmed;   wb_rd  = 0;          end
              o_bne, o_beq:
                      begin alu_a = rs_val;   alu_op = op_seq;
                            alu_b = rt_val;   wb_rd  = 0;          end
              default:begin alu_a = rs_val;   alu_op = op_nop;
                            alu_b = simmed;   wb_rd  = 0; exc = 1; end
            endcase

            // Needed for a store instruction, doesn't hurt others.
            data_out = rt_val;

            // Set me_size and me_we
            //
            case( opcode )
              o_lbu   : begin me_size = 1;  me_we = 0; end
              o_sb    : begin me_size = 1;  me_we = 1; end
              default : begin me_size = 0;  me_we = 0; end
            endcase

            // Advance pc first: the npc computations below (and the
            // branch target computed in st_ex_cond) use the updated pc.
            //
            pc = npc;

            // Set npc, branch instruction may change npc.
            //
            // A jump (or taken branch) changes only npc, so the
            // instruction following it is fetched before the target.
            //
            case( opcode )
              o_j     : npc = { pc[31:28], ii, 2'b0 };
              default : npc = pc + 4;
            endcase

            case( opcode )
              o_lbu, o_sb  : state = st_ex_addr;
              o_bne, o_beq : state = st_ex_cond;
              o_j          : state = st_if;
              default      : state = st_ex;
            endcase
         end

       ///       Execute (ALU and Shift Instructions)
       st_ex:
         begin
            // Register 0 is never written.
            if( wb_rd ) gpr[wb_rd] = wb_shift ? shifter_out : alu_out;
            state = st_if;
         end

       ///       Execute (Compute Effective Address for Loads and Stores)
       st_ex_addr:
         begin
            // alu_out already holds rs_val + simmed; it is used as the
            // memory address while in st_me.
            state = st_me;
         end

       ///       Execute (Compute Branch Condition)
       st_ex_cond:
         begin
            // alu_out is 1 when rs_val == rt_val (op_seq), so the
            // branch is taken when "is beq" matches "operands equal".
            if( opcode == o_beq == alu_out ) begin
               alu_a  = pc;
               alu_b  = simmed << 2;
               alu_op = op_add;
               state  = st_ex_targ;
            end else begin
               state = st_if;
            end
         end

       ///       Execute (Compute Branch Target)
       st_ex_targ:
         begin
            npc = alu_out;
            state = st_if;
         end

       ///       Memory
       st_me:
         begin
            // For stores wb_rd is 0, so nothing is written here.
            if( wb_rd ) gpr[wb_rd] = data_in;
            state = st_if;
         end

       default:
         begin
            // exemplar translate_off
            $display("Unexpected state");
            exc = 1;
            // exemplar translate_on
         end

     endcase

endmodule


////////////////////////////////////////////////////////////////////////////////
/// Shifter and ALU Modules
//

module shifter(shift_out,val,amt,direction);
   /// Combinational 32-bit logical shifter.
   //
   //   shift_out  val shifted by amt bit positions, vacated bits are 0.
   //   val        Value to shift.
   //   amt        Shift distance, 0 to 31.
   //   direction  1, shift left; 0, shift right.

   output [31:0] shift_out;
   input [31:0]  val;
   input [4:0]   amt;
   input         direction;

   // Both candidate results are formed, then one is selected.
   //
   wire [31:0]   left_result  = val << amt;
   wire [31:0]   right_result = val >> amt;

   assign        shift_out = direction ? left_result : right_result;

endmodule

module alu(alu_out,alu_a,alu_b,alu_op);
   /// Combinational ALU.
   //
   //   alu_out  Result of applying alu_op to alu_a and alu_b.
   //   alu_op   Operation, one of the op_ parameters below; any other
   //            value yields 0 (and a message in simulation).

   output [31:0] alu_out;
   input [31:0]  alu_a, alu_b;
   input [5:0]   alu_op;

   reg [31:0]    alu_out;

   // 33-bit difference of the sign-extended operands; bit 32 is its
   // sign and so is 1 exactly when alu_a < alu_b as signed values.
   reg [32:0]    signed_diff;

   // Control Signal Value Names
   parameter  op_nop = 0;
   parameter  op_add = 3;
   parameter  op_sub = 4;
   parameter  op_or  = 5;
   parameter  op_and = 6;
   parameter  op_slt = 7;
   parameter  op_seq = 8;

   // DO NOT add shift operations to this module.

   always @( alu_a or alu_b or alu_op )
     begin

        signed_diff = { alu_a[31], alu_a } - { alu_b[31], alu_b };

        case( alu_op )

          // No operation.
          op_nop  : alu_out = 0;

          // Arithmetic.
          op_add  : alu_out = alu_a + alu_b;
          op_sub  : alu_out = alu_a - alu_b;

          // Bitwise logic.
          op_or   : alu_out = alu_a | alu_b;
          op_and  : alu_out = alu_a & alu_b;

          // Comparisons, result is 0 or 1.
          op_slt  : alu_out = { 31'b0, signed_diff[32] };
          op_seq  : alu_out = { 31'b0, alu_a == alu_b };

          default :
            begin
               alu_out = 0;
               // exemplar translate_off
               $display("Unrecognized alu operation, %d", alu_op);
               // exemplar translate_on
            end

        endcase

     end

endmodule

////////////////////////////////////////////////////////////////////////////////
/// System Module and Testbench Include

// exemplar translate_off

module system(exc,reset,clk);
   /// Connects one cpu (cpu1) to one memory (m1) for simulation.
   //
   //   exc    Exception indication from the cpu.
   //   reset  Passed to the cpu.
   //   clk    Clock for both the cpu and the memory.

   input reset,clk;
   output [7:0] exc;

   // Address, size, and write enable: cpu to memory.
   wire [31:0] addr;
   wire [1:0]  size;
   wire        we;

   // Data: cpu to memory and memory to cpu.
   wire [31:0] cpu_data_out, mem_data_out;

   // Error code: memory to cpu.
   wire [2:0]  mem_err_out;

   cpu cpu1(.exc(exc),
            .data_out(cpu_data_out),
            .addr(addr),
            .size(size),
            .we(we),
            .data_in(mem_data_out),
            .mem_error_in(mem_err_out),
            .reset(reset),
            .clk(clk));

   // memory_3 is defined in the included testbench file, so its
   // connections are kept in positional form.
   memory_3 m1(mem_data_out,mem_err_out,addr,size,we,cpu_data_out,clk);

endmodule


// Include the testbench.

`include "/home/classes/ee3755/com/v/hw07sup.v"