////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2021 Homework 4
//
/// SOLUTION

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2021/hw04.pdf
 /// Solution Discussion  https://www.ece.lsu.edu/koppel/v/2021/hw04_sol.pdf


`default_nettype none


//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
 ///   Complete bit_keeper so that it applies input commands as described
 ///   in the handout.
 ///
//
//     [✔] Only modify module bit_keeper.
//     [✔] Instantiate two rot_left instances to rotate bits.
//     [✔] APPLY AT MOST ONE rotate per cycle.
//     [✔] ONLY WRITE DATA to the least-significant w bits.
//
//     [✔] Use SimVision to debug. Use command: xrun -gui hw04.v
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Module must be synthesizable. Use command: genus -files syn.tcl
//
//     [✔] Don't assume any particular parameter values.
//
//     [✔] Code must be written clearly.
//     [✔] Pay attention to cost and performance.
//
//     [ ] Students can work in teams. List team members in this file



// Command codes presented on the cmd input of bit_keeper.
//
// Cmd_SIZE is not a command: it is the number of command codes and
// is used by the testbench for array dimensions and random selection.
//
typedef enum
  { Cmd_Reset  = 0,   // Clear the stored bits and any pending rotation.
    Cmd_Nop    = 1,   // No new command; in-progress work advances.
    Cmd_Write  = 2,   // Write din starting at bit position pos.
    Cmd_Rot_To = 3,   // Rotate the stored bits left by pos positions.
    Cmd_SIZE   = 4 } Command;

// Rotate Left by a Constant Amount
//
// Parameters
//   w:    Width of input and output, in bits.
//   amt:  Number of bit positions to rotate left. Any integer is
//         accepted; the rotation performed is amt modulo w.
//
// Ports
//   r:  Output, a rotated left. Bit i of a appears at bit (i+amt) mod w.
//   a:  Input, the value to rotate.
//
module rot_left
  #( int w = 10, amt = 1 )
   ( output uwire [w-1:0] r, input uwire [w-1:0] a);

   // Effective rotation amount, in the range 0 to w-1. The double
   // modulo keeps the result non-negative if amt is negative.
   //
   localparam int s = ( ( amt % w ) + w ) % w;

   if ( s == 0 ) begin : no_rot

      // A rotation by zero (or by a multiple of w) is a pass-through.
      // It is handled separately because the part-select a[w-1:w-s]
      // used below would be out of range when s is 0.
      //
      assign r = a;

   end else begin : rot

      // Low w-s bits move up by s, high s bits wrap around to the bottom.
      //
      assign r = { a[w-s-1:0], a[w-1:w-s] };

   end

endmodule

// Bit Keeper
//
// Holds a wb-bit value, bits, and updates it in response to commands
// sampled at each positive edge of clk.
//
// Parameters
//   wb:  Number of bits stored.
//   wi:  Width of the write-data input, din.
//   ws:  Width of the position input, pos.
//
// Ports
//   bits:   The stored value.
//   ready:  1 when there is no rotation left to perform.
//   cmd:    Command. See enum Command.
//   din:    Data for Cmd_Write.
//   pos:    Rotation amount for Cmd_Rot_To, bit position for Cmd_Write.
//   clk:    Clock.
//
// Commands
//   Cmd_Reset:   Clear bits and cancel any rotation in progress.
//   Cmd_Rot_To:  Record pos as the amount of left rotation to perform.
//   Cmd_Write:   If pos is 0, write din to bits[wi-1:0] at once.
//                Otherwise rotate left by wb-pos (bringing bit pos to
//                bit 0), write din to bits[wi-1:0], then rotate left
//                by pos to restore the original alignment.
//   Cmd_Nop:     Perform at most one rotation step of pending work.
//
// Rotation advances only in cycles in which cmd is Cmd_Nop.
//
module bit_keeper
  #( int wb = 64, wi = 8, ws = $clog2(wb) )
   ( output logic [wb-1:0] bits,
     output uwire ready,
     input uwire [3:0] cmd,
     input uwire [wi-1:0] din,
     input uwire [ws-1:0] pos,
     input uwire clk );

   /// SOLUTION

   // Rotation Step Sizes
   //
   // Two fixed-amount rotators are available. The small one rotates
   // by one bit. The large one rotates by a power of 2 near the
   // square root of wb: the square root keeps the number of steps
   // low, and a power of 2 keeps the comparison and subtraction
   // simple.
   //
   localparam int rot_amt_a = 1;
   localparam int rot_amt_b = 1 << ( ws >> 1 );

   // Rotated Versions of bits
   //
   uwire [wb-1:0] bits_rot_a, bits_rot_b;
   rot_left #( .w(wb), .amt(rot_amt_a) ) rl1( .r(bits_rot_a), .a(bits) );
   rot_left #( .w(wb), .amt(rot_amt_b) ) rl8( .r(bits_rot_b), .a(bits) );

   // Command Progress State
   //
   logic [ws-1:0] rot_pending;      // Rotation still to be performed.
   logic [ws-1:0] rot_after_write;  // Rotation to perform once write is done.
   logic [wi-1:0] din_held;         // Data saved for a deferred write.

   // Ready when nothing is pending, neither a rotation nor a write.
   //
   assign ready = !( rot_pending || rot_after_write );

   // Blocking assignments are used deliberately: in the Cmd_Nop case
   // the decremented rot_pending is examined in the same cycle to
   // decide whether the deferred write can be performed.
   //
   always_ff @( posedge clk )
     case ( cmd )

       Cmd_Reset: begin
          //
          // Cancel pending work and clear the stored value.

          rot_after_write = 0;
          rot_pending = 0;
          bits = 0;
       end

       Cmd_Nop: begin
          //
          // Advance a Cmd_Rot_To or Cmd_Write by one step.

          // Take the larger step when enough rotation remains,
          // otherwise the smaller one, otherwise none.
          //
          if ( rot_pending >= rot_amt_b ) begin

             rot_pending = rot_pending - rot_amt_b;
             bits = bits_rot_b;

          end else if ( rot_pending >= rot_amt_a ) begin

             rot_pending = rot_pending - rot_amt_a;
             bits = bits_rot_a;

          end

          // If the pre-write rotation has just completed (or had
          // completed), perform the write and start rotating back.
          //
          if ( rot_pending == 0 && rot_after_write != 0 ) begin

             bits[wi-1:0] = din_held;
             rot_pending = rot_after_write;
             rot_after_write = 0;

          end
       end

       Cmd_Write:

         if ( pos == 0 ) begin
            //
            // No rotation needed, write now.

            bits[wi-1:0] = din;

         end else begin
            //
            // Deferred write: hold the data, remember how far to
            // rotate back afterward, and start by rotating bit pos
            // down to bit 0.

            din_held = din;
            rot_after_write = pos;
            rot_pending = wb - pos;

         end

       Cmd_Rot_To:
         //
         // Just record the amount. The rotation itself is performed
         // in later Cmd_Nop cycles.

         rot_pending = pos;

     endcase

endmodule



//////////////////////////////////////////////////////////////////////////////
/// Testbench Code



// cadence translate_off


// Pass the clock and the cycle count through a program block.
//
// The outputs are copies of the inputs. NOTE(review): presumably
// this is done so that testbench code waiting on clk_reactive
// resumes after the design has settled -- confirm.
//
program reactivate
   ( output uwire clk_reactive,
     output int cycle_reactive,
     input uwire clk,
     input var int cycle );

   assign cycle_reactive = cycle;
   assign clk_reactive = clk;

endprogram

// Top-Level Testbench
//
// Instantiates one testbench_n per parameter set and chains them so
// that each starts when the one before it has finished. The total
// error count is printed when simulation ends.
//
module testbench;

   // Parameter Sets:  { wb, wi } for each bit_keeper tested.
   //
   localparam int npsets = 2;
   localparam int pset[npsets][2] =
              '{ { 40, 4 },
                 { 28, 8 } };

   // Error count summed over all testbench_n instances. Each instance
   // adds its own count using the hierarchical name testbench.t_errs.
   //
   int t_errs;
   initial t_errs = 0;
   final $write("Total number of errors: %0d\n",t_errs);

   // Start / Done chain: d[i] is the done output of instance i and
   // the start input of instance i+1. Element -1 starts instance 0.
   //
   uwire d[npsets:-1];
   assign d[-1] = 1'b1;

   for ( genvar i=0; i<npsets; i++ )
     testbench_n #( .bsize(pset[i][0]), .isize(pset[i][1]) )
     t2( .tstart(d[i-1]), .done(d[i]) );

endmodule


// Testbench for One bit_keeper Configuration
//
// Parameters
//   bsize:  Number of bits kept (bit_keeper parameter wb).
//   isize:  Write data width (bit_keeper parameter wi).
//
// Ports
//   done:    Set to 1 when testing finishes or the cycle limit is reached.
//   tstart:  Testing begins when this input is 1.
//
// Random commands are applied to a bit_keeper instance. After each
// command the module output is compared against a shadow copy
// maintained here. Command latencies are recorded and summarized.
//
module testbench_n
  #( int bsize = 40, isize = 5 )
   ( output logic done, input uwire tstart );

   localparam int bslg = $clog2(bsize);          // Width of pos.
   localparam int n_tests = bsize * 5;           // Number of commands issued.
   localparam int cyc_max = n_tests * bsize * 2; // Cycle limit.
   bit clk;
   int cycle, cycle_limit;
   logic clk_reactive;
   int cycle_reactive;
   reactivate ra(clk_reactive,cycle_reactive,clk,cycle);

   // Command names, for messages.
   //
   string cmd_str[int];
   initial begin
      cmd_str[Cmd_Reset] = "Cmd_Reset";
      cmd_str[Cmd_Nop] = "Cmd_Nop";
      cmd_str[Cmd_Write] = "Cmd_Write";
      cmd_str[Cmd_Rot_To] = "Cmd_Rot_To";
   end

   // event_trace:    Description of the command currently being tested.
   // history_trace:  Descriptions of commands since the last reset or error.
   //
   string event_trace, history_trace;

   // Clock Generation and Cycle Limit
   //
   initial begin
      clk = 0;
      cycle = 0;

      done = 0;
      cycle_limit = cyc_max;
      wait( tstart );

      fork
         // Toggle clk every time unit. cycle is incremented by the
         // value clk had before the toggle, so it advances each time
         // clk goes from 1 to 0.
         while ( !done ) #1 cycle += clk++;
         wait( cycle >= cycle_limit ) begin
            $write("Exit from clock loop at cycle %0d, limit %0d, %s\n",
                   cycle, cycle_limit, "** CYCLE LIMIT EXCEEDED **");
            $write("** Preceding Commands **\n%s", history_trace);
            $write("** In-Progress Command **\n%s\n", event_trace);
         end
      join_any;

      done = 1;
   end


   uwire [bsize-1:0] bits;             // Module output.
   uwire rdy;                          // Module ready output.
   bit [bsize-1:0] bits_shadow, bcpy;  // Expected value of bits, and a temp.
   logic [bslg-1:0] pos;

   logic [3:0] cmd;
   logic [isize-1:0] din;

   bit_keeper #(bsize,isize) bk1(bits, rdy, cmd, din, pos, clk);

   // Latency Histograms
   //
   //   lat_range[cmd][pos].lat_cnt[n_cycles]:  Number of times cmd with pos
   //     took n_cycles.
   //   pos_range[cmd][n_cycles].lat_cnt[pos]:  The same counts, indexed by
   //     latency first.
   //
   // NOTE(review): member pos of Lat_Range is not used in this module.
   //
   typedef struct {int pos; int lat_cnt[int];} Lat_Range;

   Lat_Range lat_range[Cmd_SIZE][int];
   Lat_Range pos_range[Cmd_SIZE][int];

   initial begin

      automatic int n_err = 0;
      int n_cmd[Cmd_SIZE], n_cyc[Cmd_SIZE];  // Per-command count and cycles.
      int n_cycles;
      string cmd_info;
      // Zero both per-command accumulators.
      for ( int i=0; i<Cmd_SIZE; i++ ) begin n_cmd[i] = 0; n_cyc[i] = 0; end

      cmd = Cmd_Reset;
      bits_shadow = bsize'(0);

      wait( tstart );

      $write("\nStarting tests for (wb=%0d,wi=%0d)\n",bsize,isize);

      // Hold reset for two negative edges, then wait for ready.
      //
      @( negedge clk_reactive );
      @( negedge clk_reactive );
      cmd = Cmd_Nop;
      while ( rdy !== 1'b1 ) @( negedge clk_reactive );

      for ( int tn = 0; tn < n_tests; tn++ ) begin

         bit expect_rdy_0;   // True if command should make ready go to 0.
         logic [bslg-1:0] pos_given;
         logic [3:0] cmd_given;

         event_trace = $sformatf("test %2d: ",tn);

         // Choose a random non-reset command, and with probability
         // 1/16 replace it with a reset.
         //
         cmd = {$random} % ( Cmd_SIZE - 1 ) + 1;
         if ( ( {$random} & 15 ) == 0 ) cmd = Cmd_Reset;

         // Choose a random non-zero position. For half of the writes
         // use position zero instead.
         //
         pos = {$random} % (bsize-1) + 1;
         if ( cmd == Cmd_Write && ( {$random} & 1 ) == 0 ) pos = 0;
         din = {$random};
         cmd_given = cmd;
         pos_given = pos;

         event_trace = { event_trace, $sformatf("%-10s ",cmd_str[cmd]) };

         // Apply the command to the shadow copy.
         //
         case ( cmd )

           Cmd_Reset: begin
              bits_shadow = 0; expect_rdy_0 = 0;
           end
           Cmd_Write: begin
              event_trace =
                { event_trace,
                  $sformatf("pos %0d, data %h", pos_given, din) };
              expect_rdy_0 = pos != 0;
              for ( int i=0; i<isize; i++ )
                bits_shadow[(i+pos)%bsize] = din[i];
           end
           Cmd_Rot_To: begin
              event_trace =
                { event_trace,
                  $sformatf("pos %0d", pos_given) };
              expect_rdy_0 = pos != 0;
              bcpy = bits_shadow;
              for ( int i=0; i<bsize; i++ )
                bits_shadow[(i+pos)%bsize] = bcpy[i];
           end
           Cmd_Nop: begin
              expect_rdy_0 = 0;
           end
           default begin
              $write("This can't happen.\n");
              $fatal(1);
           end
         endcase

         cmd_info = event_trace;
         event_trace = { event_trace, "\n" };

         @( negedge clk_reactive );

         // Wait for rdy to go to zero.
         if ( expect_rdy_0 )
           begin
              automatic int cyc_start = cycle;
              event_trace = { cmd_info, "\n -- Awaiting ready = 0.\n" };
              while ( rdy !== 1'b0 ) @( negedge clk_reactive );
              event_trace = { cmd_info, "\n -- Awaiting ready = 1.\n" };

              // Issue nops until the command completes. Inputs pos
              // and din are randomized while waiting.
              //
              cmd = Cmd_Nop;
              pos = {$random};
              din = {$random};
              while ( rdy !== 1'b1 ) @( negedge clk_reactive );
              event_trace = { cmd_info, "\n -- About to check outputs.\n" };
              n_cycles = cycle - cyc_start;
           end else begin
              n_cycles = 0;
           end

         if ( bits_shadow === bits ) begin
            if ( expect_rdy_0 ) begin
               n_cmd[cmd_given]++;
               n_cyc[cmd_given] += n_cycles;
               lat_range[cmd_given][pos_given].lat_cnt[n_cycles]++;
               pos_range[cmd_given][n_cycles].lat_cnt[pos_given]++;
            end
         end else begin
            n_err++;
            if ( n_err < 5 ) begin
               $write("%s",history_trace);
               $write("Error in %-35s: %h != %h (correct)\n",
                      cmd_info, bits, bits_shadow);
            end
            history_trace = "";
            // Resynchronize the shadow so that one error is counted once.
            bits_shadow = bits;
         end

         if ( cmd_given == Cmd_Reset ) history_trace = "";

         history_trace =
           { history_trace,
             $sformatf("Cycle %3d -- %-35s: bits = %h\n",
                       cycle, cmd_info, bits) };

      end

      $write("Finished %0d tests for (wb=%0d,wi=%0d), %0d data errors.\n",
             n_tests, bsize, isize, n_err );

      // Latency Summary
      //
      begin
         automatic bit double_check = 0;
         automatic Command mcc[] = '{ Cmd_Rot_To, Cmd_Write };
         automatic string err_str =
           $sformatf("Error: (wb=%0d,wi=%0d)",bsize,isize);
         $write("Avg cyc");
         foreach ( mcc[i] )
           $write("  %s %.1f (%0d)",
                  cmd_str[mcc[i]],
                  n_cmd[mcc[i]] ? real'(n_cyc[mcc[i]])/n_cmd[mcc[i]] : 0.0,
                  n_cmd[mcc[i]]);
         $write("\n");

         // Optionally recompute the averages from the histogram.
         //
         if ( double_check ) begin
         $write("Avg cyc");
         foreach ( mcc[i] ) begin
            automatic Command c = mcc[i];
            automatic int tot_cyc = 0, tot_cmd = 0;
            foreach ( lat_range[c][pos] ) begin
               foreach ( lat_range[c][pos].lat_cnt[nc] ) begin
                  automatic int ncmd = lat_range[c][pos].lat_cnt[nc];
                  tot_cyc += nc * ncmd;
                  tot_cmd += ncmd;
               end
            end
            // Guard against a command that was never timed.
            $write("  %s %.1f (%0d)",
                   cmd_str[mcc[i]],
                   tot_cmd ? real'(tot_cyc)/tot_cmd : 0.0, tot_cmd);
         end
         end
         $write("\n");

         // Report positions at which a command completed in one cycle
         // or in zero cycles, and flag the cases that are treated as
         // errors below.
         //
         foreach ( mcc[i] ) begin
            automatic Command c = mcc[i];
            automatic int n_one = 0, n_zero = 0;
            string n_z_str, n_o_str;
            n_o_str = $sformatf(" %s 1-cyc pos ",cmd_str[c]);
            foreach ( pos_range[c][1].lat_cnt[pos] ) begin
               n_o_str = { n_o_str, $sformatf("%0d ",pos) };
               n_one++;
            end
            n_z_str = $sformatf(" %s 0-cyc pos ",cmd_str[c]);
            foreach ( pos_range[c][0].lat_cnt[pos] ) begin
               n_z_str = { n_z_str, $sformatf("%0d ",pos) };
               n_zero++;
            end
            if ( n_one ) $write("%s\n",n_o_str);
            if ( n_zero )
              $write("%s\n%s Zero-Cycle %s. Should never be zero when pos!=0\n",
                     n_z_str,err_str,cmd_str[c]);
            if ( c == Cmd_Rot_To && n_one > 2 )
              $write("%s One-Cycle Cmd_Rot_To for more than 2 pos values.\n",
                     err_str);
            if ( c == Cmd_Write && n_one > 0 )
              $write("%s One-Cycle Cmd_Write at least one time. Should never happen.\n",err_str);
         end
      end
      testbench.t_errs += n_err;
      done = 1;

   end

endmodule

// cadence translate_on