/// EE 4755 - Digital Design Using HDLs
//

 /// Classroom Code Examples
 //

 //  Sequential Shifter
 //  Order-d Sequential Shifter
 //
 //  For lecture slides, including diagrams:
 //        http://www.ece.lsu.edu/v/2016/lsli-syn-seq.pdf


 /// Left Shift Using Operator
//
module shift_lt_behav_1
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output [wid-1:0] shifted,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt );

   // Combinational logical left shift described with the << operator.
   //
   //   wid_lg    : number of bits in the shift amount.
   //   wid       : data width, by default 2^wid_lg.
   //   unshifted : value to be shifted.
   //   amt       : number of bit positions to shift left.
   //   shifted   : unshifted moved left by amt, vacated low bits are zero.

   wire [wid-1:0] result = unshifted << amt;

   assign shifted = result;

endmodule

 /// Left Shift Moving Bits
//
//   Disadvantage: this description needs a large number of multiplexors.
//
module shift_lt_behav
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     input wire [wid-1:0] unshifted,
     input wire [wid_lg-1:0] amt );

   // Combinational left shift described one output bit at a time.
   //
   //   Output bit pos is taken from input bit pos-amt when such a bit
   //   exists (pos >= amt); the low amt bits of the output are zero.

   always_comb begin
      for ( int pos = 0; pos < wid; pos++ ) begin
         if ( pos < amt )
           shifted[pos] = 1'b0;
         else
           shifted[pos] = unshifted[pos - amt];
      end
   end

endmodule



 /// w-bit Left Shift Using lg w Stages
//

module shift_fixed
  #( int wid_lg = 4,
     int amt = 1,
     int wid = 1 << wid_lg )
   ( output [wid-1:0] shifted,
     input [wid-1:0] unshifted,
     input shift );

   // Conditional shift by a constant amount.
   //
   //   amt   : elaboration-time constant shift distance.
   //   shift : when 1 the output is unshifted << amt,
   //           when 0 the input is passed through unchanged.

   wire [wid-1:0] moved = unshifted << amt;

   assign shifted = shift ? moved : unshifted;

endmodule

module shift_lt_comb
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output [wid-1:0] shifted,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt );

   // Combinational left shifter built from a chain of wid_lg
   // fixed-amount shifters.
   //
   //   Stage b shifts by 2^b when bit b of amt is set, otherwise it
   //   passes its input through.  stage[-1] is the module input and
   //   stage[wid_lg-1] is the module output.

   wire [wid-1:0] stage[wid_lg-1:-1];

   assign stage[-1] = unshifted;

   for ( genvar b = 0; b < wid_lg; b++ ) begin
      shift_fixed #( .wid_lg(wid_lg), .amt(1 << b) ) sf
        ( .shifted(stage[b]), .unshifted(stage[b-1]), .shift(amt[b]) );
   end

   assign shifted = stage[wid_lg-1];

endmodule

 /// w-bit Left Shift Using w Fixed Shifters
//


module shift_lt_comb_w_shifters
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output [wid-1:0] shifted,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt );

   // Combinational left shifter built from a chain of wid shifters,
   // each of which shifts by exactly one bit position.
   //
   //   Shifter k is enabled when k < amt, so exactly amt of the
   //   one-bit shifters are active.  stage[-1] is the module input
   //   and stage[wid-1] is the module output.

   wire [wid-1:0] stage[wid-1:-1];

   assign stage[-1] = unshifted;

   for ( genvar k = 0; k < wid; k++ ) begin
      wire enable = k < amt;
      shift_fixed #( .wid_lg(wid_lg), .amt(1) ) sf
        ( .shifted(stage[k]), .unshifted(stage[k-1]), .shift(enable) );
   end

   assign shifted = stage[wid-1];

endmodule



 /// Sequential Version of w-shifter Shifter
//

module shift_lt_seq
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output wire ready,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt,
     input start,
     input clk );

   // Sequential left shifter using a single one-bit shifter.
   //
   //   start     : when 1 at a positive clock edge, unshifted is loaded
   //               into shifted and amt is loaded into the counter.
   //   ready     : 1 when the counter is zero, i.e. shifted holds the
   //               final result.
   //
   //   On each following positive edge with a non-zero counter, shifted
   //   is replaced by itself shifted left by one and the counter is
   //   decremented, so a shift by amt completes amt cycles after the
   //   edge that sampled start.

   // Number of one-bit shifts still to be performed.
   logic [wid_lg-1:0]   cnt;

   // Current value of shifted moved left by one bit.
   wire [wid-1:0]       sf_out;

   shift_fixed #(wid_lg,1) sf( sf_out, shifted, 1'b1 );

   // Nonblocking assignments are used so that every reader of shifted
   // and cnt (the sf instance, the ready assignment, and logic outside
   // this module) sees the pre-edge value during the clock edge,
   // independent of simulator process ordering.  When neither branch
   // is taken the registers simply hold their values.
   always_ff @( posedge clk ) begin

      if ( start == 1 ) begin

         shifted <= unshifted;
         cnt <= amt;

      end else if ( cnt > 0 ) begin

         shifted <= sf_out;
         cnt <= cnt - 1;

      end

   end

   assign ready = cnt == 0;

endmodule

 /// Unoptimized:
// :

 /// Optimized:
// :

module shift_lt_seq_alt
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output wire ready,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt,
     input start,
     input clk );

   // Sequential left shifter using a single one-bit shifter, written
   // with separate always_ff blocks for the data register and for the
   // counter.
   //
   //   start : when 1 at a positive clock edge, unshifted is loaded
   //           into shifted and amt is loaded into the counter.
   //   ready : 1 when the counter is zero.

   // Number of one-bit shifts still to be performed.
   logic [wid_lg-1:0]   cnt;

   // Current value of shifted moved left by one bit.
   wire [wid-1:0]       sf_out;

   shift_fixed #(wid_lg,1) sf( sf_out, shifted, 1'b1 );

   // Data register.  Nonblocking assignment, matching the counter
   // block below, so that sf and external readers of shifted are not
   // subject to process-ordering races at the clock edge.
   always_ff @( posedge clk )

      if ( start == 1 ) begin

         shifted <= unshifted;

      end else if ( cnt > 0 ) begin

         shifted <= sf_out;

      end

   // Counter register.
   always_ff @( posedge clk )

      if ( start == 1 ) begin

         cnt <= amt;

      end else if ( cnt > 0 ) begin

         cnt <= cnt-1;

      end


   assign ready = cnt == 0;


endmodule


module shift_lt_seq_d
  #( int wid_lg = 4,
     int num_shifters = 2,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output wire ready,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt,
     input start,
     input clk );

   // Sequential left shifter using num_shifters fixed shifters in
   // series.
   //
   //   The shift amount is treated as num_shifters digits of cnt_bits
   //   bits each.  Shifter i shifts by 2^(i*cnt_bits) and is active in
   //   a cycle while digit i of the counter is non-zero; every non-zero
   //   digit is decremented each cycle.
   //
   //   start : when 1 at a positive clock edge, unshifted is loaded
   //           into shifted and amt (zero-extended) into the counter.
   //   ready : 1 when all counter digits are zero.

   // Bits per counter digit: wid_lg / num_shifters, rounded up.
   localparam int cnt_bits = ( wid_lg + num_shifters - 1 ) / num_shifters;

   // cnt[i] is the number of shifts still to be done by shifter i.
   logic [num_shifters-1:0][cnt_bits-1:0] cnt;

   // inter_sh[-1] is the register value, inter_sh[i] the output of
   // shifter i.
   wire [wid-1:0]      inter_sh[num_shifters-1:-1];
   assign              inter_sh[-1] = shifted;

   for ( genvar i = 0; i < num_shifters; i++ ) begin
      localparam int shift_amt = 1 << ( i * cnt_bits );
      wire       shift = cnt[i] != 0;
      shift_fixed #(wid_lg,shift_amt) sf( inter_sh[i], inter_sh[i-1], shift );
   end

   // Nonblocking assignments are used so that the shifter chain, the
   // ready assignment, and logic outside this module all see the
   // pre-edge values of shifted and cnt during the clock edge,
   // independent of simulator process ordering.
   always_ff @( posedge clk )

      if ( start == 1 ) begin

         shifted <= unshifted;
         cnt <= amt;

      end else if ( cnt > 0 ) begin

         shifted <= inter_sh[num_shifters-1];
         for ( int i=0; i<num_shifters; i++ )
           if ( cnt[i] != 0 ) cnt[i] <= cnt[i] - 1;

      end

   assign ready = cnt == 0;


endmodule



// cadence translate_off

program reactivate
  ( output wire clk_reactive,
    input wire clk );

   // Copy of clk produced inside a program block.  Program-block code
   // is scheduled in the Reactive region (IEEE 1800), so processes
   // waiting on clk_reactive presumably run after the design has
   // responded to the clk edge -- confirm against the simulator in use.
   assign clk_reactive = clk;

endprogram

// Self-checking testbench for the left-shifter modules.
//
//   Instantiates each shifter variant, applies random data with every
//   shift amount, compares each module's output against sin << amt, and
//   prints per-module error counts and average cycle counts.
//
module testbench;

   // Shift-amount bits and data width used for every module under test.
   localparam int wid_lg = 6;
   localparam int wid = 1 << wid_lg;

   // Size of the sout and ready arrays; the instances below use
   // indices 0 through 10.
   localparam int max_units = 11;

   logic      clk;
   // Set to 1 by the stimulus process when all tests have been run.
   bit        done;
   // Clock cycle counter maintained by the clock generator below.
   int cycle;

   // Per-module outputs, indexed by the idx value recorded in pi.
   uwire [wid-1:0] sout[max_units];
   uwire ready[max_units];
   // Stimulus shared by all modules under test.
   logic [wid-1:0] sin;
   logic [wid_lg-1:0] amt;
   logic              start;

   // Bookkeeping for one module under test:
   //   idx       : index into sout and ready.
   //   err_count : number of wrong results seen.
   //   seq       : 1 if the module is sequential (has a ready output).
   //   sout      : output captured for the current test.
   //   cyc_tot   : total cycles from start to capture, over all tests.
   typedef struct { int idx; int err_count = 0; bit seq = 0;
                    logic [wid-1:0] sout = 'h111; int cyc_tot = 0; } Info;
   // Module information keyed by the name used in messages.
   Info pi[string];

   shift_lt_behav #(wid_lg) my_sr1(sout[0], sin, amt);
   initial pi["Behavioral"].idx = 0;

   shift_lt_comb #(wid_lg) my_sr2(sout[1], sin, amt);
   initial pi["Combinational"].idx = 1;

   shift_lt_comb_w_shifters #(wid_lg) my_sr10(sout[10], sin, amt);
   initial pi["Combinational W"].idx = 10;

   // NOTE(review): shift_lt_seq_live is not defined in the visible
   // part of this file -- confirm it is provided elsewhere.
   shift_lt_seq_live #(wid_lg) my_sll(sout[7], ready[7], sin, amt, start, clk);
   initial if ( 1 ) begin
      automatic string m = "Sequential Lv";
      pi[m].idx = 7; pi[m].seq = 1;
   end

   shift_lt_seq #(wid_lg) my_sl3(sout[2], ready[2], sin, amt, start, clk);
   initial begin
      automatic string m = "Sequential";
      pi[m].idx = 2; pi[m].seq = 1;
   end

   shift_lt_seq_alt #(wid_lg) my_sl4(sout[8], ready[8], sin, amt, start, clk);
   initial begin
      automatic string m = "Seq Alt";
      pi[m].idx = 8; pi[m].seq = 1;
   end

   // shift_lt_seq_d instantiated with 1, 2, 3, and 4 shifters.
   shift_lt_seq_d #(wid_lg,1) my_sld1(sout[6], ready[6], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 1";
      pi[m].idx = 6; pi[m].seq = 1;
   end

   shift_lt_seq_d #(wid_lg,2) my_sld(sout[3], ready[3], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 2";
      pi[m].idx = 3; pi[m].seq = 1;
   end

   shift_lt_seq_d #(wid_lg,3) my_sld3(sout[4], ready[4], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 3";
      pi[m].idx = 4; pi[m].seq = 1;
   end

   shift_lt_seq_d #(wid_lg,4) my_sld4(sout[5], ready[5], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 4";
      pi[m].idx = 5; pi[m].seq = 1;
   end

   // NOTE(review): shift_lt_seq_d_live is not defined in the visible
   // part of this file -- confirm it is provided elsewhere.
   shift_lt_seq_d_live #(wid_lg,3) my_sld9(sout[9], ready[9], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 3 live";
      pi[m].idx = 9; pi[m].seq = 1;
   end

   // tests_per_sa random inputs are applied for each of the wid
   // possible shift amounts.
   localparam int tests_per_sa = 50;
   localparam int num_tests = wid * tests_per_sa;
   // Simulation is abandoned if this many cycles elapse.
   localparam int cycle_limit = num_tests * wid * 2;

   // clk_reactive is a copy of clk driven from a program block; the
   // stimulus process below synchronizes to it rather than to clk.
   reactivate ra(clk_reactive,clk);

   // Clock generator and simulation terminator.
   initial begin
      clk = 0;
      cycle = 0;

      fork
         // Toggle clk every 10 time units.  The post-increment yields
         // the old value of clk, so cycle advances by one each time clk
         // goes from 1 to 0.
         forever #10 cycle += clk++;
         wait( done );
         wait( cycle >= cycle_limit )
           $write("*** Cycle limit exceeded, ending.\n");
      join_any;

      // Reached when testing is done or the cycle limit is hit.
      $finish();
   end

   // Stimulus generation and output checking.
   initial begin

      // Number of test inputs (stimuli).
      //
      automatic int test_count = 0;

      done = 0;

      @( posedge clk_reactive ); @( posedge clk_reactive );


      // Provide tests_per_sa test patterns for each shift amount.
      //
      for ( int i=0; i<num_tests; i++ ) begin
         // Cycle number at which this test began.
         automatic int cyc_start = cycle;
         // Expected output for this test.
         logic [wid-1:0] shadow_sout;
         // Number of modules whose output has not yet been collected.
         int awaiting;
         test_count++;

         // Fill sin with random data, 32 bits at a time.
         for ( int p=0; p<wid; p+=32 ) sin[p+:32] = $random;

         amt = i / tests_per_sa;

         shadow_sout = sin << amt;

         // Assert start for one clock edge.
         start = 1;
         @( posedge clk_reactive );
         start = 0;

         // Collect output as ready signals go to 1, or immediately
         // for non-sequential modules.
         //
         awaiting = pi.num();
         foreach ( pi[muti] ) begin
            // Per-iteration copy of the key for use by the forked
            // thread.  (The original author marked this as a possible
            // simulator-bug workaround.)
            automatic string mut = muti; // Bug workaround?
            fork begin
               while ( pi[mut].seq && ready[pi[mut].idx] !== 1 )
                 @( posedge clk_reactive );
               awaiting--;
               pi[mut].sout = sout[pi[mut].idx];
               pi[mut].cyc_tot += cycle - cyc_start;
            end join_none;
         end
         // Resume when every forked collector thread has finished.
         wait ( awaiting == 0 );

         // Check the output of each Module Under Test.
         //
         foreach ( pi[ mut ] )
           if ( shadow_sout !== pi[mut].sout ) begin
              pi[mut].err_count++;
              // Only the first few errors per module are printed.
              if ( pi[mut].err_count < 5 )
                $write
                  ("%-20s wrong result for 0x%0h << %0d:  0x%0h != 0x%0h (correct)\n",
                   mut, sin, amt, pi[mut].sout, shadow_sout);
           end

      end

      // Tell the clock-generator process that testing is complete.
      done = 1;

      // Summary: one line per module.  Non-sequential modules are
      // reported with an average of 1.
      foreach ( pi[ mut ] )
         $write("Ran %4d tests for %-15s, %4d errors found. Avg cyc %.1f\n",
                  test_count, mut, pi[mut].err_count,
                pi[mut].seq ? real'(pi[mut].cyc_tot) / test_count : 1
                );
   end

endmodule

// cadence translate_on