/// EE 4755 - Digital Design Using HDLs
//

 /// Classroom Code Examples
 //

 //  Sequential Shifter
 //  Order-d Sequential Shifter
 //
 //  For lecture slides, including diagrams:
 //        https://www.ece.lsu.edu/v/2020/lsli-syn-seq.pdf


 /// Left Shift Using Operator
//
// Combinational left shifter described with the built-in << operator.
//
//   wid_lg     Number of bits in the shift amount.
//   wid        Number of bits in the data; defaults to 2**wid_lg.
//   shifted    unshifted moved left by amt positions, zero filled.
//   unshifted  Value to shift.
//   amt        Shift amount.
//
module shift_lt_behav_1
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output [wid-1:0] shifted,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt );

   // The shift operator itself describes the whole module; bits moved
   // past position wid-1 are dropped.
   assign shifted = unshifted << amt;

endmodule

 /// Left Shift Moving Bits
//
//   Disadvantage is large number of multiplexors.
//
// Combinational left shifter described one output bit at a time.
//
//   wid_lg     Number of bits in the shift amount.
//   wid        Number of bits in the data; defaults to 2**wid_lg.
//   shifted    unshifted moved left by amt positions, zero filled.
//   unshifted  Value to shift.
//   amt        Shift amount.
//
module shift_lt_behav
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     input uwire [wid-1:0] unshifted,
     input uwire [wid_lg-1:0] amt );

   always_comb begin
      for ( int pos = 0; pos < wid; pos++ ) begin
         // Output bits below the shift amount are zero; every other
         // output bit is copied from the input bit amt positions lower.
         shifted[pos] = ( pos < amt ) ? 1'b0 : unshifted[pos - amt];
      end
   end

endmodule



 /// w-bit Left Shift Using lg w Stages
//

// Conditional fixed-distance left shifter.
//
//   wid_lg     Used only to compute the default value of wid.
//   amt        Elaboration-time shift distance.
//   wid        Number of bits in the data; defaults to 2**wid_lg.
//   shifted    unshifted << amt when shift is 1, otherwise unshifted.
//   unshifted  Value to shift.
//   shift      Selects between the shifted and the unmodified value.
//
module shift_fixed
  #( int wid_lg = 4,
     int amt = 1,
     int wid = 1 << wid_lg )
   ( output uwire [wid-1:0] shifted,
     input uwire [wid-1:0] unshifted,
     input uwire shift );

   // Because amt is a constant the shift itself is only a rewiring.
   uwire [wid-1:0] moved = unshifted << amt;

   // Two-input multiplexor choosing between the two versions.
   assign shifted = shift ? moved : unshifted;

endmodule

// Combinational left shifter built from a chain of wid_lg shift_fixed
// stages. Stage s shifts by 2**s when bit s of amt is 1.
//
//   wid_lg     Number of bits in the shift amount, and number of stages.
//   wid        Number of bits in the data; defaults to 2**wid_lg.
//   shifted    unshifted moved left by amt positions, zero filled.
//   unshifted  Value to shift.
//   amt        Shift amount.
//
module shift_lt_comb
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output [wid-1:0] shifted,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt );

   // stage[s] is the output of stage s. Index -1 holds the module input
   // so that every stage can read stage[s-1] without a special case.
   uwire [wid-1:0] stage[wid_lg-1:-1];

   assign stage[-1] = unshifted;

   for ( genvar s = 0; s < wid_lg; s++ )
     shift_fixed #( .wid_lg(wid_lg), .amt(1 << s) ) sf
       ( .shifted( stage[s] ), .unshifted( stage[s-1] ), .shift( amt[s] ) );

   // The output of the last stage is the module output.
   assign shifted = stage[wid_lg-1];

endmodule

 /// w-bit Left Shift Using w Fixed Shifters
//

// Combinational left shifter built from a chain of wid one-bit
// shift_fixed stages. Stage k shifts by one position when k < amt, so
// exactly amt stages shift.
//
//   wid_lg     Number of bits in the shift amount.
//   wid        Number of bits in the data, and number of stages;
//              defaults to 2**wid_lg.
//   shifted    unshifted moved left by amt positions, zero filled.
//   unshifted  Value to shift.
//   amt        Shift amount.
//
module shift_lt_comb_w_shifters
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output [wid-1:0] shifted,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt );

   // stage[k] is the output of stage k. Index -1 holds the module input
   // so that every stage can read stage[k-1] without a special case.
   uwire [wid-1:0] stage[wid-1:-1];

   assign stage[-1] = unshifted;

   for ( genvar k = 0; k < wid; k++ )
     shift_fixed #( .wid_lg(wid_lg), .amt(1) ) sf
       ( .shifted( stage[k] ), .unshifted( stage[k-1] ), .shift( k < amt ) );

   // The output of the last stage is the module output.
   assign shifted = stage[wid-1];

endmodule



 /// LIVE Sequential Version of w-shifter Shifter
//

// Sequential left shifter that shifts one bit position per clock cycle.
//
// Protocol: on a positive clock edge at which start is 1 the module
// captures unshifted and amt. On each following positive edge it shifts
// by one position until amt positions have been shifted. ready is 1
// whenever no shifting remains, at which point shifted holds the result;
// for amt == 0 that is immediately after the capturing edge.
//
//   wid_lg     Number of bits in the shift amount.
//   wid        Number of bits in the data; defaults to 2**wid_lg.
//   shifted    Working register; holds the result when ready is 1.
//   ready      1 when the remaining shift count is zero.
//   unshifted  Value to shift, sampled when start is 1.
//   amt        Shift amount, sampled when start is 1.
//   start      Starts a new shift operation.
//   clk        Clock; all state changes on the positive edge.
//
module shift_lt_seq_live
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output uwire ready,
     input uwire [wid-1:0] unshifted,
     input uwire [wid_lg-1:0] amt,
     input uwire start,
     input uwire clk );

   // Number of one-bit shifts still to be performed.
   logic [wid_lg-1:0] cnt;

   always_ff @( posedge clk )
      if ( start == 1 ) begin

         // Load the operand and the number of shifts to perform.
         shifted <= unshifted;
         cnt <= amt;

      end else if ( cnt != 0 ) begin

         // One shift per cycle until the count runs out.
         shifted <= shifted << 1;
         cnt <= cnt - 1;

      end

   assign ready = cnt == 0;

endmodule

 /// Sequential Version of w-shifter Shifter
//

// Sequential left shifter that shifts one bit position per clock cycle.
//
// Protocol: on a positive clock edge at which start is 1 the module
// captures unshifted and amt. On each following positive edge it shifts
// by one position until amt positions have been shifted. ready is 1
// whenever no shifting remains, at which point shifted holds the result;
// for amt == 0 that is immediately after the capturing edge.
//
//   wid_lg     Number of bits in the shift amount.
//   wid        Number of bits in the data; defaults to 2**wid_lg.
//   shifted    Working register; holds the result when ready is 1.
//   ready      1 when the remaining shift count is zero.
//   unshifted  Value to shift, sampled when start is 1.
//   amt        Shift amount, sampled when start is 1.
//   start      Starts a new shift operation.
//   clk        Clock; all state changes on the positive edge.
//
module shift_lt_seq
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output uwire ready,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt,
     input start,
     input clk );

   // Number of one-bit shifts still to be performed, and that number
   // minus one (computed outside the always block).
   logic [wid_lg-1:0]   cnt;
   uwire [wid_lg-1:0]   cnt_m1 = cnt - 1;

   // Nonblocking assignments are used so that the registers update
   // together after all right-hand sides have been sampled.
   always_ff @( posedge clk ) begin

      if ( start == 1 ) begin

         shifted <= unshifted;
         cnt <= amt;

      end else if ( cnt > 0 ) begin

         shifted <= shifted << 1;
         cnt <= cnt_m1;

      end

      // Otherwise both registers simply hold their values.

   end

   // Ready only when nothing remains to be shifted. Testing cnt_m1 == 0
   // instead would signal ready with one shift still pending, and would
   // never signal ready for amt == 0 because cnt - 1 wraps to all ones.
   assign ready = cnt == 0;

endmodule

 /// Unoptimized:
// :

 /// Optimized:
// :

// Sequential left shifter, one bit position per clock cycle, with the
// shift performed by an explicitly instantiated shift_fixed module.
//
// Protocol: on a positive clock edge at which start is 1 the module
// captures unshifted and amt. On each following positive edge it shifts
// by one position until amt positions have been shifted. ready is 1
// whenever the remaining shift count is zero.
//
//   wid_lg     Number of bits in the shift amount.
//   wid        Number of bits in the data; defaults to 2**wid_lg.
//   shifted    Working register; holds the result when ready is 1.
//   ready      1 when the remaining shift count is zero.
//   unshifted  Value to shift, sampled when start is 1.
//   amt        Shift amount, sampled when start is 1.
//   start      Starts a new shift operation.
//   clk        Clock; all state changes on the positive edge.
//
module shift_lt_seq_alt
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output uwire ready,
     input uwire [wid-1:0] unshifted,
     input uwire [wid_lg-1:0] amt,
     input uwire start,
     input uwire clk );

   // Number of one-bit shifts still to be performed.
   logic [wid_lg-1:0]   cnt;

   // Current value of shifted moved left by one position. The shift
   // input is tied to 1 so the shifter always shifts.
   uwire [wid-1:0]       sf_out;

   shift_fixed #(wid_lg,1) sf( sf_out, shifted, 1'b1 );

   // Data register. Nonblocking assignment is used here, as it is for
   // cnt below, so that both registers follow the same update rule.
   always_ff @( posedge clk )
      if ( start == 1 ) begin
         shifted <= unshifted;
      end else if ( cnt > 0 ) begin
         shifted <= sf_out;
      end

   // Remaining-shift counter.
   always_ff @( posedge clk )
      if ( start == 1 ) begin
         cnt <= amt;
      end else if ( cnt > 0 ) begin
         cnt <= cnt-1;
      end


   assign ready = cnt == 0;


endmodule


// Sequential left shifter using num_shifters fixed shifters per cycle.
//
// The shift amount is split into num_shifters digits of cnt_bits bits
// each. Digit i drives a shift_fixed instance that shifts by
// 2**(i*cnt_bits). On every cycle each non-zero digit causes its
// shifter to shift and is then decremented, so the operation finishes
// once every digit has reached zero.
//
// Protocol: on a positive clock edge at which start is 1 the module
// captures unshifted and amt. ready is 1 whenever all digits are zero,
// at which point shifted holds the result.
//
//   wid_lg        Number of bits in the shift amount.
//   num_shifters  Number of fixed shifters chained within one cycle.
//   wid           Number of bits in the data; defaults to 2**wid_lg.
//   shifted       Working register; holds the result when ready is 1.
//   ready         1 when every digit of the remaining count is zero.
//   unshifted     Value to shift, sampled when start is 1.
//   amt           Shift amount, sampled when start is 1.
//   start         Starts a new shift operation.
//   clk           Clock; all state changes on the positive edge.
//
module shift_lt_seq_d
  #( int wid_lg = 4,
     int num_shifters = 2,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output uwire ready,
     input uwire [wid-1:0] unshifted,
     input uwire [wid_lg-1:0] amt,
     input uwire start,
     input uwire clk );

   // Bits per digit: wid_lg / num_shifters rounded up.
   localparam int cnt_bits = ( wid_lg + num_shifters - 1 ) / num_shifters;

   // Remaining count, addressable one digit at a time as cnt[i].
   logic [num_shifters-1:0][cnt_bits-1:0] cnt;

   // inter_sh[i] is the output of shifter i. Index -1 holds the current
   // register value so that every shifter can read inter_sh[i-1].
   uwire [wid-1:0] inter_sh[num_shifters-1:-1];
   assign inter_sh[-1] = shifted;

   for ( genvar i = 0; i < num_shifters; i++ ) begin
      // Shift distance for digit i, the weight of one unit of that digit.
      localparam int shift_amt = 1 << ( i * cnt_bits );
      uwire       shift = cnt[i] != 0;
      shift_fixed #(wid_lg,shift_amt) sf( inter_sh[i], inter_sh[i-1], shift );
   end

   // Nonblocking assignments are used so that shifted and all digits of
   // cnt update together from values sampled before the clock edge.
   always_ff @( posedge clk )

      if ( start == 1 ) begin

         shifted <= unshifted;
         // amt is zero-extended when cnt has more bits than amt.
         cnt <= amt;

      end else if ( cnt > 0 ) begin

         shifted <= inter_sh[num_shifters-1];
         for ( int i=0; i<num_shifters; i++ )
           if ( cnt[i] != 0 ) cnt[i] <= cnt[i] - 1;

      end

   assign ready = cnt == 0;


endmodule

`ifdef XXX
Sourcing './seq-sh.tcl' (Wed Nov 01 09:07:59 -0500 2017)...
Synthesizing with args "-to_mapped -effort high"

Wid   Module Name                       Area   Delay   Delay
                                              Actual  Target
 2 shift_lt_behav_1                     1400     363    5000
 2 shift_lt_comb                        1480     383    5000
 2 shift_lt_seq                         4380    1170    5000
 2 shift_lt_seq_d                       4820    1217    5000
 4 shift_lt_behav_1                     9788    1260    5000
 4 shift_lt_comb                       10204    1184    5000
 4 shift_lt_seq                        14932    2000    5000
 4 shift_lt_seq_d                      17004    2571    5000
 6 shift_lt_behav_1                    57440    2892    5000
 6 shift_lt_comb                       57320    2796    5000
 6 shift_lt_seq                        51036    2030    5000
 6 shift_lt_seq_d                      59560    4126    5000
 2 shift_lt_behav_1                     2632     194     100
 2 shift_lt_comb                        2632     194     100
 2 shift_lt_seq                         5464     967     100
 2 shift_lt_seq_d                       6628     996     100
 4 shift_lt_behav_1                    29176     490     100
 4 shift_lt_comb                       26800     482     100
 4 shift_lt_seq                        20436    1215     100
 4 shift_lt_seq_d                      21716    1273     100
 6 shift_lt_behav_1                   157420     832     100
 6 shift_lt_comb                      122896     886     100
 6 shift_lt_seq                        68216    1440     100
 6 shift_lt_seq_d                      78784    1503     100
Normal exit.
`endif



// cadence translate_off

// Program block that forwards clk to clk_reactive by continuous
// assignment.
program reactivate
  ( output uwire clk_reactive,
    input uwire clk );

   assign clk_reactive = clk;

endprogram : reactivate

// Testbench for the left-shifter modules in this file.
//
// Every module under test is instantiated once and driven with the same
// sin, amt, start and clk. For each stimulus the expected value
// sin << amt is computed and compared with each module's output. A
// summary line per module (tests run, errors, average cycles) is printed
// at the end.
//
module testbench;

   // Shift-amount width and data width used for every instance.
   localparam int wid_lg = 6;
   localparam int wid = 1 << wid_lg;

   // Number of elements in the sout and ready arrays; each instance
   // below is given its own index.
   localparam int max_units = 11;

   logic      clk;
   bit        done;   // Set to 1 after the last test has been checked.
   int cycle;         // Cycle counter maintained by the clock generator.

   // Outputs collected from the modules under test, indexed by unit.
   uwire [wid-1:0] sout[max_units];
   uwire ready[max_units];
   // Stimulus shared by all modules under test.
   logic [wid-1:0] sin;
   logic [wid_lg-1:0] amt;
   logic              start;

   // Per-module bookkeeping.
   //   idx        Index of the module's outputs in sout and ready.
   //   err_count  Number of stimuli for which the output was wrong.
   //   seq        1 if the testbench must wait for ready before sampling.
   //   sout       Output captured for the current stimulus.
   //   cyc_tot    Sum over all stimuli of cycles elapsed until capture.
   typedef struct { int idx; int err_count = 0; bit seq = 0;
                    logic [wid-1:0] sout = 'h111; int cyc_tot = 0; } Info;
   // Keyed by the name printed in messages for the module.
   Info pi[string];

   shift_lt_behav #(wid_lg) my_sr1(sout[0], sin, amt);
   initial pi["Behavioral"].idx = 0;

   shift_lt_comb #(wid_lg) my_sr2(sout[1], sin, amt);
   initial pi["Combinational"].idx = 1;

   shift_lt_comb_w_shifters #(wid_lg) my_sr10(sout[10], sin, amt);
   initial pi["Combinational W"].idx = 10;

   shift_lt_seq_live #(wid_lg) my_sll(sout[7], ready[7], sin, amt, start, clk);
   initial if ( 1 ) begin
      automatic string m = "Sequential Lv";
      pi[m].idx = 7; pi[m].seq = 1;
   end

   shift_lt_seq #(wid_lg) my_sl3(sout[2], ready[2], sin, amt, start, clk);
   initial begin
      automatic string m = "Sequential";
      pi[m].idx = 2; pi[m].seq = 1;
   end

   shift_lt_seq_alt #(wid_lg) my_sl4(sout[8], ready[8], sin, amt, start, clk);
   initial begin
      automatic string m = "Seq Alt";
      pi[m].idx = 8; pi[m].seq = 1;
   end

   shift_lt_seq_d #(wid_lg,1) my_sld1(sout[6], ready[6], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 1";
      pi[m].idx = 6; pi[m].seq = 1;
   end

   shift_lt_seq_d #(wid_lg,2) my_sld(sout[3], ready[3], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 2";
      pi[m].idx = 3; pi[m].seq = 1;
   end

   shift_lt_seq_d #(wid_lg,3) my_sld3(sout[4], ready[4], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 3";
      pi[m].idx = 4; pi[m].seq = 1;
   end

   shift_lt_seq_d #(wid_lg,4) my_sld4(sout[5], ready[5], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 4";
      pi[m].idx = 5; pi[m].seq = 1;
   end

   // NOTE(review): this instance uses the same module and parameters as
   // my_sld3 above; the commented-out line suggests a separate live-coded
   // module was intended here -- confirm.
   //  shift_lt_seq_d_live #(wid_lg,3) my_sld9(sout[9], ready[9], sin, amt, start, clk);
   shift_lt_seq_d #(wid_lg,3) my_sld9(sout[9], ready[9], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 3 live";
      pi[m].idx = 9; pi[m].seq = 1;
   end

   // tests_per_sa stimuli are applied for each of the wid shift amounts.
   localparam int tests_per_sa = 50;
   localparam int num_tests = wid * tests_per_sa;
   // Simulation is abandoned once this many cycles have been counted.
   localparam int cycle_limit = num_tests * wid * 2;

   // clk_reactive is a copy of clk driven from a program block. The
   // stimulus code below waits on it rather than on clk.
   // NOTE(review): presumably so that the stimulus code resumes after
   // the design has updated on the same clock edge -- confirm.
   reactivate ra(clk_reactive,clk);

   // Clock generation and end-of-simulation control.
   initial begin
      clk = 0;
      cycle = 0;

      fork
         // Toggle clk every 10 time units. The value of clk before the
         // toggle is added to cycle, so cycle increments each time clk
         // goes from 1 to 0.
         forever #10 cycle += clk++;
         wait( done );
         wait( cycle >= cycle_limit )
           $write("*** Cycle limit exceeded, ending.\n");
      join_any;

      // Reached when either testing is done or the cycle limit is hit.
      $finish();
   end

   // Stimulus generation and output checking.
   initial begin

      // Number of test inputs (stimuli).
      //
      automatic int test_count = 0;

      done = 0;

      @( posedge clk_reactive ); @( posedge clk_reactive );


      // Provide tests_per_sa test patterns for each shift amount,
      // in order of increasing shift amount.
      //
      for ( int i=0; i<num_tests; i++ ) begin
         automatic int cyc_start = cycle;
         logic [wid-1:0] shadow_sout;
         int awaiting;
         test_count++;

         // Fill sin with random data, 32 bits at a time.
         for ( int p=0; p<wid; p+=32 ) sin[p+:32] = $random;

         amt = i / tests_per_sa;

         // Expected result.
         shadow_sout = sin << amt;

         // Hold start at 1 across exactly one positive clock edge.
         start = 1;
         @( posedge clk_reactive );
         start = 0;

         // Collect output as ready signals go to 1, or immediately
         // for non-sequential modules.
         //
         // One thread is forked per module. A sequential module whose
         // ready never becomes 1 keeps awaiting above zero, in which case
         // the simulation ends only when the cycle limit is reached.
         //
         awaiting = pi.num();
         foreach ( pi[muti] ) begin
            automatic string mut = muti; // Bug workaround?
            fork begin
               while ( pi[mut].seq && ready[pi[mut].idx] !== 1 )
                 @( posedge clk_reactive );
               awaiting--;
               pi[mut].sout = sout[pi[mut].idx];
               pi[mut].cyc_tot += cycle - cyc_start;
            end join_none;
         end
         wait ( awaiting == 0 );

         // Check the output of each Module Under Test.
         //
         // Only the first four errors of each module are printed.
         //
         foreach ( pi[ mut ] )
           if ( shadow_sout !== pi[mut].sout ) begin
              pi[mut].err_count++;
              if ( pi[mut].err_count < 5 )
                $write
                  ("%-20s wrong result for 0x%0h << %0d:  0x%0h != 0x%0h (correct)\n",
                   mut, sin, amt, pi[mut].sout, shadow_sout);
           end

      end

      done = 1;

      // Summary. The average cycle count is computed only for sequential
      // modules; 1 is printed for the others.
      foreach ( pi[ mut ] )
         $write("Ran %4d tests for %-15s, %4d errors found. Avg cyc %.1f\n",
                  test_count, mut, pi[mut].err_count,
                pi[mut].seq ? real'(pi[mut].cyc_tot) / test_count : 1
                );
   end

endmodule

// cadence translate_on