module shift_lt_behav_1
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output [wid-1:0] shifted,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt );

   // Behavioral logical left shifter.
   //
   //   shifted   : unshifted moved left by amt bit positions.
   //   unshifted : value to shift.
   //   amt       : shift distance.
   //
   // The << operator does all the work: vacated low-order positions
   // are filled with zeros and bits moved past position wid-1 are lost.
   uwire [wid-1:0] result = unshifted << amt;

   assign shifted = result;

endmodule
 module shift_lt_behav
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     input uwire [wid-1:0] unshifted,
     input uwire [wid_lg-1:0] amt );

   // Behavioral left shifter described one output bit at a time.
   //
   // Output bit `pos` takes the input bit that sits amt positions
   // below it; output bits at positions lower than amt have no
   // source bit and are driven to zero.
   always_comb begin
      for ( int pos = 0; pos < wid; pos++ ) begin
         shifted[pos] = ( amt <= pos ) ? unshifted[pos-amt] : 0;
      end
   end

endmodule
 
module shift_fixed
  #( int wid_lg = 4,
     int amt = 1,
     int wid = 1 << wid_lg )
   ( output uwire [wid-1:0] shifted,
     input uwire [wid-1:0] unshifted,
     input uwire shift );

   // Conditional shifter with a compile-time shift distance.
   //
   //   shift == 1 : shifted is unshifted moved left by amt positions.
   //   shift == 0 : shifted is a copy of unshifted.

   // The input as it would look after the fixed shift.
   uwire [wid-1:0] moved = unshifted << amt;

   // Choose between the moved and the untouched value.
   assign shifted = shift ? moved : unshifted;

endmodule
module shift_lt_comb
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output [wid-1:0] shifted,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt );

   // Combinational left shifter built from a chain of wid_lg
   // shift_fixed stages. Stage b shifts by 2^b positions when bit b
   // of amt is set and passes its input through otherwise.

   // stage_val[0] is the module input; stage_val[b+1] is the output
   // of stage b.
   uwire [wid-1:0]       stage_val[wid_lg:0];

   assign stage_val[0] = unshifted;

   for ( genvar b=0; b<wid_lg; b++ )
     shift_fixed #(wid_lg,1<<b) sf( stage_val[b+1], stage_val[b], amt[b] );

   // The output of the last stage is the fully shifted value.
   assign shifted = stage_val[wid_lg];

endmodule
 
module shift_lt_comb_w_shifters
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output [wid-1:0] shifted,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt );

   // Combinational left shifter built from a chain of wid one-bit
   // shift_fixed stages. Stage s shifts by one position when amt is
   // greater than s, so exactly amt stages shift and the rest pass
   // their input through.

   // chain[0] is the module input; chain[s+1] is the output of stage s.
   uwire [wid-1:0]       chain[wid:0];

   assign chain[0] = unshifted;

   for ( genvar s=0; s<wid; s++ )
     shift_fixed #(wid_lg,1) sf( chain[s+1], chain[s], amt > s );

   // The output of the last stage is the fully shifted value.
   assign shifted = chain[wid];

endmodule
 
module shift_lt_seq_live
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output uwire ready,
     input uwire [wid-1:0] unshifted,
     input uwire [wid_lg-1:0] amt,
     input uwire start,
     input uwire clk );

   // Sequential left shifter that moves the value one bit position
   // per clock cycle.
   //
   //   start     : when 1 at a positive clk edge, unshifted and amt
   //               are captured and a new shift operation begins.
   //   ready     : 1 when no shifts remain, meaning that shifted
   //               holds the final result of the last operation.
   //   shifted   : value being shifted; valid when ready is 1.
   //
   // An operation with amt == 0 is ready right after the edge at
   // which start was sampled.

   // Number of one-bit shifts still to be performed.
   logic [wid_lg-1:0] cnt;

   always_ff @( posedge clk )
     if ( start == 1 ) begin
        // Load a new operation.
        shifted <= unshifted;
        cnt <= amt;
     end else if ( cnt != 0 ) begin
        // Perform one of the remaining shifts.
        shifted <= shifted << 1;
        cnt <= cnt - 1;
     end

   assign ready = cnt == 0;

endmodule
 
module shift_lt_seq
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output uwire ready,
     input [wid-1:0] unshifted,
     input [wid_lg-1:0] amt,
     input start,
     input clk );

   // Sequential left shifter that moves the value one bit position
   // per clock cycle.
   //
   //   start     : when 1 at a positive clk edge, unshifted and amt
   //               are captured and a new shift operation begins.
   //   ready     : 1 when no shifts remain, meaning that shifted
   //               holds the final result of the last operation.
   //   shifted   : value being shifted; valid when ready is 1.

   // Number of one-bit shifts still to be performed.
   logic [wid_lg-1:0]   cnt;
   uwire [wid_lg-1:0]   cnt_m1 = cnt - 1;

   // Nonblocking assignments are used so that both registers update
   // together from their pre-edge values.
   always_ff @( posedge clk ) begin
      if ( start == 1 ) begin
         shifted <= unshifted;
         cnt <= amt;
      end else if ( cnt > 0 ) begin
         shifted <= shifted << 1;
         cnt <= cnt_m1;
      end
      // Otherwise both registers hold their values.
   end

   // The result is complete only once every shift has been done.
   // Testing cnt itself (rather than cnt - 1) also makes an
   // operation with amt == 0 report ready.
   assign ready = cnt == 0;

endmodule
 
 
module shift_lt_seq_alt
  #( int wid_lg = 4,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output uwire ready,
     input uwire [wid-1:0] unshifted,
     input uwire [wid_lg-1:0] amt,
     input uwire start,
     input uwire clk );

   // Sequential left shifter that moves the value one bit position
   // per clock cycle, using an explicit shift_fixed instance for the
   // one-bit shift and separate always_ff blocks for the data
   // register and the shift counter.
   //
   //   start     : when 1 at a positive clk edge, unshifted and amt
   //               are captured and a new shift operation begins.
   //   ready     : 1 when no shifts remain.
   //   shifted   : value being shifted; valid when ready is 1.

   // Number of one-bit shifts still to be performed.
   logic [wid_lg-1:0]   cnt;

   // Current value of shifted moved left by one position.
   uwire [wid-1:0]       sf_out;
   shift_fixed #(wid_lg,1) sf( sf_out, shifted, 1'b1 );

   // Data register. Nonblocking assignment is used, as in the
   // counter block below, so that neither block's result depends on
   // the order in which the two blocks are evaluated.
   always_ff @( posedge clk )
      if ( start == 1 ) begin
         shifted <= unshifted;
      end else if ( cnt > 0 ) begin
         shifted <= sf_out;
      end

   // Shift counter.
   always_ff @( posedge clk )
      if ( start == 1 ) begin
         cnt <= amt;
      end else if ( cnt > 0 ) begin
         cnt <= cnt-1;
      end

   assign ready = cnt == 0;

endmodule
module shift_lt_seq_d
  #( int wid_lg = 4,
     int num_shifters = 2,
     int wid = 1 << wid_lg )
   ( output logic [wid-1:0] shifted,
     output uwire ready,
     input uwire [wid-1:0] unshifted,
     input uwire [wid_lg-1:0] amt,
     input uwire start,
     input uwire clk );

   // Sequential left shifter that uses num_shifters fixed shifters
   // per clock cycle.
   //
   // The shift amount is split into num_shifters digits of cnt_bits
   // bits each. Shifter i shifts by 2^(i*cnt_bits) positions, and
   // does so in every cycle in which digit i is still nonzero; each
   // nonzero digit is decremented once per cycle.
   //
   //   start     : when 1 at a positive clk edge, unshifted and amt
   //               are captured and a new shift operation begins.
   //   ready     : 1 when every digit has reached zero.
   //   shifted   : value being shifted; valid when ready is 1.

   // Bits per digit: wid_lg / num_shifters, rounded up.
   localparam int cnt_bits = ( wid_lg + num_shifters - 1 ) / num_shifters;

   // Remaining shift count, viewed as num_shifters digits.
   logic [num_shifters-1:0][cnt_bits-1:0] cnt;

   // inter_sh[i] is the output of shifter i; inter_sh[-1] is the
   // input to the first shifter.
   uwire [wid-1:0] inter_sh[num_shifters-1:-1];
   assign inter_sh[-1] = shifted;

   for ( genvar i = 0; i < num_shifters; i++ ) begin
      localparam int shift_amt = 1 << ( i * cnt_bits );
      uwire       shift = cnt[i] != 0;
      shift_fixed #(wid_lg,shift_amt) sf( inter_sh[i], inter_sh[i-1], shift );
   end

   // Nonblocking assignments are used so that every register is
   // updated from the values present before the clock edge.
   always_ff @( posedge clk )
      if ( start == 1 ) begin
         shifted <= unshifted;
         cnt <= amt;
      end else if ( cnt > 0 ) begin
         shifted <= inter_sh[num_shifters-1];
         for ( int i=0; i<num_shifters; i++ )
           if ( cnt[i] ) cnt[i] <= cnt[i] - 1;
      end

   assign ready = cnt == 0;

endmodule
`ifdef XXX
Sourcing './seq-sh.tcl' (Wed Nov 01 09:07:59 -0500 2017)...
Synthesizing with args "-to_mapped -effort high"
Wid   Module Name                       Area   Delay   Delay
                                              Actual  Target
 2 shift_lt_behav_1                     1400     363    5000
 2 shift_lt_comb                        1480     383    5000
 2 shift_lt_seq                         4380    1170    5000
 2 shift_lt_seq_d                       4820    1217    5000
 4 shift_lt_behav_1                     9788    1260    5000
 4 shift_lt_comb                       10204    1184    5000
 4 shift_lt_seq                        14932    2000    5000
 4 shift_lt_seq_d                      17004    2571    5000
 6 shift_lt_behav_1                    57440    2892    5000
 6 shift_lt_comb                       57320    2796    5000
 6 shift_lt_seq                        51036    2030    5000
 6 shift_lt_seq_d                      59560    4126    5000
 2 shift_lt_behav_1                     2632     194     100
 2 shift_lt_comb                        2632     194     100
 2 shift_lt_seq                         5464     967     100
 2 shift_lt_seq_d                       6628     996     100
 4 shift_lt_behav_1                    29176     490     100
 4 shift_lt_comb                       26800     482     100
 4 shift_lt_seq                        20436    1215     100
 4 shift_lt_seq_d                      21716    1273     100
 6 shift_lt_behav_1                   157420     832     100
 6 shift_lt_comb                      122896     886     100
 6 shift_lt_seq                        68216    1440     100
 6 shift_lt_seq_d                      78784    1503     100
Normal exit.
`endif
// cadence translate_off
program reactivate
  ( output uwire clk_reactive,
    input uwire clk );

   // Forwards clk to clk_reactive from within a program block, so
   // that code waiting on clk_reactive is woken by an event that
   // originates in this program rather than directly by clk.
   assign clk_reactive = clk;

endprogram
// Testbench for the left-shifter modules.
//
// Instantiates each shifter with the same sin / amt / start / clk
// inputs, applies random values at every shift amount, and compares
// each module's output with sin << amt. A per-module summary
// (error count and, for sequential modules, average cycle count) is
// printed at the end.
module testbench;
   // Data width is 2^wid_lg bits.
   localparam int wid_lg = 6;
   localparam int wid = 1 << wid_lg;
   // Number of slots in the sout and ready arrays.
   localparam int max_units = 11;
   logic      clk;
   // Set to 1 by the test thread when all tests have been run.
   bit        done;
   // Clock cycle counter, advanced by the clock generator below.
   int cycle;
   // Per-module outputs, indexed by the idx recorded in pi.
   uwire [wid-1:0] sout[max_units];
   uwire ready[max_units];
   // Inputs shared by all modules under test.
   logic [wid-1:0] sin;
   logic [wid_lg-1:0] amt;
   logic              start;
   // Per-module bookkeeping:
   //   idx       - slot in sout / ready connected to the module.
   //   err_count - number of wrong results seen so far.
   //   seq       - 1 if the module has a ready output to wait for.
   //   sout      - module output captured for the current test.
   //   cyc_tot   - total cycles spent waiting, summed over all tests.
   typedef struct { int idx; int err_count = 0; bit seq = 0;
                    logic [wid-1:0] sout = 'h111; int cyc_tot = 0; } Info;
   // Module records, keyed by the name printed in messages.
   Info pi[string];
   // Modules under test. Each instance is followed by an initial
   // block that registers it in pi.
   shift_lt_behav #(wid_lg) my_sr1(sout[0], sin, amt);
   initial pi["Behavioral"].idx = 0;
   shift_lt_comb #(wid_lg) my_sr2(sout[1], sin, amt);
   initial pi["Combinational"].idx = 1;
   shift_lt_comb_w_shifters #(wid_lg) my_sr10(sout[10], sin, amt);
   initial pi["Combinational W"].idx = 10;
   shift_lt_seq_live #(wid_lg) my_sll(sout[7], ready[7], sin, amt, start, clk);
   // NOTE(review): the constant condition looks like a manual switch
   // for leaving this module out of the run -- confirm.
   initial if ( 1 ) begin
      automatic string m = "Sequential Lv";
      pi[m].idx = 7; pi[m].seq = 1;
   end
   shift_lt_seq #(wid_lg) my_sl3(sout[2], ready[2], sin, amt, start, clk);
   initial begin
      automatic string m = "Sequential";
      pi[m].idx = 2; pi[m].seq = 1;
   end
   shift_lt_seq_alt #(wid_lg) my_sl4(sout[8], ready[8], sin, amt, start, clk);
   initial begin
      automatic string m = "Seq Alt";
      pi[m].idx = 8; pi[m].seq = 1;
   end
   shift_lt_seq_d #(wid_lg,1) my_sld1(sout[6], ready[6], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 1";
      pi[m].idx = 6; pi[m].seq = 1;
   end
   shift_lt_seq_d #(wid_lg,2) my_sld(sout[3], ready[3], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 2";
      pi[m].idx = 3; pi[m].seq = 1;
   end
   shift_lt_seq_d #(wid_lg,3) my_sld3(sout[4], ready[4], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 3";
      pi[m].idx = 4; pi[m].seq = 1;
   end
   shift_lt_seq_d #(wid_lg,4) my_sld4(sout[5], ready[5], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 4";
      pi[m].idx = 5; pi[m].seq = 1;
   end
      shift_lt_seq_d #(wid_lg,3) my_sld9(sout[9], ready[9], sin, amt, start, clk);
   initial begin
      automatic string m = "Degree 3 live";
      pi[m].idx = 9; pi[m].seq = 1;
   end
   // Each shift amount from 0 to wid-1 is tested tests_per_sa times.
   localparam int tests_per_sa = 50;
   localparam int num_tests = wid * tests_per_sa;
   // Simulation is abandoned once this many cycles have elapsed.
   localparam int cycle_limit = num_tests * wid * 2;
   // Copy of clk driven from a program block; the test thread below
   // synchronizes to this copy rather than to clk itself.
   reactivate ra(clk_reactive,clk);
   // Clock generation and end-of-simulation control.
   initial begin
      clk = 0;
      cycle = 0;
      fork
         // Toggle clk every 10 time units. The post-increment yields
         // the value of clk before the toggle, so cycle advances by
         // one each time clk goes from 1 to 0.
         forever #10 cycle += clk++;
         // Normal end: the test thread has finished.
         wait( done );
         // Abnormal end: too many cycles have elapsed.
         wait( cycle >= cycle_limit )
           $write("*** Cycle limit exceeded, ending.\n");
      join_any;
      $finish();
   end
   // Test thread: applies the stimulus and checks the results.
   initial begin
                  automatic int test_count = 0;
      done = 0;
      @( posedge clk_reactive ); @( posedge clk_reactive );
                  for ( int i=0; i<num_tests; i++ ) begin
         // Cycle number at which this test began.
         automatic int cyc_start = cycle;
         // Expected result for this test.
         logic [wid-1:0] shadow_sout;
         // Number of modules that have not yet delivered a result.
         int awaiting;
         test_count++;
         // Fill sin with random data, 32 bits at a time.
         for ( int p=0; p<wid; p+=32 ) sin[p+:32] = $random;
         amt = i / tests_per_sa;
         shadow_sout = sin << amt;
         // Hold start at 1 across one clock edge.
         start = 1;
         @( posedge clk_reactive );
         start = 0;
                                    awaiting = pi.num();
         // Start one thread per module. A thread for a sequential
         // module waits, one clock edge at a time, until that
         // module's ready output is 1; a thread for any other module
         // proceeds at once. The thread then records the module's
         // output and the number of cycles used.
         foreach ( pi[muti] ) begin
            automatic string mut = muti;             fork begin
               while ( pi[mut].seq && ready[pi[mut].idx] !== 1 )
                 @( posedge clk_reactive );
               awaiting--;
               pi[mut].sout = sout[pi[mut].idx];
               pi[mut].cyc_tot += cycle - cyc_start;
            end join_none;
         end
         // Blocks until every module has delivered a result. If some
         // ready output never becomes 1 this wait does not complete
         // and the simulation ends at the cycle limit instead.
         wait ( awaiting == 0 );
         // Compare each captured output with the expected value;
         // only the first four mismatches per module are printed.
                           foreach ( pi[ mut ] )
           if ( shadow_sout !== pi[mut].sout ) begin
              pi[mut].err_count++;
              if ( pi[mut].err_count < 5 )
                $write
                  ("%-20s wrong result for 0x%0h << %0d:  0x%0h != 0x%0h (correct)\n",
                   mut, sin, amt, pi[mut].sout, shadow_sout);
           end
      end
      done = 1;
      // Per-module summary. The average cycle count is computed for
      // sequential modules and shown as 1 for the others.
      foreach ( pi[ mut ] )
         $write("Ran %4d tests for %-15s, %4d errors found. Avg cyc %.1f\n",
                  test_count, mut, pi[mut].err_count,
                pi[mut].seq ? real'(pi[mut].cyc_tot) / test_count : 1
                );
   end
endmodule
// cadence translate_on