/// EE 4755 - Digital Design Using HDLs
//
//  Pipelining

//////////////////////////////////////////////////////////////////////////////
/// Pipelining Concept

 /// Pipelining Concept    <-- Very Important but Tricky Concept, Pay Attention!
 //
 //
 //  :Def: Pipelining
 //   Performing an operation in *stages* on multiple data items.
 //

// :Example:
//
// Output x has the value that input a had two cycles in the past.
//
module very_simple_pipe
  #( int w = 16 )
   ( output logic [w-1:0] x,
     input uwire [w-1:0] a,
     input uwire clk );

   // Two registers in series: a -> stage1 -> x.
   // Each positive clock edge moves the data one register closer to x.
   logic [w-1:0] stage1;

   always_ff @( posedge clk ) begin

      // Both assignments are non-blocking, so each right-hand side is
      // sampled before either register changes. The order in which the
      // two statements are written therefore does not matter.
      x <= stage1;
      stage1 <= a;

   end

endmodule

// :


// :Example:
//
// Pipeline that passes data through unchanged.
// Output x has the value that input a had nstages cycles in the past.
//
module simple_pipe2
  #( int w = 16, int nstages = 4 )
   ( output uwire [w-1:0] x,
     input uwire [w-1:0] a,
     input uwire clk );

   // Chain of nstages registers. r[0] holds the value of a captured at
   // the most recent positive clock edge, r[1] the one captured at the
   // edge before that, and so on.
   logic [w-1:0] r[nstages];

   // The module output is simply the last register in the chain.
   assign x = r[nstages-1];

   always_ff @( posedge clk ) begin

      // Move every stored value one stage toward the output. Because the
      // assignments are non-blocking, each r[i-1] read here is the value
      // from before this clock edge, regardless of iteration order.
      for ( int i=nstages-1; i>0; i-- ) r[i] <= r[i-1];

      // Capture the new input in the first register.
      r[0] <= a; // Non-blocking assignment here, blocking in simple_pipe2_ba.

   end

endmodule
//
 /// Important thing to notice:
 //
 //  At any moment the module holds the most recent nstages values of a.


// :



// :Example:
//
// The pipeline below, simple_pipe2_ba, is almost identical to the one above,
// simple_pipe2. The only difference is that here, r[0] is just a wire
// whereas in simple_pipe2 r[0] is a register. The difference is due
// to the way in which r[0] is assigned.
//
// Output x has the value that input a had nstages-1 cycles in the past.
//
module simple_pipe2_ba
  #( int w = 16, int nstages = 4 )
   ( output uwire [w-1:0] x,
     input uwire [w-1:0] a,
     input uwire clk );

   // Storage for the pipeline values. Element 0 is written with a
   // blocking assignment, the remaining elements with non-blocking
   // assignments (see below).
   logic [w-1:0] r[nstages];

   always_ff @( posedge clk ) begin

      // The blocking assignment updates r[0] immediately, before the loop
      // below executes. The first loop iteration (i=1) therefore reads the
      // value of a from this clock edge rather than a value stored at the
      // previous edge. Statement order matters here: r[0] must be
      // assigned before the loop reads it.
      r[0] = a; // Blocking assignment here, non-blocking in simple_pipe2.

      // Non-blocking shift of the remaining stages: each r[i-1] for i>1
      // is read as it was before this clock edge.
      for ( int i=1; i<nstages; i++ ) r[i] <= r[i-1];

   end

   // Output is taken from the last element of r.
   assign x = r[nstages-1];

endmodule

// :



// :Example:
//
// Compute a running average of data arriving one element per clock
// cycle. The pipeline holds recent values. A sum is computed by
// adding together the values in the pipeline stages each cycle.
//
 /// Warning: Uses more adders than are necessary.
//   See pipe_r_avg2, further below, for a version that uses fewer adders.
//
module simple_pipe_avg
  #( int w = 16, int nstages = 4 )
   ( output uwire [w-1:0] x,
     output logic [w-1:0] avg,
     input uwire [w-1:0] a,
     input uwire clk );

   // r[0] follows the input a directly (no register). Elements
   // r[1] .. r[nstages-1] are registers holding successively older
   // values of a.
   //
   // NOTE(review): r is written by a continuous assignment (element 0)
   // and by always_ff (elements 1 and up). Some tools may reject this
   // mixture on a single array -- confirm with the target toolchain.
   logic [w-1:0] r[nstages];

   assign r[0] = a;

   // Oldest value in the pipeline.
   assign x = r[nstages-1];

   // Shift stored values one stage toward the output each clock edge.
   // Non-blocking assignments make the iteration order irrelevant.
   always_ff @( posedge clk )
     for ( int i=nstages-1; i>0; i-- ) r[i] <= r[i-1];

   // Accumulator wide enough to hold the sum of nstages w-bit values.
   logic [w+$clog2(nstages):0] sum;

   // Add up every stage (one adder per stage) and divide by the number
   // of stages to obtain the average.
   always_comb begin

      sum = 0;
      for ( int i=nstages-1; i>=0; i-- ) sum = sum + r[i];
      avg = sum / nstages;

   end

endmodule

 /// Inferred Hardware for simple_pipe_avg

// 

// 


// :Example:
//
// Compute a running average of data arriving one element per clock
// cycle. Do so using just one adder and one subtractor.
//
// Based on 2021 Final Exam Problem 1
//
module pipe_r_avg2
  #( int w = 8, n_samples = 4 )
   ( output logic [w-1:0] r_avg,
     input uwire [w-1:0] sample,
     input uwire reset, clk );

   // Running average of the n_samples most recently captured samples,
   // computed with one adder and one subtractor.
   //
   //  r_avg:  Average (integer division) of the values held in samples.
   //  sample: New data value, captured on each positive clock edge.
   //  reset:  Synchronous, active high. Clears the stored samples and
   //          the running total so that the total is well defined.
   //  clk:    Clock; all state changes on the positive edge.

   // samples[0] is the most recently captured value, samples[n_samples-1]
   // the oldest.
   logic [w-1:0] samples[n_samples];

   // Width of the running total: enough bits for the sum of n_samples
   // w-bit values.
   localparam int wm = $clog2( n_samples );
   localparam int ws = w + wm;

   // Invariant (after reset): tot is the sum of all elements of samples.
   logic [ws-1:0] tot;

   always_ff @( posedge clk ) begin

      if ( reset ) begin

         // Without this, tot would never be initialized and would
         // remain unknown (X) in simulation.
         for ( int i=0; i<n_samples; i++ ) samples[i] <= '0;
         tot <= '0;

      end else begin

         samples[0] <= sample;

         for ( int i=1; i<n_samples; i++ ) samples[i] <= samples[i-1];

         // Maintain the invariant: the oldest stored value leaves the
         // window and the incoming sample enters it. The incoming sample
         // (not samples[0]) must be added, since that is the value being
         // stored at this clock edge.
         tot <= tot - samples[n_samples-1] + sample;

      end

   end

   assign r_avg = tot / n_samples;

endmodule

//