////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2025 Homework 5
//

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2025/hw05.pdf

 /// Instructions:
  //
  // (1) Find the undergraduate workstation laboratory, room 2241 Patrick
  //     F. Taylor Hall.
  //
  // (2) Locate your account.  If you did not get an account please
  //     E-mail: koppel@ece.lsu.edu
  //
  // (3) Log in to a Linux workstation.
  //
  // (4) If you haven't already, follow the account setup instructions here:
  //     https://www.ece.lsu.edu/koppel/v/proc.html
  //
  // (5) Copy this assignment, local path name
  //     /home/faculty/koppel/pub/ee4755/hw/2025/hw05
  //     to a directory ~/hw05 in your class account. (~ is your home
  //     directory.) Use this file for your solution.
  ///      BE SURE THAT YOUR FILE IS CORRECTLY NAMED AND IN THE RIGHT PLACE.
  //
  // (6) Find the problems in this file and solve them.
  //
  //     Your entire solution should be in this file.
  //
  //     Do not change module names.
  //
  // (7) Your solution will automatically be copied from your account by
  //     the TA-bot.
  //

 /// Additional Resources
  //
  // Verilog Documentation
  //    The Verilog Standard
  //      https://ieeexplore.ieee.org/document/10458102
  //    Introductory Treatment (Warning: Does not include SystemVerilog)
  //      Brown & Vranesic, Fundamentals of Digital Logic with Verilog, 3rd Ed.
  //
  // Account Setup and Emacs (Text Editor) Instructions
  //      https://www.ece.lsu.edu/koppel/v/proc.html
  //      To learn Emacs look for Emacs tutorial.
  //



`default_nettype none

//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
  /// Complete best_rot_pipe
  //  
//
//     [ ] Complete best_rot_pipe so that it computes the correct outputs.
//     [ ] The module must be pipelined and should have a low critical path.
//     [ ] Instantiate as many pop modules as needed.
//
//     [ ] Make sure that the testbench does not report errors.
//     [ ] Do not assume particular parameter values.
//     [ ] The module must be synthesizable.
//
//     [ ] Only modify best_rot_pipe.
//     [ ] Code must be written clearly.
//     [ ] Pay attention to cost and performance.


module best_rot_pipe
    #( int wv = 17, wp = $clog2(wv+1) )
   ( output logic [wp-1:0] pos,
     output logic [wp-1:0] dif,
     output logic ready,
     input uwire [wv-1:0] val, key,
     input uwire start, clk );

   /// Pipelined best-rotation finder.
   //
   //  A new val/key pair can be accepted on every clock edge.  For
   //  each pair the module finds the amount, pos, by which val must be
   //  rotated right so that it differs from key in the fewest bit
   //  positions; dif is that number of differing bits.  When several
   //  rotations tie, the smallest pos is reported (the same rule as
   //  best_rot_seq, which uses a strict less-than comparison).
   //
   //  Pipeline organization (register levels):
   //    Level 0:        Registered copy of val and key.
   //    Level s+1:      Best pos/dif over rotations 0..s, for 0 <= s < wv.
   //  The outputs are the level-wv registers, so a result appears wv
   //  clock edges after the edge at which its inputs were captured.
   //  ready is the start signal delayed by the same number of stages.

   logic [31:0] watch_value;

   // Occupancy bits: pl_occ[s] is 1 when level s holds a request that
   // entered with start = 1.
   logic [wv:0] pl_occ;

   assign ready = pl_occ[wv];

   // Debug value shown by the testbench: number of occupied levels.
   pop #(wv+1,32) p( watch_value, pl_occ );

   // Operands travelling with each request. Level s needs them to
   // evaluate rotation s, so they are kept for levels 0..wv-1 only.
   logic [wv-1:0] pl_val[wv], pl_key[wv];

   // Best-so-far position and difference count entering levels 1..wv.
   logic [wp-1:0] pl_pos[1:wv], pl_dif[1:wv];

   // Difference count for rotation s, computed from level-s operands.
   uwire [wp-1:0] dif_here[wv];

   for ( genvar s=0; s<wv; s++ ) begin : stage

      // Rotating right by the constant s is just wiring: select wv bits
      // starting at bit s of the doubled value.
      uwire [2*wv-1:0] val2 = { pl_val[s], pl_val[s] };

      pop #(wv,wp) pd( dif_here[s], val2[s +: wv] ^ pl_key[s] );

   end

   assign pos = pl_pos[wv];
   assign dif = pl_dif[wv];

   always_ff @( posedge clk ) begin

      pl_occ[0] <= start;
      pl_val[0] <= val;
      pl_key[0] <= key;

      for ( int stage=0; stage<wv; stage++ ) begin

         pl_occ[stage+1] <= pl_occ[stage];

      end

      // Rotation 0 has nothing to be compared against: it is the best
      // seen so far by definition.
      pl_pos[1] <= '0;
      pl_dif[1] <= dif_here[0];

      for ( int s=1; s<wv; s++ ) begin

         pl_val[s] <= pl_val[s-1];
         pl_key[s] <= pl_key[s-1];

         // Strict comparison so that the earliest position wins ties.
         if ( dif_here[s] < pl_dif[s] ) begin

            pl_dif[s+1] <= dif_here[s];
            pl_pos[s+1] <= wp'(s);

         end else begin

            pl_dif[s+1] <= pl_dif[s];
            pl_pos[s+1] <= pl_pos[s];

         end

      end

   end

endmodule



module best_rot_seq
  #( int wv = 17, wp = $clog2(wv+1) )
   ( output logic [wp-1:0] pos,
     output logic [wp-1:0] dif,
     output logic ready,
     input uwire [wv-1:0] val, key,
     input uwire start, clk );

   // Do not modify this module.

   /// Sequential best-rotation finder.
   //
   //  When start is 1 at a positive clock edge val is captured and the
   //  search is (re)started.  On each following cycle the captured
   //  value is rotated right by one bit and compared against key.
   //  Output dif holds the smallest number of differing bits found so
   //  far and pos the rotation amount at which it was found.  Output
   //  ready is set once the last rotation, wv-1, has been reached.

   // Debug value read hierarchically by the testbench.
   logic [31:0] watch_value;

   // val rotated right by pos_curr bit positions.
   logic [wv-1:0] val_rot;
   // Rotation amount currently being examined.
   logic [wp-1:0] pos_curr;
   uwire last_pos = pos_curr == wv-1;

   assign watch_value = pos_curr;

   always_ff @( posedge clk )
     begin
        // Load on start, otherwise rotate right by one bit.
        val_rot <= start ? val : { val_rot[0], val_rot[wv-1:1] };
        // Count up from zero and hold at the last position.
        pos_curr <= start ? 0 : last_pos ? pos_curr : pos_curr + 1;
        ready <= start ? 0 : last_pos;
     end

   // Number of bit positions in which the current rotation differs
   // from key.
   uwire [wp-1:0] dif_here;
   pop #(wv,wp) p( dif_here, val_rot ^ key );

   // Strict comparison: on a tie the earlier position is retained.
   uwire new_low = dif_here < dif;

   always_ff @( posedge clk )
     if ( start ) begin

        // Initialize to the largest possible difference, wv.
        dif <= wv;
        pos <= 0;

     end else if ( new_low ) begin

        dif <= dif_here;
        pos <= pos_curr;

     end

endmodule

module pop
  #( int w = 16, wp = $clog2(w+1) )
   ( output uwire [wp-1:0] p,
     input uwire [w-1:0] v );

   // Do not modify this module.

   /// Population count: p is set to the number of 1 bits in v.
   //
   //  Parameters:
   //    w:   Width of input v, in bits.
   //    wp:  Width of output p. The default is just large enough to
   //         hold the value w.
   //
   //  Implemented recursively: v is split into a low and a high part,
   //  the population of each part is found by a smaller pop instance,
   //  and the two counts are added.

   if ( w == 1 ) begin

      // Base case: the population of a single bit is the bit itself.
      assign p = v[0];

   end else begin

      // The high part is the same size as the low part or one bit
      // larger, so wphi bits suffice for either partial count.
      localparam int wlo = w/2;
      localparam int whi = w - wlo;
      localparam int wphi = $clog2(whi+1);
      uwire [wphi-1:0] plo,phi;

      pop #(wlo,wphi)  pilo( plo, v[wlo-1:0] );
      pop #(whi,wphi)  pihi( phi, v[w-1:wlo] );
      assign p = plo + phi;
   end

endmodule


module best_rot_procedural
  #( int wv = 17, wp = $clog2(wv+1) )
   ( output logic [wp-1:0] pos,
     output logic [wp-1:0] dif,
     input uwire [wv-1:0] val, key );

   /// Combinational best-rotation finder written in procedural code.
   //
   //  Output pos is the amount by which val must be rotated right so
   //  that it differs from key in the fewest bit positions, and dif is
   //  that number of differing bits.  The smallest pos wins ties.

   // Not used by this module; present because the testbench reads
   // watch_value hierarchically from whichever module it instantiates.
   int watch_value;

   always_comb begin

      // Start with the largest difference possible, wv.  This value
      // always fits in the wp = $clog2(wv+1) bits of dif.  (The value
      // wv + 1 would be truncated to zero whenever wv + 1 is a power of
      // two, after which no rotation could ever be selected.)  If every
      // rotation differs in all wv bits the result is pos = 0, dif = wv,
      // the same as would be found by selecting rotation 0.
      dif = wv;
      pos = 0;

      for ( int pos_look=0; pos_look<wv; pos_look++ ) begin
         // One bit wider than dif so that the count can't overflow.
         logic [wp:0] pos_dif;
         pos_dif = 0;
         // Bit i of val rotated right by pos_look is val[(pos_look+i)%wv].
         for ( int i=0; i<wv; i++ )
           if ( key[i] != val[(pos_look + i)%wv] ) pos_dif++;
         // Strict comparison so that the earliest position wins ties.
         if ( pos_dif < dif ) begin
            dif = pos_dif;
            pos = pos_look;
         end
      end

   end

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
// It is okay to modify the testbench code to facilitate the coding
// and debugging of your modules. Keep in mind that your submission
// will be tested using a different testbench, so no one will be
// accused of dishonesty for modifying the testbench below. If you do
// modify the testbench, make sure that all of the original tests are
// still performed so that your code is verified against the original
// testbench.


// cadence translate_off

// Forward the clock and cycle count through a program block.
//
// Output clk_reactive follows clk and cycle_reactive follows cycle.
// NOTE(review): presumably this is done so that code waiting on
// clk_reactive runs after the design's registers have been updated for
// the same clock edge (program code is scheduled in the reactive
// region) -- confirm against the simulator in use.
program reactivate
   (output uwire clk_reactive, output int cycle_reactive,
    input uwire clk, input var int cycle);
   assign clk_reactive = clk;
   assign cycle_reactive = cycle;
endprogram


// Module names. (Used by the testbench.)
//
// M_proc selects best_rot_procedural, M_seq selects best_rot_seq, and
// M_pipe selects best_rot_pipe.
//
typedef enum { M_proc, M_seq, M_pipe } M_Type;

// Top-level testbench.
//
// Instantiates one testbench_n for each combination of a module type
// in mset and a width in pset, runs them one after another, and
// prints per-configuration and grand-total error counts at the end of
// simulation.
//
module testbench;

   // Widths (parameter w of testbench_n) to test.
   localparam int npsets = 3; // This MUST be set to the size of pset.
   localparam int pset[npsets][1] =
              '{ { 4 }, { 8 }, { 20 } };

   // Module types to test. Define xxx to test all three modules;
   // otherwise only best_rot_pipe is tested.
   `ifdef xxx
   localparam int nmsets = 3;
   localparam M_Type mset[3] = '{ M_seq, M_proc, M_pipe };
   `else
   localparam int nmsets = 1;
   localparam M_Type mset[1] = '{ M_pipe };
   `endif

   // Names used in messages.
   string mtype_str[M_Type] =
          '{ M_proc: "best_rot_proc",
             M_seq: "best_rot_seq", M_pipe: "best_rot_pipe" };
   string mtype_abbr[M_Type] =
          '{ M_proc: "proc", M_seq: "seq", M_pipe: "pipe" };

   // Error counts indexed by module type and width. These are written
   // hierarchically by the testbench_n instances.
   int t_errs_each_pos[M_Type][int];
   int t_errs_each_dif[M_Type][int];
   int t_errs_pos, t_errs_dif;

   localparam int nsets = npsets * nmsets;

   // Instance idx starts when d[idx-1] is 1 and sets d[idx] when it
   // finishes, so the instances run in sequence.
   logic d[nsets:-1]; // Start / Done signals.

   initial begin
      t_errs_dif = 0;
      t_errs_pos = 0;
      for ( int m=0; m<nmsets; m++ )
        for ( int i=0; i<npsets; i++ ) begin
           automatic int n = pset[i][0];
           t_errs_each_pos[mset[m]][n] = 0;
           t_errs_each_dif[mset[m]][n] = 0;
        end

      // Start the first instance.
      d[-1] = 1;
   end

   // Print a summary when simulation ends.
   final begin
      for ( int mi=0; mi<nmsets; mi++ )
        for ( int i=0; i<npsets; i++ ) begin
           automatic M_Type m = mset[mi];
           automatic int n = pset[i][0];
           $write("Total %s n=%2d: Errors: %0d pos, %0d dif.\n",
                  mtype_str[m], n,
                  t_errs_each_pos[m][n],
                  t_errs_each_dif[m][n]);
           t_errs_pos += t_errs_each_pos[m][n];
           t_errs_dif += t_errs_each_dif[m][n];
        end

      $write("Grand Total Errors:  %0d pos, %0d dif.\n",
             t_errs_pos, t_errs_dif);

   end

   // One testbench_n per (module type, width) pair, chained through d.
   for ( genvar m=0; m<nmsets; m++ )
     for ( genvar i=0; i<npsets; i++ ) begin
        localparam int idx = m * npsets + i;
        testbench_n
          #( .w(pset[i][0]), .mtype(mset[m]) )
        t2( .done(d[idx]), .tstart(d[idx-1]) );
     end

endmodule

// Test one module type at one width.
//
// Parameters:
//   w:      Width of val and key (parameter wv of the tested module).
//   mtype:  Which module to instantiate and test.
// Ports:
//   tstart: Testing starts when this input becomes 1.
//   done:   Set to 1 when testing finishes or the cycle limit is hit.
//
// Error counts are written to testbench.t_errs_each_pos and
// testbench.t_errs_each_dif.
//
module testbench_n
  #( int w = 6, M_Type mtype = M_proc )
   ( output logic done, input uwire tstart );

   localparam int wv = w;
   localparam int wp = $clog2(wv+1);

   localparam int n_tests = 1000;
   localparam int cyc_max = n_tests * w * 2;

   int seed;
   initial seed = 475505;

   // Return 1 with probability 1/period.
   function automatic bit rand_bern( int period );
      rand_bern = $dist_uniform(seed,1,period) == 1;
   endfunction

   // Return a random integer in the range 0 to n-1.
   function automatic int rand_n( int n );
      rand_n = $dist_uniform(seed,0,n-1);
   endfunction

   // Return a wv-bit value made by randomly swapping the bits of a
   // value that has its wht least-significant bits set.
   function automatic logic [wv-1:0] rand_wht( int wht );
      logic [wv-1:0] val = ( 1 << wht ) - 1;
      for ( int i=0; i<wv; i++ ) begin
         int j = rand_n(wv);
         logic [1:0] b = val[j]; val[j] = val[i]; val[i] = b;
      end
      rand_wht = val;
   endfunction

   bit clk;
   int cycle, cycle_limit;
   logic clk_reactive;
   int cycle_reactive;
   reactivate ra(clk_reactive,cycle_reactive,clk,cycle);

   // Recent trace lines, printed when an error is shown.
   string trace_lines[$];

   // Clock generator and cycle-limit watchdog.
   initial begin
      clk = 0;
      cycle = 0;

      done = 0;
      cycle_limit = cyc_max;
      wait( tstart );

      fork
         // cycle is incremented at each positive edge of clk.
         while ( !done ) #5 cycle += ++clk;
         wait( cycle >= cycle_limit ) begin
           $write("Exit from clock loop at cycle %0d, limit %0d.  %s\n",
                  cycle, cycle_limit, "** CYCLE LIMIT EXCEEDED **");
            foreach ( trace_lines[i] ) $write(trace_lines[i]);
            testbench.t_errs_each_pos[mtype][w] = n_tests;
            testbench.t_errs_each_dif[mtype][w] = n_tests;
         end
      join_any;

      done = 1;
   end

   uwire [wp-1:0] pos, dif;
   // val, key, and start are set by the test loop. The pipelined
   // module is instead connected to valp, keyp, and startp, which also
   // carry filler requests between tests.
   logic [wv-1:0] val, key, valp, keyp;
   uwire ready;
   logic start, startp, use_others;

   case ( mtype )
     M_proc: begin:gen_m
        best_rot_procedural #(wv) best_rot(pos,dif,val,key);
     end
     M_seq: begin:gen_m
        best_rot_seq #(wv,wp) best_rot(pos,dif,ready,val,key,start,clk);
     end
     M_pipe: begin:gen_m
       best_rot_pipe #(wv,wp) best_rot(pos,dif,ready,valp,keyp,startp,clk);
     end
   endcase

   string trace_prefix, trace_heading;

   // Format a val or key: binary when wv <= 8, otherwise hexadecimal,
   // padded on the left to at least three characters.
   function string fmtvk( logic [wv-1:0] val );
      automatic string txt = $sformatf( wv <= 8 ? "%b" : "%h",val);
      automatic int len = txt.len();
      fmtvk = len < 3 ? { {(3-len){" "}}, txt } : txt;
   endfunction
   // Pad a column heading to the width used for val and key.
   function string fmtvkh( string heading );
      localparam int len = wv <= 8 ? wv : (wv+3)/4;
      fmtvkh = heading.len() >= len ? heading
        : { {(len-heading.len()){" "}}, heading };
   endfunction
   // Format a pos or dif value, showing x if any bit is unknown.
   function string fmtl( logic [wp-1:0] a );
      fmtl = $isunknown(a) ? "  x" : $sformatf("%3d",a);
   endfunction

   // Record a trace line at each positive clock edge.
   initial begin

      while ( !done ) @( posedge clk_reactive ) begin
         automatic int latency = mtype == M_proc ? 0 : wv;
         string entry;
         trace_prefix =
           $sformatf("Tr %4d %s  %s  %s",
                     cycle, fmtvk(valp), fmtvk(keyp),
                     start ? "S" : "_" );
         entry =
           $sformatf("%s  Mod Out %4d --> %s  %s  %s   %5h\n",
                     trace_prefix,
                     cycle - latency,
                     ready === 1 ? "R" : ready === 0 ? "_" : "X",
                     fmtl(pos), fmtl(dif), gen_m.best_rot.watch_value);
         // Limit the amount of trace kept.
         if ( trace_lines.size() > 20 ) trace_lines.delete(10);
         trace_lines.push_back(entry);
      end
   end

   // Drive the inputs of the pipelined module at each negative edge:
   // the test loop's values when it has provided them (use_others),
   // otherwise filler values with a random start.
   initial begin

      while ( !done ) @( negedge clk_reactive )

         if ( use_others ) begin

            valp = val;
            keyp = key;
            startp = start;
            use_others = 0;

         end else begin

            valp = cycle;
            keyp = {$random};
            startp = {$random}%2;

         end

   end

   // Test loop.
   initial begin

      automatic int n_err_pos = 0, n_err_dif = 0, n_tests_completed = 0;
      automatic string prefix_txt =
        $sformatf("%s w=%0d",testbench.mtype_str[mtype],w);

      trace_heading =
        $sformatf("   %4s %s  %s  S           Cyc-In  R  Pos  Dif   Debug Val\n",
                  "Cyc", fmtvkh("Val"), fmtvkh("Key"));

      wait ( tstart );

      $write("Starting tests for %s.\n", prefix_txt);
      @( negedge clk );

      // Force a reset.
      start = 1;
      val = 0;  key = 0;
      @( negedge clk );
      start = 0;
      @( negedge clk );
      if ( mtype != M_proc ) wait( ready );
      @( negedge clk );

      for ( int i=0; i<n_tests; i++ ) begin

         localparam int n_tests_phase = 1;
         automatic int target_pos = rand_n(wv);
         // Smallest difference found by the loop below. This is an int
         // rather than a wp-bit value because the initial value,
         // wv + 2, does not fit in wp bits for some widths (for
         // example, w = 6). If it wrapped, no position would be
         // selected and shadow_pos would be left unknown.
         automatic int shadow_dif = wv + 2;
         automatic int shadow_dif_at_pos = wv + 2;
         automatic int start_cycle = cycle + 1; // Start at next + edge.
         automatic int eta_cycle = start_cycle + wv;
         automatic int phase = i / n_tests_phase;
         // The first two tests use fixed weights (number of 1 bits),
         // later ones use random weights.
         automatic int wht_val = phase < 1 ? 1 : phase < 2 ? 2 :
           1 + rand_n(wv);
         automatic int wht_key = phase < 1 ? 1 : phase < 2 ? 2 :
           1 + rand_n(wv);
         bit show_sample;
         int eta;
         bit err_pos, err_dif, show_err_pos, show_err_dif;
         string err_pos_txt, err_dif_txt;
         logic [wp-1:0] shadow_pos;
         logic [wv*2-1:0] valval;

         val = rand_wht(wht_val);
         valval = {val,val};
         // Half the time key is an exact rotation of val.
         key = rand_bern(2) ? valval[ target_pos +: wv ] : rand_wht(wht_key);
         start = 1;
         use_others = 1;
         trace_lines.delete();

         @( negedge clk );
         start = 0;

         // Wait for the module's result.
         case ( mtype )
           M_proc:;
           M_seq:
             begin
                while ( ready === 1 ) @( negedge clk );
                while ( ready !== 1 ) @( negedge clk );
             end
           M_pipe:begin
              while ( cycle < eta_cycle ) @( negedge clk );
           end
         endcase

         // Compute the expected result. The lowest position wins ties.
         for ( int p=0; p<wv; p++ ) begin
            automatic int dif_here = $countones( valval[p+:wv] ^ key );
            if ( dif_here >= shadow_dif ) continue;
            shadow_dif = dif_here;
            shadow_pos = p;
         end

         // dif is checked against the difference at the position the
         // module reported.
         shadow_dif_at_pos = $countones( valval[pos+:wv] ^ key );

         err_pos = pos !== shadow_pos;
         err_dif = dif !== shadow_dif_at_pos;

         // Show details only for the first few errors of each kind.
         show_err_pos = err_pos && n_err_pos++ < 5;
         show_err_dif = err_dif && n_err_dif++ < 5;

         n_tests_completed++;

         show_sample =
           mtype == M_pipe && n_tests_completed < 0
             || show_err_pos || show_err_dif;

         if ( show_sample ) begin
            if ( mtype != M_proc || i == 0 )
              $write(trace_heading);
            foreach ( trace_lines[i] ) $write(trace_lines[i]);
            trace_lines.delete();
         end

         err_pos_txt = 1 || err_pos ? fmtl(shadow_pos) : "   ";
         err_dif_txt = 1 || err_dif ? fmtl(shadow_dif) : "   ";

         if ( show_sample )
           begin
              $write("Cr %4d %s  %s     Cor Out %4d --> %s  %s  %s \n",
                     cycle, fmtvkh(" "), fmtvkh(" "),
                     start_cycle,
                     "R", err_pos_txt, err_dif_txt);
              //  $write(trace_heading);
           end

      end

      // Count tests that were not completed as errors.
      n_err_pos += n_tests - n_tests_completed;
      n_err_dif += n_tests - n_tests_completed;

      $write("Done with %0d tests for %s, %0d pos %0d dif errors.\n",
             n_tests_completed, prefix_txt, n_err_pos, n_err_dif);

      testbench.t_errs_each_pos[mtype][w] = n_err_pos;
      testbench.t_errs_each_dif[mtype][w] = n_err_dif;

      done = 1;

   end


endmodule


// cadence translate_on