////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2023 Homework 3 -- SOLUTION
//

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2023/hw03.pdf

`default_nettype none

  //////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
  ///  Complete perm so that it permutes its inputs and computes the next perm num.
 ///
//
//     [✔] Only modify perm.
//     [✔] perm must be recursive.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] Module must be synthesizable. Use command: genus -files syn.tcl
//
//     [✔] Don't assume any particular parameter values.
//
//     [✔] Pay attention to cost. Don't grossly oversize things.
//     [✔] Code must be written clearly.


module perm
  #( int w = 8, n = 20, dw = $clog2(n) )
   ( output uwire [w-1:0] pdata_out[n],
     output uwire [dw-1:0] pnum_out[n],
     output uwire carry_out,
     input uwire [w-1:0] pdata_in[n],
     input uwire [dw-1:0] pnum_in[n] );

   /// SOLUTION

   if ( n == 1 ) begin

      /// SOLUTION -- Problem 1a
      //
      // For n=1 the permutation is always identity ..
      //  .. so the pdata out is set to pdata in ..
      //  .. the permutation number remains zero (it always is 0 at n=1) ..
      //  .. and the carry_out is set to 1.
      //
      assign pdata_out[0] = pdata_in[0];
      assign carry_out = 1;
      assign pnum_out[0] = 0;

   end else begin

      /// SOLUTION -- Problem 1a
      //
      //  Set pos to the position of the element to be moved.
      //
      uwire [dw-1:0] pos = n - 1 - pnum_in[n-1];
      //
      //  Copy the element at position pos to position n-1 in the output.
      //
      assign pdata_out[n-1] = pdata_in[pos];
      //
      //  Prepare an array of n-1 elements and set to ..
      //  .. the elements of pdata_in except for the element at pos.
      //
      uwire [w-1:0] prdata_in[n-1];
      for ( genvar i=0; i<n-1; i++ )
        assign prdata_in[i] =  i < pos  ?  pdata_in[i]  :  pdata_in[i+1];

      uwire co;
      perm #(w,n-1,dw) rp( pdata_out[0:n-2], pnum_out[0:n-2], co,
                           prdata_in, pnum_in[0:n-2] );

      /// SOLUTION -- Problem 1b
      //
      //  Compute a tentative next value of digit n-1.
      //
      uwire [dw-1:0] dnext = pnum_in[n-1] + co;
      //
      //  Determine whether there is a carry.
      //
      assign carry_out = dnext >= n;
      //
      //  Set the next value of digit n-1 based on whether there is a carry.
      //
      assign pnum_out[n-1] = carry_out ? 0 : dnext;

   end

endmodule


module perm_behavioral
  #( int w = 8, n = 20, dw = $clog2(n) )
   ( output logic [w-1:0] pdata_out[n],
     output logic [dw-1:0] pnum_out[n],
     output logic carry_out,
     input uwire [w-1:0] pdata_in[n],
     input uwire [dw-1:0] pnum_in[n] );

   /// DO NOT modify this module.  The testbench uses it.

   always_comb begin
      // Permute values

      pdata_out = pdata_in;

      for ( int i=n-1; i>0; i-- ) begin
         automatic logic [dw-1:0] pos = i-pnum_in[i];
         automatic logic [w-1:0] x = pdata_out[pos];
         for ( int j=pos; j<i; j++ ) pdata_out[j] = pdata_out[j+1];
         pdata_out[i] = x;
      end
   end

   /// DO NOT modify this module.  The testbench uses it.

   always_comb begin
      // Compute next permutation number.

      carry_out = 1;

      for ( int i=0; i<n; i++ ) begin
         automatic int radix = i + 1;
         automatic logic [dw:0] next_val = pnum_in[i] + carry_out;
         if ( next_val < radix ) begin
            pnum_out[i] = next_val;
            carry_out = 0;
         end else begin
            pnum_out[i] = 0;
         end
      end
   end

   /// DO NOT modify this module.  The testbench uses it.

endmodule

//////////////////////////////////////////////////////////////////////////////
/// Testbench Code


// cadence translate_off

function int char_or_q(int c);
      return c >= "a" && c <= "z" ? c : "?";
endfunction

module testbench;
   localparam int npsets = 4; // This MUST be set to the size of pset.
   // { w n }
   localparam int pset[npsets][2] =
              '{
                { 8, 3 },
                { 7, 4 },
                { 8, 8 },
                { 8, 10 } };

   logic d[npsets:-1]; // Start / Done signals.

   int t_errs_v[npsets];
   int t_errs_i[npsets];
   int t_n_tests[npsets];

   int t_errs;     // Total number of errors.
   initial begin
      t_errs = 0;
      for ( int i=0; i<npsets; i++ ) begin
         t_errs_v[i] = -1;
         t_errs_i[i] = -1;
         t_n_tests[i] = -1;
      end
      d[-1] = 1;

      wait( d[npsets-1] );

      for ( int p=0; p<npsets; p++ )
        $write("End of tests n=%2d, %0d perm errors, %0d next idx errors for %0d tests.\n",
               pset[p][1], t_errs_v[p], t_errs_i[p], t_n_tests[p]);

   end

   for ( genvar p=0; p<npsets; p++ ) begin
      testbench_n #( .w(pset[p][0]), .n(pset[p][1]), .idx(p) )
      tb( .done(d[p]), .tstart(d[p-1]) );
   end

endmodule

module testbench_n
  #( int w=8, n=3, idx=0 )
   ( output logic done, input uwire tstart );

   localparam int dw = $clog2(n);
   localparam int max_tests = 1000;

   uwire [w-1:0] p_out[n], pb_out[n];
   uwire [dw-1:0] i_out[n], ib_out[n];
   uwire co, cob;
   logic [w-1:0] p_in[n];
   logic [dw-1:0] i_in[n];

   perm_behavioral #(w,n,dw) pb( pb_out, ib_out, cob, p_in, i_in );
   perm #(w,n,dw) pmut( p_out, i_out, co, p_in, i_in );

   initial begin

      automatic int n_v_err = 0, n_i_err = 0;
      automatic longint nfact = 1;
      automatic int run_curr = 0;
      int n_tests;
      int run_length; // Number of consecutive permutations.
      for ( int i=2; i<=n; i++ ) nfact *= i;

      n_tests = nfact <= max_tests ? nfact : max_tests;
      run_length = n_tests >= nfact ? n_tests : 4;

      for ( int i=0; i<n; i++ ) p_in[i] = "a" + n - i - 1;
      for ( int i=0; i<n; i++ ) i_in[i] = 0;

      wait( tstart );

      $write("Starting tests for w=%0d, n=%0d\n",w,n);

      for ( int i=0; i<n_tests; i++ ) begin

         automatic int tn_v_err = 0, tn_i_err = 0;
         bit show_v_err, show_i_err, show_trace;

         #1;

         for ( int j=0; j<n; j++ ) if ( p_out[j] !== pb_out[j] ) tn_v_err++;
         for ( int j=0; j<n; j++ ) if ( i_out[j] !== ib_out[j] ) tn_i_err++;

         if ( tn_v_err ) n_v_err++;
         if ( tn_i_err ) n_i_err++;
         show_trace = i < 10;
         show_v_err = tn_v_err && n_v_err < 5;
         show_i_err = tn_i_err && n_i_err < 5;

         if ( show_v_err || show_i_err || show_trace ) begin

            if ( tn_v_err ) $write("Error in permutation: ");
            else $write("Trace of permutation: ");
            for ( int j=n-1; j>=0; j-- ) $write("%1d ", i_in[j]);
            $write(" -> ");
            for ( int j=n-1; j>=0; j-- ) $write("%c ", char_or_q(p_out[j]));
            if ( tn_v_err ) begin
               $write( "  !=  ");
               for ( int j=n-1; j>=0; j-- ) $write("%c ", pb_out[j]);
               $write( " (correct)");
            end
            $write("\n");

            if ( show_i_err ) begin

            if ( tn_i_err ) $write("Error in next index:  ");
            else $write("Trace of next index:  ");
            for ( int j=n-1; j>=0; j-- ) $write("%1d ", i_in[j]);
            $write(" -> ");
            for ( int j=n-1; j>=0; j-- ) $write("%h ", i_out[j]);
            if ( tn_i_err ) begin
               $write( "  !=  ");
               for ( int j=n-1; j>=0; j-- ) $write("%h ", ib_out[j]);
               $write( " (correct)");
            end
            $write("\n");
            end

         end

         if ( run_curr >= run_length ) begin

            run_curr = 0;
            for ( int j=1; j<n; j++ ) i_in[j] = {$random()} % (j+1);

         end else begin

            run_curr++;
            i_in = ib_out;

         end

      end

      $write("Finished with n=%0d, %0d perm errors, %0d next idx errors in %0d tests.\n",
             n, n_v_err, n_i_err, n_tests);

      testbench.t_errs_v[idx] = n_v_err;
      testbench.t_errs_i[idx] = n_i_err;
      testbench.t_n_tests[idx] = n_tests;

      done = 1;

   end

endmodule

// cadence translate_on