hw02-sol.v

////////////////////////////////////////////////////////////////////////////////
//
/// LSU EE 4755 Fall 2025 Homework 2 -- SOLUTION
//

 /// Assignment  https://www.ece.lsu.edu/koppel/v/2025/hw02.pdf


`default_nettype none

localparam logic [1:0] incr = 2, decr = 1, equal = 0;

//////////////////////////////////////////////////////////////////////////////
///  Problem 1
//
  /// Complete is_sorted_to_iter.
  //  
//
//     [✔] Complete is_sorted_to_iter so that it computes the correct outputs.
//     [✔] It CAN NOT use expressions such as a[i] < a[i+1], 
//     [✔] .. instead it must use the outputs of ist_compare instantiations.
//     [✔] Use a generate loop to instantiate enough ist_compare instances.
//     [✔] The module can use procedural code, but not for comparisons.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] The module must be synthesizable.
//
//     [✔] Code must be written clearly.
//     [✔] Pay attention to cost and performance.


module ist_compare
  #( int w = 15 )
   ( output uwire [1:0] gl, input uwire [w-1:0] a, b );
   assign gl = { a < b, a > b };
   // Do not modify this module.
endmodule

module is_sorted_to_iter_new
  #( int n = 12, int w = 32, int wi = $clog2(n) )
   ( output logic [wi-1:0] sidx, eidx,
     output logic [1:0] trend,
     input uwire [w-1:0] a[n-1:0] );

   /// SOLUTION -- Problem 1
   //
   //  This solution improves on is_sorted_to_proc.

   // Declare wires that will connect to ist_compare output ..
   //
   uwire [1:0] cmpa[n];
   //
   // .. and instantiate a compare module for each adjacent pair of a ..
   //
   for ( genvar i=1; i<n; i++ )
     ist_compare #(w) lti( cmpa[i], a[i-1], a[i] );

   always_comb begin

      eidx = 0;
      sidx = n-1;
      trend = 0;

      for ( int i=1; i<n; i++ ) begin

         // Check whether there was a non-zero trend ..
         // .. and whether a[i-1] and a[i] have a non-zero trend ..
         // .. and if so whether the two are inconsistent (cmpa[i] != trend).
         //
         if ( trend && cmpa[i] && cmpa[i] != trend )
           begin
              // At this point trend correctly describes the elements
              // from a[0] to a[i-1], but a[i] would break the trend.
              // So set sidx to the last good index, i-1 ..
              sidx = i - 1;
              // .. and exit the loop ..
              break;
           end

         // At this point either trend == cmpa[i] or one or both of them
         // is zero.
         //
         if ( trend == 0 ) trend = cmpa[i];
         if ( trend == 0 ) eidx = i;

      end
   end

endmodule

module is_sorted_to_iter_alt
  #( int n = 12, int w = 32, int wi = $clog2(n) )
   ( output logic [wi-1:0] sidx, eidx,
     output logic [1:0] trend,
     input uwire [w-1:0] a[n-1:0] );

   /// SOLUTION -- Problem 1
   //
   //  This solution is based on is_sorted_to_proc.

   // Declare wires that will connect to ist_compare output ..
   //
   uwire [1:0] cmpa[n];
   //
   // .. and instantiate a compare module for each adjacent pair of a ..
   //
   for ( genvar i=1; i<n; i++ )
     ist_compare #(w) lti( cmpa[i], a[i-1], a[i] );

   always_comb begin

      eidx = 0;
      sidx = n-1;
      trend = 0;
      for ( int i=1; i<n; i++ ) begin

         // Check whether there was a non-zero trend and if it is now broken.
         //
         if ( trend && cmpa[i] && cmpa[i] != trend )
           begin
              // The length of the sorted prefix has been found, set sidx ..
              sidx = i - 1;
              // .. and exit the loop ..
              break;
           end

         trend |= cmpa[i];
         if ( trend == 0 ) eidx = i;
      end
   end

endmodule


module is_sorted_to_proc
  #( int n = 12, int w = 32, int wi = $clog2(n) )
   ( output logic [wi-1:0] sidx, eidx,
     output logic [1:0] trend,
     input uwire [w-1:0] a[n-1:0] );

   // Do not modify this module.

   always_comb begin
      sidx = n-1;
      eidx = 0;
      trend = 0; // 2: increasing, 1: decreasing.
      for ( int i=1; i<n; i++ ) begin
         if ( a[i] > a[i-1] ) begin
            if ( trend == decr  ) begin sidx = i - 1; break; end
            trend = incr;
         end
         if ( a[i] < a[i-1] ) begin
            if ( trend == incr  ) begin sidx = i - 1; break; end
            trend = decr;
         end
         if ( trend == 0 ) eidx = i;
      end
   end

endmodule


//////////////////////////////////////////////////////////////////////////////
///  Problem 2
//
  /// Complete is_sorted_to_tree
  //  
//
//     [✔] Module is_sorted_to_tree must instantiate ist_compare somewhere.
//     [✔] Use recursion to form a tree.
//     [✔] Choose recursive instance sizes to simplify adders ..
//     [✔] .. for example, in expressions like " nlo + valhi ",
//     [✔] .. make constant nlo a power of 2.
//     [✔] The module can use procedural code, but not for comparisons.
//
//     [✔] Make sure that the testbench does not report errors.
//     [✔] The module must be synthesizable.
//
//     [✔] Pay attention to cost and performance.
//     [✔] Code must be written clearly.



module is_sorted_to_tree
  #( int n = 12, int w = 32, int wi = $clog2(n), bit pot = 1 )
   ( output logic [wi-1:0] sidx, eidx,
     output logic [1:0] trend,
     input uwire [w-1:0] a[n-1:0] );

   /// SOLUTION -- Problem 2

   if ( n == 1 ) begin

      // Base Case: One element.
      //
      assign sidx = 0;   // "Entire" sequence sorted.
      assign eidx = 0;   // "Entire" sequence equal.
      assign trend = 0;  // All elements (just one) equal.

   end else if ( n == 2 ) begin

      // Base Case: Two elements. Sorted (either ascending or descending).
      //
      assign sidx = 1;                // Entire sequence sorted.
      assign eidx = trend ? 0 : 1;
      ist_compare #(w) lti( trend, a[0], a[1] );

   end else begin

      // Choose nlo and nhi so that ..
      // .. nlo is a power of 2 (if pot is 1, n/2 otherwise) ..
      // .. and nlo + nhi = n.
      //
      // Connect the first nlo + 1 elements to the lo recursive instantiation ..
      // and nhi elements to the hi instantiation.
      //
      // Element a[nlo] is connected to *both* the lo and hi instances.
      // Doing so avoids the need to compare elements at this level.

      localparam int nlg = $clog2(n-1) - 1;
      localparam int nlo = pot ? 1 << nlg : n / 2;

      localparam int nhi = n - nlo;
      localparam int wilo = $clog2(nlo+1);
      localparam int wihi = $clog2(nhi);

      uwire [wilo-1:0] sidxlo, eilo;
      uwire [wihi-1:0] sidxhi, eihi;
      uwire [1:0] trlo, trhi;

      is_sorted_to_tree #(nlo+1,w,wilo,pot) islo(sidxlo, eilo, trlo, a[nlo:0] );
      is_sorted_to_tree #(nhi,w,wihi,pot) ishi(sidxhi, eihi, trhi, a[n-1:nlo] );

      always_comb begin

         if ( sidxlo != nlo ) begin

            // The sorted sequence is entirely within the lo instance.

            trend = trlo;
            eidx = eilo;
            sidx = sidxlo;

         end else if ( ( trlo | trhi ) == 2'b11 ) begin

            // The entire input to lo is sorted according to trlo, but
            // the trend of the hi instance is different (not just zero).
            //
            // Use trlo and eilo.
            //
            trend = trlo;
            eidx = eilo;
            //
            // Though the trend in hi is different than lo, it's possible
            // that the first few elements in hi are equal, and if so they
            // would be part of the sorted sequence.
            //
            sidx = nlo + eihi;

         end else begin

            // The entire input to lo is sorted according to trlo, and
            // trhi is compatible. (It is possible that trlo or trhi
            // or both are zero.)
            // 
            trend = trlo | trhi;
            //
            // If all elements in lo are equal, add on the number of
            // equal elements in hi.
            //
            eidx = trlo == 0 ? nlo + eihi : eilo;
            sidx = nlo + sidxhi;

         end

      end

   end

endmodule



//////////////////////////////////////////////////////////////////////////////
/// Testbench Code
//
// It is okay to modify the testbench code to facilitate the coding
// and debugging of your modules. Keep in mind that your submission
// will be tested using a different testbench, so no one will be
// accused of dishonesty for modifying the testbench below. If you do
// modify the testbench be sure to make sure all of the original tests
// are performed to make sure that your code passes the original
// testbench.


// cadence translate_off

// Module names. (Used by the testbench.)
//
typedef enum { M_proc, M_iter, M_tree } M_Type;

module testbench;

   localparam int npsets = 7; // This MUST be set to the size of pset.
   localparam int pset[npsets][2] =
              '{{ 4, 7 }, { 5, 8 }, { 6, 8 }, { 7, 8 },
                { 8, 8 }, { 9, 8 }, { 10, 9 } };

   localparam int nmsets = 3;
   localparam M_Type mset[3] = '{ M_iter, M_tree, M_proc };

   string mtype_str[M_Type] =
          '{ M_proc: "is_sorted_to_proc",
             M_iter: "is_sorted_to_iter", M_tree: "is_sorted_to_tree" };
   string mtype_abbr[M_Type] =
          '{ M_proc: "func", M_iter: "iter", M_tree: "tree" };

   int t_errs_each_tr[M_Type][int];
   int t_errs_each_sidx[M_Type][int];
   int t_errs_each_eidx[M_Type][int];
   int t_errs_tr, t_errs_sidx, t_errs_eidx;

   localparam int nsets = npsets * nmsets;

   logic d[nsets:-1]; // Start / Done signals.

   initial begin
      t_errs_tr = 0;
      t_errs_sidx = 0;
      t_errs_eidx = 0;
      for ( int m=0; m<nmsets; m++ )
        for ( int i=0; i<npsets; i++ ) begin
           automatic int n = pset[i][0];
           t_errs_each_tr[mset[m]][n] = 0;
           t_errs_each_sidx[mset[m]][n] = 0;
           t_errs_each_eidx[mset[m]][n] = 0;
        end

      d[-1] = 1;
   end

   final begin
      for ( int mi=0; mi<nmsets; mi++ )
        for ( int i=0; i<npsets; i++ ) begin
           automatic M_Type m = mset[mi];
           automatic int n = pset[i][0];
           $write("Total %s n=%2d: Errors: %0d trend, %0d sidx, %0d eidx.\n",
                  mtype_str[m], n,
                  t_errs_each_tr[m][n],
                  t_errs_each_sidx[m][n],
                  t_errs_each_eidx[m][n]);
           t_errs_tr += t_errs_each_tr[m][n];
           t_errs_sidx += t_errs_each_sidx[m][n];
           t_errs_eidx += t_errs_each_eidx[m][n];
        end

      $write("Grand Total Errors:  %0d trend, %0d sidx, %0d eidx.\n",
             t_errs_tr, t_errs_sidx, t_errs_eidx);

   end

   for ( genvar m=0; m<nmsets; m++ )
     for ( genvar i=0; i<npsets; i++ ) begin
        localparam int idx = m * npsets + i;
        testbench_n
          #( .n(pset[i][0]), .w(pset[i][1]), .mtype(mset[m]) )
        t2( .done(d[idx]), .tstart(d[idx-1]) );
     end

endmodule

module testbench_n
  #( int n = 6, w = 6, M_Type mtype = M_proc )
   ( output logic done, input uwire tstart );

   localparam int wi = $clog2(n);
   localparam int num_tests = 10000;

   logic [w-1:0] a[n-1:0];
   uwire [1:0] trend;
   uwire [wi-1:0] sidx, eidx;

   case ( mtype )
     M_proc:
       is_sorted_to_proc #(n,w) isi( sidx, eidx, trend, a );
     M_iter:
       is_sorted_to_iter #(n,w) isi( sidx, eidx, trend, a );
     M_tree:
       is_sorted_to_tree #(n,w) isi( sidx, eidx, trend, a );
   endcase

   int seed;
   initial seed = 47552025;

   function automatic bit rand_bern( int period );
      rand_bern = $dist_uniform(seed,1,period) == 1;
   endfunction

   function automatic int rand_n( int n );
      rand_n = $dist_uniform(seed,0,n-1);
   endfunction

   function automatic int rand_w( int w );
      rand_w = $dist_uniform(seed,0,2**w-1);
   endfunction


   initial begin

      automatic int n_err_trend =0, n_err_sidx = 0, n_err_eidx = 0;
      automatic string prefix_txt =
        $sformatf("%s n=%0d, w=%0d",testbench.mtype_str[mtype],n,w);
      automatic string prefix_abbr_txt =
        $sformatf("%s n=%0d, w=%0d",testbench.mtype_abbr[mtype],n,w);

      automatic string trend_str[int] =
        '{ 0: "equal", 2'b10: "increasing",
           2'b01: "decreasing", 2'b11: "Error" };
      automatic int n_samples_trend[int];

      localparam int n_samples = 2;

      bit show_sample;

      wait( tstart );

      $write("Starting tests for %s.\n", prefix_txt);

      for ( int i=0; i<num_tests; i++ ) begin

         bit err_trend, err_sidx, err_eidx;

         automatic int shadow_sidx = $dist_uniform(seed,1,n-1);
         automatic bit all_sorted = shadow_sidx == n - 1;

         automatic int n_distinct =
           all_sorted && rand_bern(5) ? 1 :
             shadow_sidx == 1 ? 2 : $dist_uniform(seed, 2, 2+shadow_sidx/2 );
         logic [1:0] shadow_trend;
         int shadow_eidx;
         bit all_equal;

         automatic int dist_elts[] = new [ n_distinct ];
         automatic int prefix[] = new [shadow_sidx+1];
         foreach ( dist_elts[i] ) dist_elts[i] = rand_w( w );
         // Make sure that there are at least two distinct elements.
         dist_elts.sort();
         if ( !all_sorted && dist_elts[0] == dist_elts[n_distinct-1] )
           dist_elts[0]++;
         all_equal = all_sorted && dist_elts[0] == dist_elts[n_distinct-1];
         shadow_trend = all_equal ? 0 : $dist_uniform(seed,1,2);
         foreach ( prefix[j] )
           prefix[j] = dist_elts[ j < n_distinct ? j : rand_n(n_distinct)];
         if ( shadow_trend == incr ) prefix.sort(); else prefix.rsort();
         foreach ( prefix[i] ) a[i] = prefix[i];
         if ( !all_sorted )
           a[shadow_sidx+1] = shadow_trend == incr
             ? a[shadow_sidx] - 1 : a[shadow_sidx] + 1;
         for ( int i=shadow_sidx+2; i<n; i++ ) a[i] = rand_w(w);

         for ( int j=0; j<n; j++ )
           if ( a[j] == a[0] ) shadow_eidx = j; else break;

         #1;

         err_trend = shadow_trend !== trend;
         err_sidx = shadow_sidx !== sidx;
         err_eidx = shadow_eidx !== eidx;
         if ( err_trend ) n_err_trend++;
         if ( err_sidx ) n_err_sidx++;
         if ( err_eidx ) n_err_eidx++;

         show_sample = 0;

         if ( err_trend && n_err_trend < 5 ) begin
            show_sample = 1;
            $write( "Error trend %b != %b (correct)\n", trend, shadow_trend);
         end
         if ( err_sidx && n_err_sidx < 5 ) begin
            show_sample = 1;
            $write( "Error sidx %0d != %0d (correct)\n", sidx, shadow_sidx);
         end
         if ( err_eidx && n_err_eidx < 5 ) begin
            show_sample = 1;
            $write( "Error eidx %0d != %0d (correct)\n", eidx, shadow_eidx);
         end

         if ( !show_sample && n_samples_trend[shadow_trend] < n_samples ) begin
            show_sample = 1;
            $write( "Output below is correct, it shows trend %2b -- %s.\n",
                    shadow_trend, trend_str[shadow_trend] );
         end

         if ( show_sample ) begin
            n_samples_trend[shadow_trend]++;
            for ( int j=0; j<n; j++ ) begin
               $write(" %2d%0s",
                      j, j == shadow_eidx && j == shadow_sidx ? "es" :
                      j == shadow_eidx ? "e" :
                      j == shadow_sidx ? "s" : " " );
            end
            $write("\n");
            for ( int j=0; j<n; j++ ) begin
               $write(" %3d",a[j]);
            end
            $write("\n");
         end

      end

      $write("Did %0d tests of %s. Errors %0d trend, %0d sidx, %0d eidx.\n",
             num_tests, prefix_abbr_txt, n_err_trend, n_err_sidx, n_err_eidx);

      testbench.t_errs_each_tr[mtype][n] = n_err_trend;
      testbench.t_errs_each_sidx[mtype][n] = n_err_sidx;
      testbench.t_errs_each_eidx[mtype][n] = n_err_eidx;

      done = 1;

   end

endmodule

// cadence translate_on