/// EE 4755 - Digital Design Using HDLs
//
//  Classroom demo code for content-addressable memory.
//
// Time-stamp: <3 December 2014, 17:45:46 CST, koppel@sky.ece.lsu.edu>

 /// Content-Addressable Memory
//
//   A device that retrieves data using a key.
//   The key can be any k-bit quantity.
//
//   The difference between a RAM and a CAM:
//
//      Suppose both devices have 2**n locations.
//      For the CAM it only makes sense that k > n (otherwise use a RAM).
//
//      The RAM will have an n-bit address input, and
//      the RAM can simultaneously hold 2**n elements, one for
//      each possible n-bit address.
//
//      A RAM typically has READ and WRITE operations, these operate on
//      a particular location (the one specified by the address).
//
//      The CAM will have a k-bit key input.  The CAM
//      can hold at most 2**n elements.
//
//      A CAM has INSERT, READ, and DELETE operations (others are
//      possible). The key itself does not specify the location to
//      access, instead the CAM looks for the location in which the
//      key is stored. If the key is found a READ and DELETE operation
//      can be performed. If the key is not found an INSERT operation
//      can add the key to some existing location.
//
//      A storage location in a RAM only holds just the data, say d bits.
//
//      A storage location in a CAM holds the data, the key, and
//      a full bit indicating that the location is occupied, totaling
//      d + k + 1 bits.

 /// The Modules in This File
//
//   These modules are intended to demonstrate features of Verilog
//   and digital design.
//
//   They are NOT intended to show the best way to design CAMs.
//
//   Some modules may lack features, such as reset.
//
//   If you need a CAM for real applications, look elsewhere.



//////////////////////////////////////////////////////////////////////////////
/// Simple RAM Module
//
//  A conventional memory module.
//
//  Provides 2**awid storage locations of dwid bits each.
//
//  Lookup is performed using an integer address.
//
//  This should be used for small amounts of storage.
//
//  For larger amounts of storage consider:
//    - A component provided in the target technology library.
//    - A connection to off-chip storage.
//
//  Pedagogical Purpose:
//    To emphasize the difference between integer-addressed storage (this RAM)
//    and content-addressed storage (the CAM modules described below).

module our_ram
  #( int awid = 5, int dwid = 15, int size = 1 << awid )
   ( output [dwid:1] out_data,
     input [awid:1] addr,
     input [dwid:1] in_data,
     input write, clk );

   logic [dwid:1] storage[size];

   assign  out_data = storage[ addr ];

   always @( posedge clk ) if ( write ) storage[ addr ] <= in_data;

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Common Definitions
//
//  The enumeration below is used by several CAM modules.
//
//  :sv: 6.19 Description of enumerations.

 /// Type for CAM Commands
//
typedef enum logic [1:0] { CMD_INS, CMD_RD, CMD_DEL, CMD_NOP } Cam_Command;
//
//  For synthesis and simulation purposes this type is equivalent to a
//  two-bit logic object.
//
//  But for humans, there is the added convenience of the associated
//  enumeration constants, CMD_INS, CMD_RD, etc.
//
//  These constants are automatically assigned values starting at 0.
//
//  It's possible to explicitly assign values to them.


//////////////////////////////////////////////////////////////////////////////
/// Simple CAM Module
//
 /// Specifications
//
//   Number of Storage Locations : ssize            : N = 2**n
//   Size of Key                 : kwid             : k
//   Size of Data Item           : dwid             : d
//
//   Can store N different elements regardless of the key values.
//
//
 ///  Hardware Cost Highlights:
//
//    Amount of storage:  ssize * ( kwid + dwid + 1 ) bits.
//                     :  N ( k + d + 1 ) bits
//    Number of key comparison units: ssize = N

module cam
  #( int kwid = 10,                  // Key Width
     int dwid = 10,                  // Data Width
     int ssize = 16,                 // Storage Size (Number of locations)
     int ssize_lg = $clog2(ssize) )
   ( output [dwid:1] out_data,
     output out_valid,
     output ready,
     input [kwid:1] key,
     input [dwid:1] in_data,
     input Cam_Command cmd,
     input clk);

   logic [dwid:1] storage_data [ssize];
   logic [kwid:1] storage_key [ssize];
   logic [ssize-1:0] storage_full;

   logic [ssize_lg:1] midx, eidx;
   logic mmatch, ematch;

   // cadence translate_off
   initial for ( int i=0; i<ssize; i++ ) storage_full[i] = 0;

   initial begin
      if ( ssize > ( 1 << kwid ) ) begin
         $display("Size of cam too large for key size.");
         $fatal(1);
      end
   end
   // cadence translate_on

   /// Hardware to Find Matching Key
   //
   always_comb begin
      mmatch = 0;               // A match was found.
      midx = 0;                 // [Key] Match Index
      for ( int i=0; i<ssize; i++ )
        if ( storage_full[i] && storage_key[i] == key )
          begin mmatch = 1; midx = i; end
   end

   assign out_data = storage_data[midx];
   assign out_valid =
          cmd == CMD_RD  && mmatch ||
          cmd == CMD_INS && ( mmatch || ematch );

   /// Hardware to Find Empty Location
   //
   always_comb begin
      ematch = 0;               // An empty location was found.
      eidx = 0;                 // Empty location index.
      for ( int i=0; i<ssize; i++ )
        if ( !storage_full[i] )
          begin ematch = 1; eidx = i; end
   end

   // Everything takes one cycle.
   assign ready = 1;

   /// Hardware to Add or Remove Items
   //
   always @( posedge clk )
     case ( cmd )
       CMD_DEL: if ( mmatch ) storage_full[midx] = 0;
       CMD_INS:
         if ( ematch && !mmatch ) begin
            storage_full[eidx] = 1;
            storage_data[eidx] = in_data;
            storage_key[eidx] = key;
         end else if ( mmatch ) begin
            storage_data[midx] = in_data;
         end
       default:;
     endcase

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Hash Module
//

module hash #(int key_bits = 8, int hash_bits = 2)
   (output logic [hash_bits-1:0] hash, input [key_bits-1:0] key);

   always_comb begin
      hash = 0;
      for ( int i=0; i < key_bits; i += hash_bits ) hash ^= key >> i;
   end

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Hash CAM
//
 /// Motivation
//
//   Reduce the number of comparison units.
//   A hash function determines which set to place a key in.
//   The number of comparisons is equal to the set size.
//
//
 /// Specifications
//
//   Number of Storage Locations : ssize            : N = 2**n
//   Size of Key                 : kwid             : k
//   Size of Data Item           : dwid             : d
//   Size of Hash (bits)         : num_sets_lg      : m
//   Number of Sets              : 2 ** num_sets_lg : M = 2**m
//
//   Can store UP TO N different elements.
//
//   At most N/M elements with the same hash can be simultaneously stored.
//
//
 ///  Hardware Cost Highlights:
//
//    Amount of storage:  ssize * ( kwid + dwid + 1 ) bits.
//                        = N ( k + d + 1 ) bits
//
//    Number of key comparison units: ssize * ( 2 ** -num_sets_lg )
//                        = N / M = 2 ** ( n - m )
//
//    Interesting Values:
//
//       m = 0:
//         Identical to CAM module.
//         Does not need a hash unit.
//
//       m = n:
//         Uses just one key comparison unit.
//         No two elements can have the same hash.
//

 /// Design Considerations
//
//   Larger m, lower cost.
//   Smaller m, higher occupancy.
//   This module cannot be used
//
 /// Timing
//
//   Command, key, and data provided by end of one clock cycle,
//     meaning just before the positive edge.
//
//   Result available by next clock cycle.
//     For CMD_RD and CMD_DEL, result is whether key was found
//         (out_valid), and if so, data.
//     For CMD_INS, result is whether our not input data was written.
//
 /// Known Issue
//
//   There is no way to reset this module. This makes it unsuitable
//     for production use but very suitable for a future homework
//     assignment.


module cam_hash
  #( int kwid = 9,
     int dwid = 10,
     int ssize = 16,
     int num_sets_lg = 1,
     bit iterate = 0 )
   ( output [dwid:1] out_data,
     output out_valid,
     output ready,
     input [kwid:1] in_key,
     input [dwid:1] in_data,
     input Cam_Command in_cmd,
     input clk);

   localparam int num_sets = 1 << num_sets_lg;
   localparam int set_size = ssize >> num_sets_lg;
   localparam int set_size_lg = $clog2(set_size);

   logic [dwid:1] storage_data [num_sets][set_size];
   logic [kwid:1] storage_key [num_sets][set_size];
   logic [0:num_sets-1][0:set_size-1] storage_full;

   logic [set_size_lg:1] midx, eidx;
   logic mmatch, ematch;

   logic [kwid:1] b_key;
   logic [dwid:1] b_data;
   Cam_Command b_cmd;


   // cadence translate_off
   initial
   for ( int j=0; j<num_sets; j++ ) for ( int i=0; i<set_size; i++ )
   storage_full[j][i] = 0;

   initial begin
      if ( ssize > ( 1 << kwid ) ) begin
         $display("Size of cam too large for key size.");
         $fatal(1);
      end
   end
   // cadence translate_on


   /// Remember command, key, and data for use in next cycle.
   //
   always_ff @( posedge clk ) begin

      b_key <= in_key;
      b_data <= in_data;
      b_cmd <= in_cmd;

   end

   wire [(num_sets_lg?num_sets_lg-1:0):0] hashed_key;

   /// Hardware to compute hash of key.
   //
   generate
     if ( num_sets_lg > 0 )
       hash #(kwid,num_sets_lg) our_hash_module(hashed_key,b_key);
     else
       assign hashed_key = 0;
   endgenerate

   /// Hardware to find matching key.
   //
   //  Note: In part this describes a priority encoder. We
   //   expect that the synthesis program will generate a log N/M depth
   //   circuit for this common component.
   //
   always_comb begin
      mmatch = 0;               // Whether a match was found.
      midx = 0;                 // Match index.
      for ( int i=0; i<set_size; i++ )
        if ( storage_full[hashed_key][i]
             && storage_key[hashed_key][i] == b_key )
          begin mmatch = 1; midx = i; end
   end


   assign out_data = storage_data[hashed_key][midx];
   assign out_valid =
          mmatch && ( b_cmd == CMD_RD || b_cmd == CMD_DEL )
          || ( mmatch || ematch ) && b_cmd == CMD_INS;
   assign ready = 1;


   /// Hardware to find unoccupied location.
   //
   always_comb begin
      ematch = 0;               // Whether or not an empty location was found.
      eidx = 0;                 // Empty index.
      for ( int i=0; i<set_size; i++ )
        if ( !storage_full[hashed_key][i] )
          begin ematch = 1; eidx = i; end
   end

   /// Hardware to update contents.
   //
   always @( posedge clk )
     case ( b_cmd )
       CMD_DEL: if ( mmatch ) storage_full[hashed_key][midx] <= 0;
       CMD_INS: if ( ematch && !mmatch ) begin
          storage_full[hashed_key][eidx] <= 1;
          storage_data[hashed_key][eidx] <= b_data;
          storage_key[hashed_key][eidx] <= b_key;
       end else if ( mmatch ) begin
          storage_data[hashed_key][midx] <= b_data;
       end
       default:;
     endcase

endmodule


//////////////////////////////////////////////////////////////////////////////
/// Multi-Cycle CAM Module
//
 /// Motivation
//
//   The cam_hash module can reject an insert command even
//   though it has space. (Because all M locations at the hashed
//   key value are full.)
//
//   In this module perform a search over several clock cycles.
//     The search starts in a set determined by a hash of the key, call
//        that the *preferred set*.
//     If necessary, the search continues in subsequent sets.
//
//   This allows us to achieve full occupancy without requiring
//     N key comparison units.
//
//   The downside is that the worst-case time for operations is M cycles.
//

 /// Specifications
//
//   Number of Storage Locations : ssize            : N = 2**n
//   Size of Key                 : kwid             : k
//   Size of Data Item           : dwid             : d
//   Size of Hash (bits)         : num_sets_lg      : m
//   Number of Sets              : 2 ** num_sets_lg : M = 2**m
//
//   Can store N different elements regardless of the key values.
//
//   Number of cycles to read an item range from 1 to M.
//
//   Items added when occupancy was low can be found quickly.
//
//
 ///  Hardware Cost Highlights:
//
//    Amount of storage:  ssize * ( kwid + dwid + 1 ) bits.
//                        = N ( k + d + 1 ) bits
//
//    Number of key comparison units: ssize * ( 2 ** -num_sets_lg )
//                        = N / M = 2 ** ( n - m )
//
 /// Known Issues
//
//   Note: These may be fixed in future assignments.
//
//   There is no way to reset this module. This makes it unsuitable
//     for production use but very suitable for a future homework
//     assignment.
//
//   No attempt is made to avoid poor performance. For example,
//     after a delete no attempt is made to move another element into
//     the vacated location.
//
//   The hash module is on the critical path. If multi-cycle operations
//     are expected this wastes time.


module cam_multicycle
  #( int kwid = 9,
     int dwid = 10,
     int ssize = 16,
     int num_sets_lg = 1,
     bit use_hash = 1 )
   ( output [dwid:1] out_data,
     output out_valid,
     output ready,
     input [kwid:1] in_key,
     input [dwid:1] in_data,
     input Cam_Command in_cmd,
     input clk);

   localparam int num_sets = 1 << num_sets_lg;
   localparam int set_size = ssize >> num_sets_lg;
   localparam int set_size_lg = $clog2(set_size);

   logic [dwid:1] storage_data [num_sets][set_size];
   logic [kwid:1] storage_key [num_sets][set_size];
   logic [0:num_sets-1][0:set_size-1] storage_full;

   logic [set_size_lg:1] midx, eidx;
   logic mmatch, ematch;

   logic [num_sets_lg:1] iter;

   logic [kwid:1] b_key;  // b is for buffered.
   logic [dwid:1] b_data;
   Cam_Command b_cmd;

   logic [set_size_lg:1] b_eidx;
   logic [num_sets_lg:1] b_sidx;
   logic b_eidx_full;

   wire [(num_sets_lg?num_sets_lg-1:0):0] hashed_key;

   // cadence translate_off
   initial begin
      for ( int j=0; j<num_sets; j++ ) for ( int i=0; i<set_size; i++ )
        storage_full[j][i] = 0;
      iter = num_sets - 1;
   end

   initial begin
      if ( ssize > ( 1 << kwid ) ) begin
         $display("Size of cam too large for key size.");
         $fatal(1);
      end
   end
   // cadence translate_on

   wire    start = ready && in_cmd != CMD_NOP;

   /// Remember command, key, and data for use in next cycle.
   //
   always @( posedge clk ) if ( start ) begin

      b_key <= in_key;
      b_data <= in_data;
      b_cmd <= in_cmd;
      iter <= 0;

   end else begin

      iter <= ready ? iter : iter + 1;

   end

   /// Hardware For Computing Hash of Key
   //
   generate
     if ( use_hash && num_sets_lg > 0 )
       hash #(kwid,num_sets_lg) our_hash_module(hashed_key,b_key);
     else
       assign hashed_key = 0;
   endgenerate

   wire [num_sets_lg:1] set_idx = hashed_key ^ iter;


   /// Find Matching Key Hardware
   //
   always_comb begin
      mmatch = 0;
      midx = 0;
      for ( int i=0; i<set_size; i++ )
        if ( storage_full[set_idx][i] && storage_key[set_idx][i] == b_key )
          begin mmatch = 1; midx = i; end
   end

   wire found = ( b_cmd != CMD_NOP ) && mmatch;
   wire last_iter = iter == num_sets - 1;
   assign ready = found || last_iter;

   assign out_data = storage_data[set_idx][midx];


   /// Find Empty Location Hardware
   //
   always_comb begin
      ematch = 0;
      eidx = 0;
      for ( int i=0; i<set_size; i++ )
        if ( !storage_full[set_idx][i] ) begin ematch = 1; eidx = i; end
   end

   /// Hardware to Store Location of First Empty Location
   //
   wire [set_size_lg:1] b_eidx_next = b_eidx_full ? b_eidx : eidx;
   wire [num_sets_lg:1] b_sidx_next = b_eidx_full ? b_sidx : set_idx;
   wire                 b_eidx_full_next = b_eidx_full || ematch;

   always_ff @( posedge clk ) begin

      if ( !b_eidx_full ) begin
         b_eidx <= b_eidx_next;
         b_sidx <= b_sidx_next;
      end

      b_eidx_full <= b_eidx_full_next && !start;

   end

   assign out_valid =
          (  b_cmd == CMD_RD || b_cmd == CMD_DEL ) && mmatch
          || b_cmd == CMD_INS && ( mmatch || b_eidx_full_next );


   /// Hardware to Update Contents
   //
   always @( posedge clk ) begin

       if ( found )
         case ( b_cmd )
           CMD_DEL: storage_full[set_idx][midx] <= 0;
           CMD_INS: storage_data[set_idx][midx] <= b_data;
           default:;
         endcase
       else if ( last_iter && b_eidx_full_next )
         case ( b_cmd )
           CMD_INS: begin
              storage_key[b_sidx_next][b_eidx_next] <= b_key;
              storage_full[b_sidx_next][b_eidx_next] <= 1;
              storage_data[b_sidx_next][b_eidx_next] <= b_data;
           end
           default:;
         endcase

   end

endmodule



//////////////////////////////////////////////////////////////////////////////
/// Testbench

// cadence translate_off

module test_cam();

   localparam int kwid = 16;
   localparam int dwid = 20;
   localparam int ssize_lg = 5;
   localparam int ssize = 1 << ssize_lg;
   localparam int num_sets_lg = 4;
   localparam int num_sets = 1 << num_sets_lg;
   localparam int idx_bits = $clog2(ssize);

   localparam int num_pairs = ssize + 5;
   //  localparam int num_pairs = 2;
   localparam int num_tests = 40000;
   localparam bit trace = 0;
   localparam logic [kwid:1] trace_key = 'hx;

   wire [dwid:1] out_data;
   wire          out_valid, ready;
   logic [kwid:1] key;
   logic [dwid:1] in_data;
   Cam_Command cmd;
   int cycles;
   logic clk;

   cam_hash #(kwid,dwid,ssize,num_sets_lg) c1
     (out_data, out_valid, ready, key, in_data, cmd, clk);
   //  cam #(kwid,dwid,ssize) c1(out_data, out_valid, ready, key, in_data, cmd, clk);
   //  our_ram #($clog2(ssize),dwid) r1(out_datax, addr, in_data, write, clk);

   initial begin
      clk = 0; cycles = 0;
      forever begin
         #1;
         if ( clk == 0 ) cycles <= cycles + 1;
         clk <= !clk;
      end
   end

   // Use a hash to remember which keys we have already used.
   bit keys[int];

   // Array of structures holding information about a test input.
   //
   struct { logic [kwid:1] k; logic [dwid:1] d; bit in; } pairs[num_pairs];

   initial begin
      #( num_tests * num_sets * 2 * 2 );
      $display("Tests taking too long, stopping.");
      $finish(2);
   end

   initial begin

      logic [dwid:1] out_expect;
      logic valid_expect;
      automatic int derr_count = 0;
      automatic int verr_count = 0;
      automatic int ins_count = 0;
      automatic int ins_reject_occ_sum = 0;
      automatic int ins_reject_space = 0;
      automatic int ins_reject_full = 0;
      automatic int rd_in_cycles_sum = 0;
      automatic int rd_in_count = 0;
      automatic int del_count = 0;
      automatic int occ_expect = 0;

      $display("\n\n");
      $display("Running testbench on module with n = %0d, m = %0d.\n",
               ssize_lg, num_sets_lg);

      // Initialize pairs array.
      //
      for ( int i=0; i<num_pairs; i++ ) begin

         // Choose a key that has not yet been used.
         //
         logic [kwid:1] k;
         do k = $random(); while ( keys.exists( k ) );
         keys[k] = 1;

         pairs[i].k = k;
         pairs[i].d = $random();
         pairs[i].in = 0;

      end

      cmd = CMD_NOP;

      repeat ( 2 ) @( negedge clk );

      while ( !ready ) @( negedge clk );

      for ( int i=0; i<num_tests; i++ ) begin

         automatic int cycles_start = cycles;

         // Choose a new test pattern.
         //
         automatic int idx = {$random} % num_pairs;

         // Apply a new test pattern.

         key = pairs[idx].k;

         if ( pairs[idx].in ) begin

            // Test pattern should be in CAM.
            valid_expect = 1;

            if ( {$random} % 16 < 15 ) begin

               // Try reading it using either read or insert.

               cmd = {$random} % 16 < 8 ? CMD_INS : CMD_RD;

               if ( cmd == CMD_INS ) begin
                  pairs[idx].d++;
                  out_expect = 'hx;
               end else begin
                  out_expect = pairs[idx].d;
               end

               in_data = pairs[idx].d;

            end else begin

               // Try deleting it.

               cmd = CMD_DEL;
               occ_expect--;
               out_expect = 'hx;
               pairs[idx].in = 0;
               del_count++;

            end

         end else begin

            // Test pattern should not be in CAM, make sure it isn't.

            if ( {$random} % 16 < 8 ) begin

               cmd = CMD_RD;
               out_expect = 'hx;
               valid_expect = 0;

            end else begin

               cmd = CMD_INS;
               out_expect = 'hx;
               valid_expect = 'hx;
               in_data = pairs[idx].d;

            end

         end

         do @( negedge clk ); while ( !ready );

         if ( cmd == CMD_RD && valid_expect === 1 )
           begin
              rd_in_count++;
              rd_in_cycles_sum += cycles - cycles_start;
           end

         if ( ( trace || trace_key === key )
              && ( derr_count + verr_count ) < 2 )
           $display("Command %d, key %h, expect %h  in data %h  out_valid %d",
                    cmd, key, out_expect, in_data, out_valid);

         if ( cmd == CMD_INS && !pairs[idx].in )
           begin
              ins_count++;
              if ( out_valid ) begin
                 occ_expect++;
                 pairs[idx].in = 1;
                 out_expect = 'hx;
              end else begin
                 ins_reject_occ_sum += occ_expect;
                 if ( occ_expect < ssize )
                   ins_reject_space++;
                 else
                   ins_reject_full++;
                 out_expect = 'hx;
              end
           end

         // Check whether module output is correct.
         //
         if ( out_expect !== (dwid)'('hx) && out_data !== out_expect ) begin
            derr_count++;
            if ( derr_count < 20 )
              $display(" i %4d  %h != %h (correct)  cmd %d  key %h",
                       i, out_data, out_expect, cmd, key);

         end
         if ( valid_expect !== 1'hx && valid_expect !== out_valid ) begin
            verr_count++;
            if ( verr_count < 20 )
              $display(" i %4d  %h != %h (correct) output valid bit cmd %d key %h",
                       i, out_valid, valid_expect, cmd, key);

         end

      end

      $display("Done with %1d tests (%1d ins, %1d del), %1d+%1d = %1d errors.",
               num_tests, ins_count, del_count,
               derr_count, verr_count, derr_count + verr_count);
      $display("Insert rejected with space %.2f%%,  when full %.2f%%",
               100 * real'(ins_reject_space) / ins_count,
               100 * real'(ins_reject_full) / ins_count);
      $display("Average occupancy at reject %.2f%%",
               100.0 * real'(ins_reject_occ_sum)
               / ( ssize * ( ins_reject_space + ins_reject_full )));
      $display("Avg read time when in: %.2f cyc",
               real'(rd_in_cycles_sum) / rd_in_count);

      $display("\n\n");

      $finish(2);

   end

endmodule


// cadence translate_on



// Effort high
// Module Name                             Area   Clock    Total
//                                               Period    Delay
// cam_kwid8_dwid8_ssize8                129584    1753     1753
// cam_kwid8_dwid8_ssize16               244720    2364     2364
// cam_kwid8_dwid8_ssize32               502496    2487     2487
// cam_kwid8_dwid8_ssize64               991624    3278     3278

// Effort medium
// Module Name                             Area   Clock    Total
//                                               Period    Delay
// cam_kwid8_dwid8_ssize8                123212    1970     1970
// cam_kwid8_dwid8_ssize16               233852    2486     2486
// cam_kwid8_dwid8_ssize32               484936    2596     2596
// cam_kwid8_dwid8_ssize64               950576    3813     3813


// Module Name                             Area   Clock    Total
//                                               Period    Delay
// our_ram_awid3_dwid8                    79712     736      736
// our_ram_awid4_dwid8                   160636     739      739
// our_ram_awid5_dwid8                   319616     758      758
// our_ram_awid6_dwid8                   641920     734      734