/// EE 4755 - Digital Design Using HDLs // // Classroom demo code for content-addressable memory. // // Time-stamp: <3 December 2014, 17:45:46 CST, koppel@sky.ece.lsu.edu> /// Content-Addressable Memory // // A device that retrieves data using a key. // The key can be any k-bit quantity. // // The difference between a RAM and a CAM: // // Suppose both devices have 2**n locations. // For the CAM it only makes sense that k > n (otherwise use a RAM). // // The RAM will have an n-bit address input, and // the RAM can simultaneously hold 2**n elements, one for // each possible n-bit address. // // A RAM typically has READ and WRITE operations, these operate on // a particular location (the one specified by the address). // // The CAM will have a k-bit key input. The CAM // can hold at most 2**n elements. // // A CAM has INSERT, READ, and DELETE operations (others are // possible). The key itself does not specify the location to // access, instead the CAM looks for the location in which the // key is stored. If the key is found a READ and DELETE operation // can be performed. If the key is not found an INSERT operation // can add the key to some existing location. // // A storage location in a RAM only holds just the data, say d bits. // // A storage location in a CAM holds the data, the key, and // a full bit indicating that the location is occupied, totaling // d + k + 1 bits. /// The Modules in This File // // These modules are intended to demonstrate features of Verilog // and digital design. // // They are NOT intended to show the best way to design CAMs. // // Some modules may lack features, such as reset. // // If you need a CAM for real applications, look elsewhere. ////////////////////////////////////////////////////////////////////////////// /// Simple RAM Module // // A conventional memory module. // // Provides 2**awid storage locations of dwid bits each. // // Lookup is performed using an integer address. // // This should be used for small amounts of storage. // // For larger amounts of storage consider: // - A component provided in the target technology library. // - A connection to off-chip storage. // // Pedagogical Purpose: // To emphasize the difference between integer-addressed storage (this RAM) // and content-addressed storage (the CAM modules described below). module our_ram #( int awid = 5, int dwid = 15, int size = 1 << awid ) ( output [dwid:1] out_data, input [awid:1] addr, input [dwid:1] in_data, input write, clk ); logic [dwid:1] storage[size]; assign out_data = storage[ addr ]; always @( posedge clk ) if ( write ) storage[ addr ] <= in_data; endmodule ////////////////////////////////////////////////////////////////////////////// /// Common Definitions // // The enumeration below is used by several CAM modules. // // :sv: 6.19 Description of enumerations. /// Type for CAM Commands // typedef enum logic [1:0] { CMD_INS, CMD_RD, CMD_DEL, CMD_NOP } Cam_Command; // // For synthesis and simulation purposes this type is equivalent to a // two-bit logic object. // // But for humans, there is the added convenience of the associated // enumeration constants, CMD_INS, CMD_RD, etc. // // These constants are automatically assigned values starting at 0. // // It's possible to explicitly assign values to them. ////////////////////////////////////////////////////////////////////////////// /// Simple CAM Module // /// Specifications // // Number of Storage Locations : ssize : N = 2**n // Size of Key : kwid : k // Size of Data Item : dwid : d // // Can store N different elements regardless of the key values. // // /// Hardware Cost Highlights: // // Amount of storage: ssize * ( kwid + dwid + 1 ) bits. // : N ( k + d + 1 ) bits // Number of key comparison units: ssize = N module cam #( int kwid = 10, // Key Width int dwid = 10, // Data Width int ssize = 16, // Storage Size (Number of locations) int ssize_lg = $clog2(ssize) ) ( output [dwid:1] out_data, output out_valid, output ready, input [kwid:1] key, input [dwid:1] in_data, input Cam_Command cmd, input clk); logic [dwid:1] storage_data [ssize]; logic [kwid:1] storage_key [ssize]; logic [ssize-1:0] storage_full; logic [ssize_lg:1] midx, eidx; logic mmatch, ematch; // cadence translate_off initial for ( int i=0; i<ssize; i++ ) storage_full[i] = 0; initial begin if ( ssize > ( 1 << kwid ) ) begin $display("Size of cam too large for key size."); $fatal(1); end end // cadence translate_on /// Hardware to Find Matching Key // always_comb begin mmatch = 0; // A match was found. midx = 0; // [Key] Match Index for ( int i=0; i<ssize; i++ ) if ( storage_full[i] && storage_key[i] == key ) begin mmatch = 1; midx = i; end end assign out_data = storage_data[midx]; assign out_valid = cmd == CMD_RD && mmatch || cmd == CMD_INS && ( mmatch || ematch ); /// Hardware to Find Empty Location // always_comb begin ematch = 0; // An empty location was found. eidx = 0; // Empty location index. for ( int i=0; i<ssize; i++ ) if ( !storage_full[i] ) begin ematch = 1; eidx = i; end end // Everything takes one cycle. assign ready = 1; /// Hardware to Add or Remove Items // always @( posedge clk ) case ( cmd ) CMD_DEL: if ( mmatch ) storage_full[midx] = 0; CMD_INS: if ( ematch && !mmatch ) begin storage_full[eidx] = 1; storage_data[eidx] = in_data; storage_key[eidx] = key; end else if ( mmatch ) begin storage_data[midx] = in_data; end default:; endcase endmodule ////////////////////////////////////////////////////////////////////////////// /// Hash Module // module hash #(int key_bits = 8, int hash_bits = 2) (output logic [hash_bits-1:0] hash, input [key_bits-1:0] key); always_comb begin hash = 0; for ( int i=0; i < key_bits; i += hash_bits ) hash ^= key >> i; end endmodule ////////////////////////////////////////////////////////////////////////////// /// Hash CAM // /// Motivation // // Reduce the number of comparison units. // A hash function determines which set to place a key in. // The number of comparisons is equal to the set size. // // /// Specifications // // Number of Storage Locations : ssize : N = 2**n // Size of Key : kwid : k // Size of Data Item : dwid : d // Size of Hash (bits) : num_sets_lg : m // Number of Sets : 2 ** num_sets_lg : M = 2**m // // Can store UP TO N different elements. // // At most N/M elements with the same hash can be simultaneously stored. // // /// Hardware Cost Highlights: // // Amount of storage: ssize * ( kwid + dwid + 1 ) bits. // = N ( k + d + 1 ) bits // // Number of key comparison units: ssize * ( 2 ** -num_sets_lg ) // = N / M = 2 ** ( n - m ) // // Interesting Values: // // m = 0: // Identical to CAM module. // Does not need a hash unit. // // m = n: // Uses just one key comparison unit. // No two elements can have the same hash. // /// Design Considerations // // Larger m, lower cost. // Smaller m, higher occupancy. // This module cannot be used // /// Timing // // Command, key, and data provided by end of one clock cycle, // meaning just before the positive edge. // // Result available by next clock cycle. // For CMD_RD and CMD_DEL, result is whether key was found // (out_valid), and if so, data. // For CMD_INS, result is whether our not input data was written. // /// Known Issue // // There is no way to reset this module. This makes it unsuitable // for production use but very suitable for a future homework // assignment. module cam_hash #( int kwid = 9, int dwid = 10, int ssize = 16, int num_sets_lg = 1, bit iterate = 0 ) ( output [dwid:1] out_data, output out_valid, output ready, input [kwid:1] in_key, input [dwid:1] in_data, input Cam_Command in_cmd, input clk); localparam int num_sets = 1 << num_sets_lg; localparam int set_size = ssize >> num_sets_lg; localparam int set_size_lg = $clog2(set_size); logic [dwid:1] storage_data [num_sets][set_size]; logic [kwid:1] storage_key [num_sets][set_size]; logic [0:num_sets-1][0:set_size-1] storage_full; logic [set_size_lg:1] midx, eidx; logic mmatch, ematch; logic [kwid:1] b_key; logic [dwid:1] b_data; Cam_Command b_cmd; // cadence translate_off initial for ( int j=0; j<num_sets; j++ ) for ( int i=0; i<set_size; i++ ) storage_full[j][i] = 0; initial begin if ( ssize > ( 1 << kwid ) ) begin $display("Size of cam too large for key size."); $fatal(1); end end // cadence translate_on /// Remember command, key, and data for use in next cycle. // always_ff @( posedge clk ) begin b_key <= in_key; b_data <= in_data; b_cmd <= in_cmd; end wire [(num_sets_lg?num_sets_lg-1:0):0] hashed_key; /// Hardware to compute hash of key. // generate if ( num_sets_lg > 0 ) hash #(kwid,num_sets_lg) our_hash_module(hashed_key,b_key); else assign hashed_key = 0; endgenerate /// Hardware to find matching key. // // Note: In part this describes a priority encoder. We // expect that the synthesis program will generate a log N/M depth // circuit for this common component. // always_comb begin mmatch = 0; // Whether a match was found. midx = 0; // Match index. for ( int i=0; i<set_size; i++ ) if ( storage_full[hashed_key][i] && storage_key[hashed_key][i] == b_key ) begin mmatch = 1; midx = i; end end assign out_data = storage_data[hashed_key][midx]; assign out_valid = mmatch && ( b_cmd == CMD_RD || b_cmd == CMD_DEL ) || ( mmatch || ematch ) && b_cmd == CMD_INS; assign ready = 1; /// Hardware to find unoccupied location. // always_comb begin ematch = 0; // Whether or not an empty location was found. eidx = 0; // Empty index. for ( int i=0; i<set_size; i++ ) if ( !storage_full[hashed_key][i] ) begin ematch = 1; eidx = i; end end /// Hardware to update contents. // always @( posedge clk ) case ( b_cmd ) CMD_DEL: if ( mmatch ) storage_full[hashed_key][midx] <= 0; CMD_INS: if ( ematch && !mmatch ) begin storage_full[hashed_key][eidx] <= 1; storage_data[hashed_key][eidx] <= b_data; storage_key[hashed_key][eidx] <= b_key; end else if ( mmatch ) begin storage_data[hashed_key][midx] <= b_data; end default:; endcase endmodule ////////////////////////////////////////////////////////////////////////////// /// Multi-Cycle CAM Module // /// Motivation // // The cam_hash module can reject an insert command even // though it has space. (Because all M locations at the hashed // key value are full.) // // In this module perform a search over several clock cycles. // The search starts in a set determined by a hash of the key, call // that the *preferred set*. // If necessary, the search continues in subsequent sets. // // This allows us to achieve full occupancy without requiring // N key comparison units. // // The downside is that the worst-case time for operations is M cycles. // /// Specifications // // Number of Storage Locations : ssize : N = 2**n // Size of Key : kwid : k // Size of Data Item : dwid : d // Size of Hash (bits) : num_sets_lg : m // Number of Sets : 2 ** num_sets_lg : M = 2**m // // Can store N different elements regardless of the key values. // // Number of cycles to read an item range from 1 to M. // // Items added when occupancy was low can be found quickly. // // /// Hardware Cost Highlights: // // Amount of storage: ssize * ( kwid + dwid + 1 ) bits. // = N ( k + d + 1 ) bits // // Number of key comparison units: ssize * ( 2 ** -num_sets_lg ) // = N / M = 2 ** ( n - m ) // /// Known Issues // // Note: These may be fixed in future assignments. // // There is no way to reset this module. This makes it unsuitable // for production use but very suitable for a future homework // assignment. // // No attempt is made to avoid poor performance. For example, // after a delete no attempt is made to move another element into // the vacated location. // // The hash module is on the critical path. If multi-cycle operations // are expected this wastes time. module cam_multicycle #( int kwid = 9, int dwid = 10, int ssize = 16, int num_sets_lg = 1, bit use_hash = 1 ) ( output [dwid:1] out_data, output out_valid, output ready, input [kwid:1] in_key, input [dwid:1] in_data, input Cam_Command in_cmd, input clk); localparam int num_sets = 1 << num_sets_lg; localparam int set_size = ssize >> num_sets_lg; localparam int set_size_lg = $clog2(set_size); logic [dwid:1] storage_data [num_sets][set_size]; logic [kwid:1] storage_key [num_sets][set_size]; logic [0:num_sets-1][0:set_size-1] storage_full; logic [set_size_lg:1] midx, eidx; logic mmatch, ematch; logic [num_sets_lg:1] iter; logic [kwid:1] b_key; // b is for buffered. logic [dwid:1] b_data; Cam_Command b_cmd; logic [set_size_lg:1] b_eidx; logic [num_sets_lg:1] b_sidx; logic b_eidx_full; wire [(num_sets_lg?num_sets_lg-1:0):0] hashed_key; // cadence translate_off initial begin for ( int j=0; j<num_sets; j++ ) for ( int i=0; i<set_size; i++ ) storage_full[j][i] = 0; iter = num_sets - 1; end initial begin if ( ssize > ( 1 << kwid ) ) begin $display("Size of cam too large for key size."); $fatal(1); end end // cadence translate_on wire start = ready && in_cmd != CMD_NOP; /// Remember command, key, and data for use in next cycle. // always @( posedge clk ) if ( start ) begin b_key <= in_key; b_data <= in_data; b_cmd <= in_cmd; iter <= 0; end else begin iter <= ready ? iter : iter + 1; end /// Hardware For Computing Hash of Key // generate if ( use_hash && num_sets_lg > 0 ) hash #(kwid,num_sets_lg) our_hash_module(hashed_key,b_key); else assign hashed_key = 0; endgenerate wire [num_sets_lg:1] set_idx = hashed_key ^ iter; /// Find Matching Key Hardware // always_comb begin mmatch = 0; midx = 0; for ( int i=0; i<set_size; i++ ) if ( storage_full[set_idx][i] && storage_key[set_idx][i] == b_key ) begin mmatch = 1; midx = i; end end wire found = ( b_cmd != CMD_NOP ) && mmatch; wire last_iter = iter == num_sets - 1; assign ready = found || last_iter; assign out_data = storage_data[set_idx][midx]; /// Find Empty Location Hardware // always_comb begin ematch = 0; eidx = 0; for ( int i=0; i<set_size; i++ ) if ( !storage_full[set_idx][i] ) begin ematch = 1; eidx = i; end end /// Hardware to Store Location of First Empty Location // wire [set_size_lg:1] b_eidx_next = b_eidx_full ? b_eidx : eidx; wire [num_sets_lg:1] b_sidx_next = b_eidx_full ? b_sidx : set_idx; wire b_eidx_full_next = b_eidx_full || ematch; always_ff @( posedge clk ) begin if ( !b_eidx_full ) begin b_eidx <= b_eidx_next; b_sidx <= b_sidx_next; end b_eidx_full <= b_eidx_full_next && !start; end assign out_valid = ( b_cmd == CMD_RD || b_cmd == CMD_DEL ) && mmatch || b_cmd == CMD_INS && ( mmatch || b_eidx_full_next ); /// Hardware to Update Contents // always @( posedge clk ) begin if ( found ) case ( b_cmd ) CMD_DEL: storage_full[set_idx][midx] <= 0; CMD_INS: storage_data[set_idx][midx] <= b_data; default:; endcase else if ( last_iter && b_eidx_full_next ) case ( b_cmd ) CMD_INS: begin storage_key[b_sidx_next][b_eidx_next] <= b_key; storage_full[b_sidx_next][b_eidx_next] <= 1; storage_data[b_sidx_next][b_eidx_next] <= b_data; end default:; endcase end endmodule ////////////////////////////////////////////////////////////////////////////// /// Testbench // cadence translate_off module test_cam(); localparam int kwid = 16; localparam int dwid = 20; localparam int ssize_lg = 5; localparam int ssize = 1 << ssize_lg; localparam int num_sets_lg = 4; localparam int num_sets = 1 << num_sets_lg; localparam int idx_bits = $clog2(ssize); localparam int num_pairs = ssize + 5; // localparam int num_pairs = 2; localparam int num_tests = 40000; localparam bit trace = 0; localparam logic [kwid:1] trace_key = 'hx; wire [dwid:1] out_data; wire out_valid, ready; logic [kwid:1] key; logic [dwid:1] in_data; Cam_Command cmd; int cycles; logic clk; cam_hash #(kwid,dwid,ssize,num_sets_lg) c1 (out_data, out_valid, ready, key, in_data, cmd, clk); // cam #(kwid,dwid,ssize) c1(out_data, out_valid, ready, key, in_data, cmd, clk); // our_ram #($clog2(ssize),dwid) r1(out_datax, addr, in_data, write, clk); initial begin clk = 0; cycles = 0; forever begin #1; if ( clk == 0 ) cycles <= cycles + 1; clk <= !clk; end end // Use a hash to remember which keys we have already used. bit keys[int]; // Array of structures holding information about a test input. // struct { logic [kwid:1] k; logic [dwid:1] d; bit in; } pairs[num_pairs]; initial begin #( num_tests * num_sets * 2 * 2 ); $display("Tests taking too long, stopping."); $finish(2); end initial begin logic [dwid:1] out_expect; logic valid_expect; automatic int derr_count = 0; automatic int verr_count = 0; automatic int ins_count = 0; automatic int ins_reject_occ_sum = 0; automatic int ins_reject_space = 0; automatic int ins_reject_full = 0; automatic int rd_in_cycles_sum = 0; automatic int rd_in_count = 0; automatic int del_count = 0; automatic int occ_expect = 0; $display("\n\n"); $display("Running testbench on module with n = %0d, m = %0d.\n", ssize_lg, num_sets_lg); // Initialize pairs array. // for ( int i=0; i<num_pairs; i++ ) begin // Choose a key that has not yet been used. // logic [kwid:1] k; do k = $random(); while ( keys.exists( k ) ); keys[k] = 1; pairs[i].k = k; pairs[i].d = $random(); pairs[i].in = 0; end cmd = CMD_NOP; repeat ( 2 ) @( negedge clk ); while ( !ready ) @( negedge clk ); for ( int i=0; i<num_tests; i++ ) begin automatic int cycles_start = cycles; // Choose a new test pattern. // automatic int idx = {$random} % num_pairs; // Apply a new test pattern. key = pairs[idx].k; if ( pairs[idx].in ) begin // Test pattern should be in CAM. valid_expect = 1; if ( {$random} % 16 < 15 ) begin // Try reading it using either read or insert. cmd = {$random} % 16 < 8 ? CMD_INS : CMD_RD; if ( cmd == CMD_INS ) begin pairs[idx].d++; out_expect = 'hx; end else begin out_expect = pairs[idx].d; end in_data = pairs[idx].d; end else begin // Try deleting it. cmd = CMD_DEL; occ_expect--; out_expect = 'hx; pairs[idx].in = 0; del_count++; end end else begin // Test pattern should not be in CAM, make sure it isn't. if ( {$random} % 16 < 8 ) begin cmd = CMD_RD; out_expect = 'hx; valid_expect = 0; end else begin cmd = CMD_INS; out_expect = 'hx; valid_expect = 'hx; in_data = pairs[idx].d; end end do @( negedge clk ); while ( !ready ); if ( cmd == CMD_RD && valid_expect === 1 ) begin rd_in_count++; rd_in_cycles_sum += cycles - cycles_start; end if ( ( trace || trace_key === key ) && ( derr_count + verr_count ) < 2 ) $display("Command %d, key %h, expect %h in data %h out_valid %d", cmd, key, out_expect, in_data, out_valid); if ( cmd == CMD_INS && !pairs[idx].in ) begin ins_count++; if ( out_valid ) begin occ_expect++; pairs[idx].in = 1; out_expect = 'hx; end else begin ins_reject_occ_sum += occ_expect; if ( occ_expect < ssize ) ins_reject_space++; else ins_reject_full++; out_expect = 'hx; end end // Check whether module output is correct. // if ( out_expect !== (dwid)'('hx) && out_data !== out_expect ) begin derr_count++; if ( derr_count < 20 ) $display(" i %4d %h != %h (correct) cmd %d key %h", i, out_data, out_expect, cmd, key); end if ( valid_expect !== 1'hx && valid_expect !== out_valid ) begin verr_count++; if ( verr_count < 20 ) $display(" i %4d %h != %h (correct) output valid bit cmd %d key %h", i, out_valid, valid_expect, cmd, key); end end $display("Done with %1d tests (%1d ins, %1d del), %1d+%1d = %1d errors.", num_tests, ins_count, del_count, derr_count, verr_count, derr_count + verr_count); $display("Insert rejected with space %.2f%%, when full %.2f%%", 100 * real'(ins_reject_space) / ins_count, 100 * real'(ins_reject_full) / ins_count); $display("Average occupancy at reject %.2f%%", 100.0 * real'(ins_reject_occ_sum) / ( ssize * ( ins_reject_space + ins_reject_full ))); $display("Avg read time when in: %.2f cyc", real'(rd_in_cycles_sum) / rd_in_count); $display("\n\n"); $finish(2); end endmodule // cadence translate_on // Effort high // Module Name Area Clock Total // Period Delay // cam_kwid8_dwid8_ssize8 129584 1753 1753 // cam_kwid8_dwid8_ssize16 244720 2364 2364 // cam_kwid8_dwid8_ssize32 502496 2487 2487 // cam_kwid8_dwid8_ssize64 991624 3278 3278 // Effort medium // Module Name Area Clock Total // Period Delay // cam_kwid8_dwid8_ssize8 123212 1970 1970 // cam_kwid8_dwid8_ssize16 233852 2486 2486 // cam_kwid8_dwid8_ssize32 484936 2596 2596 // cam_kwid8_dwid8_ssize64 950576 3813 3813 // Module Name Area Clock Total // Period Delay // our_ram_awid3_dwid8 79712 736 736 // our_ram_awid4_dwid8 160636 739 739 // our_ram_awid5_dwid8 319616 758 758 // our_ram_awid6_dwid8 641920 734 734