Project Full coverage report
Current view: Cores-VeeR-EL2—Cores-VeeR-EL2—design—lsu—el2_lsu_stbuf.sv Coverage Hit Total
Test Date: 21-11-2024 Toggle 80.0% 60 75
Test: all Branch 100.0% 22 22

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : // Copyright 2020 Western Digital Corporation or its affiliates.
       3              : //
       4              : // Licensed under the Apache License, Version 2.0 (the "License");
       5              : // you may not use this file except in compliance with the License.
       6              : // You may obtain a copy of the License at
       7              : //
       8              : // http://www.apache.org/licenses/LICENSE-2.0
       9              : //
      10              : // Unless required by applicable law or agreed to in writing, software
      11              : // distributed under the License is distributed on an "AS IS" BASIS,
      12              : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      13              : // See the License for the specific language governing permissions and
      14              : // limitations under the License.
      15              : 
      16              : //********************************************************************************
      17              : // $Id$
      18              : //
      19              : //
      20              : // Owner:
      21              : // Function: Store Buffer
      22              : // Comments: Dual writes and single drain
      23              : //
      24              : //
      25              : // DC1 -> DC2 -> DC3 -> DC4 (Commit)
      26              : //
      27              : //********************************************************************************
      28              : 
      29              : 
      30              : module el2_lsu_stbuf   // LSU store buffer: DEPTH-entry queue with same-word coalescing, drain at RdPtr and store-to-load forwarding
      31              : import el2_pkg::*;
      32              : #(
      33              : `include "el2_param.vh"
      34              :  )
      35              : (
      36     69840565 :    input logic                           clk,                         // core clock
      37          338 :    input logic                           rst_l,                       // reset
      38              : 
      39     69840565 :    input logic                           lsu_stbuf_c1_clk,            // stbuf clock
      40     69840565 :    input logic                           lsu_free_c2_clk,             // free clk
      41              : 
      42              :    // Store Buffer input
      43       258932 :    input logic                           store_stbuf_reqvld_r,        // core instruction goes to stbuf
      44      2285226 :    input logic                           lsu_commit_r,                // lsu commits
      45      2282093 :    input logic                           dec_lsu_valid_raw_d,         // Speculative decode valid
      46         1717 :    input logic [pt.DCCM_DATA_WIDTH-1:0]  store_data_hi_r,             // merged data from the dccm for stores. This is used for fwding
      47        92714 :    input logic [pt.DCCM_DATA_WIDTH-1:0]  store_data_lo_r,             // merged data from the dccm for stores. This is used for fwding
      48         1921 :    input logic [pt.DCCM_DATA_WIDTH-1:0]  store_datafn_hi_r,           // merged data from the dccm for stores
      49        92714 :    input logic [pt.DCCM_DATA_WIDTH-1:0]  store_datafn_lo_r,           // merged data from the dccm for stores
      50              : 
      51              :    // Store Buffer output
      52       258448 :    output logic                          stbuf_reqvld_any,            // stbuf is draining
      53            0 :    output logic                          stbuf_reqvld_flushed_any,    // entry at RdPtr is valid but dma-killed (popped without draining)
      54        18811 :    output logic [pt.LSU_SB_BITS-1:0]     stbuf_addr_any,              // address of the entry at RdPtr
      55         7576 :    output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any,              // data of the entry at RdPtr
      56              : 
      57       262890 :    input  logic                          lsu_stbuf_commit_any,        // pop the stbuf entry at RdPtr as it commits
      58        10324 :    output logic                          lsu_stbuf_full_any,          // stbuf is full
      59       258787 :    output logic                          lsu_stbuf_empty_any,         // stbuf is empty
      60       258932 :    output logic                          ldst_stbuf_reqvld_r,         // needed for clocking
      61              : 
      62       470956 :    input logic [pt.LSU_SB_BITS-1:0]      lsu_addr_d,                  // lsu address D-stage
      63       593286 :    input logic [31:0]                    lsu_addr_m,                  // lsu address M-stage
      64       593285 :    input logic [31:0]                    lsu_addr_r,                  // lsu address R-stage
      65              : 
      66       677351 :    input logic [pt.LSU_SB_BITS-1:0]      end_addr_d,                  // lsu end address D-stage - needed to check unaligned
      67       593686 :    input logic [31:0]                    end_addr_m,                  // lsu end address M-stage - needed to check unaligned
      68       593685 :    input logic [31:0]                    end_addr_r,                  // lsu end address R-stage - needed to check unaligned
      69              : 
      70        36584 :    input logic                           ldst_dual_d, ldst_dual_m, ldst_dual_r,
      71       614422 :    input logic                           addr_in_dccm_m,              // address is in dccm
      72       614422 :    input logic                           addr_in_dccm_r,              // address is in dccm
      73              : 
      74              :    // Forwarding signals
      75       614442 :    input logic                           lsu_cmpen_m,                 // needed for forwarding stbuf - load
      76       477824 :    input el2_lsu_pkt_t                  lsu_pkt_m,                   // LSU packet M-stage
      77       477821 :    input el2_lsu_pkt_t                  lsu_pkt_r,                   // LSU packet R-stage
      78              : 
      79         5218 :    output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m,          // forwarded data, hi word (R-stage pipe store or stbuf)
      80         4892 :    output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m,          // forwarded data, lo word (R-stage pipe store or stbuf)
      81         4200 :    output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m,        // per-byte forward hit, hi word
      82         4202 :    output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m,        // per-byte forward hit, lo word
      83              : 
      84              :    // Excluding scan_mode from coverage as its usage is determined by the integrator of the VeeR core.
      85              :    /*verilator coverage_off*/
      86              :    input  logic       scan_mode                                       // Scan mode
      87              :    /*verilator coverage_on*/
      88              : 
      89              : );
      90              : 
      91              : 
      92              :    localparam DEPTH      = pt.LSU_STBUF_DEPTH;
      93              :    localparam DATA_WIDTH = pt.DCCM_DATA_WIDTH;
      94              :    localparam BYTE_WIDTH = pt.DCCM_BYTE_WIDTH;
      95              :    localparam DEPTH_LOG2 = $clog2(DEPTH);
      96              : 
      97              :    // These are the fields in the store queue
      98        85454 :    logic [DEPTH-1:0]                     stbuf_vld;
      99            0 :    logic [DEPTH-1:0]                     stbuf_dma_kill;
     100         1563 :    logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addr;
     101        72904 :    logic [DEPTH-1:0][BYTE_WIDTH-1:0]     stbuf_byteen;
     102         2718 :    logic [DEPTH-1:0][DATA_WIDTH-1:0]     stbuf_data;
     103              : 
     104        85499 :    logic [DEPTH-1:0]                     sel_lo;
     105        85454 :    logic [DEPTH-1:0]                     stbuf_wr_en;
     106            0 :    logic [DEPTH-1:0]                     stbuf_dma_kill_en;
     107        85454 :    logic [DEPTH-1:0]                     stbuf_reset;
     108       654608 :    logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addrin;
     109        15495 :    logic [DEPTH-1:0][DATA_WIDTH-1:0]     stbuf_datain;
     110        73705 :    logic [DEPTH-1:0][BYTE_WIDTH-1:0]     stbuf_byteenin;
     111              : 
     112            0 :    logic [7:0]             store_byteen_ext_r;
     113            0 :    logic [BYTE_WIDTH-1:0]  store_byteen_hi_r;
     114       697736 :    logic [BYTE_WIDTH-1:0]  store_byteen_lo_r;
     115              : 
     116       258732 :    logic                   WrPtrEn, RdPtrEn;
     117        85460 :    logic [DEPTH_LOG2-1:0]  WrPtr, RdPtr;
     118        85466 :    logic [DEPTH_LOG2-1:0]  NxtWrPtr, NxtRdPtr;
     119        85466 :    logic [DEPTH_LOG2-1:0]  WrPtrPlus1, WrPtrPlus2, RdPtrPlus1;
     120              : 
     121            0 :    logic                   dual_stbuf_write_r;
     122              : 
     123       258932 :    logic                   isdccmst_m, isdccmst_r;
     124            0 :    logic [3:0]             stbuf_numvld_any, stbuf_specvld_any;
     125            0 :    logic [1:0]             stbuf_specvld_m, stbuf_specvld_r;
     126              : 
     127       677364 :    logic [pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] cmpaddr_hi_m, cmpaddr_lo_m;
     128              : 
     129              :    // variables to detect matching from the store queue
     130         3558 :    logic [DEPTH-1:0]                 stbuf_match_hi, stbuf_match_lo;
     131         1010 :    logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_fwdbyteenvec_hi, stbuf_fwdbyteenvec_lo;
     132         4106 :    logic [DATA_WIDTH-1:0]            stbuf_fwddata_hi_pre_m, stbuf_fwddata_lo_pre_m;
     133         4078 :    logic [BYTE_WIDTH-1:0]            stbuf_fwdbyteen_hi_pre_m, stbuf_fwdbyteen_lo_pre_m;
     134              : 
     135              :    // logic to detect matching from the pipe - needed for store - load forwarding
     136            0 :    logic [BYTE_WIDTH-1:0]  ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
     137            0 :    logic                   ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
     138              : 
     139          124 :    logic [BYTE_WIDTH-1:0]  ld_byte_hit_lo, ld_byte_rhit_lo;
     140          122 :    logic [BYTE_WIDTH-1:0]  ld_byte_hit_hi, ld_byte_rhit_hi;
     141              : 
     142            0 :    logic [BYTE_WIDTH-1:0]  ldst_byteen_hi_r;
     143       647300 :    logic [BYTE_WIDTH-1:0]  ldst_byteen_lo_r;
     144              :    // byte_en flowing down
     145            0 :    logic [7:0]             ldst_byteen_r;
     146            0 :    logic [7:0]             ldst_byteen_ext_r;
     147              :    // fwd data through the pipe
     148          202 :    logic [31:0]       ld_fwddata_rpipe_lo;
     149          528 :    logic [31:0]       ld_fwddata_rpipe_hi;
     150              : 
     151              :    // coalescing signals
     152            0 :    logic [DEPTH-1:0]      store_matchvec_lo_r, store_matchvec_hi_r;
     153            0 :    logic                  store_coalesce_lo_r, store_coalesce_hi_r;
     154              : 
     155              :    //----------------------------------------
     156              :    // Logic starts here
     157              :    //----------------------------------------
     158              :    // Create high/low byte enables (store only). NOTE(review): hard-coded [7:4]/[3:0] and {4{..}} slices assume BYTE_WIDTH == 4 - confirm
     159              :    assign store_byteen_ext_r[7:0]           = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
     160              :    assign store_byteen_hi_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[7:4] & {4{lsu_pkt_r.store}};
     161              :    assign store_byteen_lo_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[3:0] & {4{lsu_pkt_r.store}};
     162              : 
     163              :    assign RdPtrPlus1[DEPTH_LOG2-1:0]     = RdPtr[DEPTH_LOG2-1:0] + 1'b1;
     164              :    assign WrPtrPlus1[DEPTH_LOG2-1:0]     = WrPtr[DEPTH_LOG2-1:0] + 1'b1;
     165              :    assign WrPtrPlus2[DEPTH_LOG2-1:0]     = WrPtr[DEPTH_LOG2-1:0] + 2'b10;
     166              : 
     167              :    // Dual store to the stbuf: both the lo word (lsu_addr_r) and the hi word (end_addr_r) are written
     168              :    assign dual_stbuf_write_r   = ldst_dual_r & store_stbuf_reqvld_r;
     169              :    assign ldst_stbuf_reqvld_r  = ((lsu_commit_r | lsu_pkt_r.dma) & store_stbuf_reqvld_r);
     170              : 
     171              :   // Store Buffer coalescing: match the R-stage store against valid, non-killed entries that are not being popped this cycle
     172              :    for (genvar i=0; i<DEPTH; i++) begin: FindMatchEntry
     173              :        assign store_matchvec_lo_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == lsu_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & ~stbuf_reset[i];
     174              :        assign store_matchvec_hi_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == end_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & dual_stbuf_write_r & ~stbuf_reset[i];
     175              :    end: FindMatchEntry
     176              : 
     177              :    assign store_coalesce_lo_r = |store_matchvec_lo_r[DEPTH-1:0];
     178              :    assign store_coalesce_hi_r = |store_matchvec_hi_r[DEPTH-1:0];
     179              : 
     180              : 
     181              :    if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
     182              :       // Allocate new in this entry if :
     183              :       // 1. wrptr, single allocate, lo did not coalesce
     184              :       // 2. wrptr, double allocate, lo ^ hi coalesced
     185              :       // 3. wrptr + 1, double allocate, neither lo nor hi coalesced
     186              :       // Also update if there is a hi or a lo coalesce to this entry
     187              :       // Store Buffer instantiation
     188              :       for (genvar i=0; i<DEPTH; i++) begin: GenStBuf
     189              :          assign stbuf_wr_en[i] = ldst_stbuf_reqvld_r & (
     190              :                                    ( (i == WrPtr[DEPTH_LOG2-1:0])      &  ~store_coalesce_lo_r)   |                                                    // Allocate : new Lo
     191              :                                    ( (i == WrPtr[DEPTH_LOG2-1:0])      &  dual_stbuf_write_r & ~store_coalesce_hi_r) |                               // Allocate : only 1 new Write Either
     192              :                                    ( (i == WrPtrPlus1[DEPTH_LOG2-1:0]) &  dual_stbuf_write_r & ~(store_coalesce_lo_r | store_coalesce_hi_r)) |     // Allocate2 : 2 new so Write Hi
     193              :                                    store_matchvec_lo_r[i] | store_matchvec_hi_r[i]);                                                                 // Coalesced Write Lo or Hi
     194              :          assign stbuf_reset[i] = (lsu_stbuf_commit_any | stbuf_reqvld_flushed_any) & (i == RdPtr[DEPTH_LOG2-1:0]);
     195              : 
     196              :          // Mux select for start/end address
     197              :          assign sel_lo[i]                         = ((~ldst_dual_r | store_stbuf_reqvld_r) & (i == WrPtr[DEPTH_LOG2-1:0]) & ~store_coalesce_lo_r) |   // lo allocated new entry
     198              :                                                     store_matchvec_lo_r[i];                                                                                                           // lo coalesced in to this entry
     199              :          assign stbuf_addrin[i][pt.LSU_SB_BITS-1:0]  = sel_lo[i] ? lsu_addr_r[pt.LSU_SB_BITS-1:0]       : end_addr_r[pt.LSU_SB_BITS-1:0];
     200              :          assign stbuf_byteenin[i][BYTE_WIDTH-1:0] = sel_lo[i] ? (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_lo_r[BYTE_WIDTH-1:0])          : (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_hi_r[BYTE_WIDTH-1:0]);
     201              :          assign stbuf_datain[i][7:0]              = sel_lo[i] ? ((~stbuf_byteen[i][0] | store_byteen_lo_r[0]) ? store_datafn_lo_r[7:0]   : stbuf_data[i][7:0])    :
     202              :                                                                 ((~stbuf_byteen[i][0] | store_byteen_hi_r[0]) ? store_datafn_hi_r[7:0]   : stbuf_data[i][7:0]);
     203              :          assign stbuf_datain[i][15:8]             = sel_lo[i] ? ((~stbuf_byteen[i][1] | store_byteen_lo_r[1]) ? store_datafn_lo_r[15:8]  : stbuf_data[i][15:8])    :
     204              :                                                                 ((~stbuf_byteen[i][1] | store_byteen_hi_r[1]) ? store_datafn_hi_r[15:8]  : stbuf_data[i][15:8]);
     205              :          assign stbuf_datain[i][23:16]            = sel_lo[i] ? ((~stbuf_byteen[i][2] | store_byteen_lo_r[2]) ? store_datafn_lo_r[23:16] : stbuf_data[i][23:16])    :
     206              :                                                                 ((~stbuf_byteen[i][2] | store_byteen_hi_r[2]) ? store_datafn_hi_r[23:16] : stbuf_data[i][23:16]);
     207              :          assign stbuf_datain[i][31:24]            = sel_lo[i] ? ((~stbuf_byteen[i][3] | store_byteen_lo_r[3]) ? store_datafn_lo_r[31:24] : stbuf_data[i][31:24])    :
     208              :                                                                 ((~stbuf_byteen[i][3] | store_byteen_hi_r[3]) ? store_datafn_hi_r[31:24] : stbuf_data[i][31:24]);
     209              : 
     210              :          rvdffsc #(.WIDTH(1))              stbuf_vldff         (.din(1'b1),                                .dout(stbuf_vld[i]),                      .en(stbuf_wr_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
     211              :          rvdffsc #(.WIDTH(1))              stbuf_killff        (.din(1'b1),                                .dout(stbuf_dma_kill[i]),                 .en(stbuf_dma_kill_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
     212              :          rvdffe  #(.WIDTH(pt.LSU_SB_BITS)) stbuf_addrff        (.din(stbuf_addrin[i][pt.LSU_SB_BITS-1:0]), .dout(stbuf_addr[i][pt.LSU_SB_BITS-1:0]), .en(stbuf_wr_en[i]), .*);
     213              :          rvdffsc #(.WIDTH(BYTE_WIDTH))     stbuf_byteenff      (.din(stbuf_byteenin[i][BYTE_WIDTH-1:0]),   .dout(stbuf_byteen[i][BYTE_WIDTH-1:0]),   .en(stbuf_wr_en[i]), .clear(stbuf_reset[i]), .clk(lsu_stbuf_c1_clk), .*);
     214              :          rvdffe  #(.WIDTH(DATA_WIDTH))     stbuf_dataff        (.din(stbuf_datain[i][DATA_WIDTH-1:0]),     .dout(stbuf_data[i][DATA_WIDTH-1:0]),     .en(stbuf_wr_en[i]), .*);
     215              :       end
     216              :    end else begin: Gen_dccm_disable
     217              :       assign stbuf_wr_en[DEPTH-1:0] = '0;
     218              :       assign stbuf_reset[DEPTH-1:0] = '0;
     219              :       assign stbuf_vld[DEPTH-1:0]   = '0;
     220              :       assign stbuf_dma_kill[DEPTH-1:0] = '0;
     221              :       assign stbuf_addr[DEPTH-1:0]  = '0;
     222              :       assign stbuf_byteen[DEPTH-1:0] = '0;
     223              :       assign stbuf_data[DEPTH-1:0]   = '0;
     224              :    end
     225              : 
     226              :    // Store Buffer drain logic
     227              :    assign stbuf_reqvld_flushed_any            = stbuf_vld[RdPtr] & stbuf_dma_kill[RdPtr];
     228              :    assign stbuf_reqvld_any                    = stbuf_vld[RdPtr] & ~stbuf_dma_kill[RdPtr] & ~(|stbuf_dma_kill_en[DEPTH-1:0]);  // Don't drain if some kill bit is being set this cycle
     229              :    assign stbuf_addr_any[pt.LSU_SB_BITS-1:0]  = stbuf_addr[RdPtr][pt.LSU_SB_BITS-1:0];
     230              :    assign stbuf_data_any[DATA_WIDTH-1:0]      = stbuf_data[RdPtr][DATA_WIDTH-1:0];
     231              : 
     232              :    // Update the RdPtr/WrPtr logic
     233              :    // Need to revert the WrPtr for flush cases. Also revert the pipe WrPtrs (NOTE(review): no revert logic is visible in this module - comment may be stale, confirm)
     234              :    assign WrPtrEn                  = (ldst_stbuf_reqvld_r  & ~dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r))  |  // writing 1 and did not coalesce
     235              :                                      (ldst_stbuf_reqvld_r  &  dual_stbuf_write_r & ~(store_coalesce_hi_r & store_coalesce_lo_r));    // writing 2 and at least 1 did not coalesce
     236              :    assign NxtWrPtr[DEPTH_LOG2-1:0] = (ldst_stbuf_reqvld_r & dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r)) ? WrPtrPlus2[DEPTH_LOG2-1:0] : WrPtrPlus1[DEPTH_LOG2-1:0];
     237              :    assign RdPtrEn                  = lsu_stbuf_commit_any | stbuf_reqvld_flushed_any;
     238              :    assign NxtRdPtr[DEPTH_LOG2-1:0] = RdPtrPlus1[DEPTH_LOG2-1:0];
     239              :    // Count the entries whose valid bit is set
     240          339 :    always_comb begin
     241          339 :       stbuf_numvld_any[3:0] = '0;
     242          339 :       for (int i=0; i<DEPTH; i++) begin
     243         1356 :          stbuf_numvld_any[3:0] += {3'b0, stbuf_vld[i]};
     244              :       end
     245              :    end
     246              : 
     247              :     // Full detection: non-DMA DCCM stores still in M/R are counted speculatively (1 slot each, 2 if dual) on top of the valid entries
     248              :    assign isdccmst_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_in_dccm_m & ~lsu_pkt_m.dma;
     249              :    assign isdccmst_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~lsu_pkt_r.dma;
     250              : 
     251              :    assign stbuf_specvld_m[1:0] = {1'b0,isdccmst_m} << (isdccmst_m & ldst_dual_m);
     252              :    assign stbuf_specvld_r[1:0] = {1'b0,isdccmst_r} << (isdccmst_r & ldst_dual_r);
     253              :    assign stbuf_specvld_any[3:0] = stbuf_numvld_any[3:0] +  {2'b0, stbuf_specvld_m[1:0]} + {2'b0, stbuf_specvld_r[1:0]};
     254              : 
     255              :    assign lsu_stbuf_full_any  = (~ldst_dual_d & dec_lsu_valid_raw_d) ? (stbuf_specvld_any[3:0] >= DEPTH) : (stbuf_specvld_any[3:0] >= (DEPTH-1));
     256              :    assign lsu_stbuf_empty_any = (stbuf_numvld_any[3:0] == 4'b0);
     257              : 
     258              :    // Load forwarding logic from the store queue
     259              :    assign cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = end_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
     260              : 
     261              :    assign cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = lsu_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
     262              : 
     263          339 :    always_comb begin: GenLdFwd
     264          339 :       stbuf_fwdbyteen_hi_pre_m[BYTE_WIDTH-1:0]   = '0;
     265          339 :       stbuf_fwdbyteen_lo_pre_m[BYTE_WIDTH-1:0]   = '0;
     266              : 
     267          339 :       for (int i=0; i<DEPTH; i++) begin
     268         1356 :          stbuf_match_hi[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
     269         1356 :          stbuf_match_lo[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
     270              : 
     271              :          // Kill the store buffer entry if there is a dma store since it already updated the dccm
     272         1356 :          stbuf_dma_kill_en[i] = (stbuf_match_hi[i] | stbuf_match_lo[i]) & lsu_pkt_m.valid & lsu_pkt_m.dma & lsu_pkt_m.store;
     273              : 
     274         1356 :          for (int j=0; j<BYTE_WIDTH; j++) begin
     275         5424 :             stbuf_fwdbyteenvec_hi[i][j] = stbuf_match_hi[i] & stbuf_byteen[i][j] & stbuf_vld[i];
     276         5424 :             stbuf_fwdbyteen_hi_pre_m[j]  |= stbuf_fwdbyteenvec_hi[i][j];
     277              : 
     278         5424 :             stbuf_fwdbyteenvec_lo[i][j] = stbuf_match_lo[i] & stbuf_byteen[i][j] & stbuf_vld[i];
     279         5424 :             stbuf_fwdbyteen_lo_pre_m[j]  |= stbuf_fwdbyteenvec_lo[i][j];
     280              :          end
     281              :       end
     282              :    end // block: GenLdFwd
     283              :    // NOTE(review): data of every matching entry is OR-ed together (32-bit slices hard-coded); presumably coalescing keeps at most one live entry per word address - confirm
     284          339 :    always_comb begin: GenLdData
     285          339 :       stbuf_fwddata_hi_pre_m[31:0]   = '0;
     286          339 :       stbuf_fwddata_lo_pre_m[31:0]   = '0;
     287              : 
     288          339 :       for (int i=0; i<DEPTH; i++) begin
     289         1356 :          stbuf_fwddata_hi_pre_m[31:0] |= {32{stbuf_match_hi[i]}} & stbuf_data[i][31:0];
     290         1356 :          stbuf_fwddata_lo_pre_m[31:0] |= {32{stbuf_match_lo[i]}} & stbuf_data[i][31:0];
     291              : 
     292              :       end
     293              : 
     294              :    end // block: GenLdData
     295              : 
     296              :    // Create Hi/Lo signals - needed for the pipe forwarding
     297              :    assign ldst_byteen_r[7:0] =  ({8{lsu_pkt_r.by}}    & 8'b0000_0001) |
     298              :                                  ({8{lsu_pkt_r.half}}  & 8'b0000_0011) |
     299              :                                  ({8{lsu_pkt_r.word}}  & 8'b0000_1111) |
     300              :                                  ({8{lsu_pkt_r.dword}} & 8'b1111_1111);
     301              : 
     302              :    assign ldst_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
     303              : 
     304              :    assign ldst_byteen_hi_r[3:0]   = ldst_byteen_ext_r[7:4];
     305              :    assign ldst_byteen_lo_r[3:0]   = ldst_byteen_ext_r[3:0];
     306              : 
     307              :    assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
     308              :    assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
     309              :    assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
     310              :    assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
     311              : 
     312              :    for (genvar i=0; i<BYTE_WIDTH; i++) begin
     313              :       assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i];
     314              :       assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i];
     315              :       assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i];
     316              :       assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i];
     317              : 
     318              :       assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
     319              :       assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
     320              : 
     321              :        assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
     322              :                                                      ({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
     323              : 
     324              :        assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
     325              :                                                      ({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
     326              : 
     327              :       assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
     328              :       assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
     329              : 
     330              :       assign stbuf_fwdbyteen_hi_m[i] = ld_byte_hit_hi[i] | stbuf_fwdbyteen_hi_pre_m[i];
     331              :       assign stbuf_fwdbyteen_lo_m[i] = ld_byte_hit_lo[i] | stbuf_fwdbyteen_lo_pre_m[i];
     332              :       // Pipe vs Store Queue priority: a byte hit on the R-stage pipe store wins over store buffer data
     333              :       assign stbuf_fwddata_lo_m[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i]    ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : stbuf_fwddata_lo_pre_m[(8*i)+7:(8*i)];
     334              :       // Pipe vs Store Queue priority: a byte hit on the R-stage pipe store wins over store buffer data
     335              :       assign stbuf_fwddata_hi_m[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i]    ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : stbuf_fwddata_hi_pre_m[(8*i)+7:(8*i)];
     336              :    end
     337              : 
     338              :    // Flops
     339              :    rvdffs #(.WIDTH(DEPTH_LOG2)) WrPtrff (.din(NxtWrPtr[DEPTH_LOG2-1:0]), .dout(WrPtr[DEPTH_LOG2-1:0]), .en(WrPtrEn), .clk(lsu_stbuf_c1_clk), .*);
     340              :    rvdffs #(.WIDTH(DEPTH_LOG2)) RdPtrff (.din(NxtRdPtr[DEPTH_LOG2-1:0]), .dout(RdPtr[DEPTH_LOG2-1:0]), .en(RdPtrEn), .clk(lsu_stbuf_c1_clk), .*);
     341              : 
     342              : `ifdef RV_ASSERT_ON
     343              :    // NOTE(review): the overflow check below reads stbuf_specvld_any[2:0] although the signal is declared [3:0]; bit 3 is ignored - confirm intended
     344              :    assert_stbuf_overflow: assert #0 (stbuf_specvld_any[2:0] <= DEPTH);
     345              :    property stbuf_wren_store_dccm;
     346              :       @(posedge clk)  disable iff(~rst_l) (|stbuf_wr_en[DEPTH-1:0]) |-> (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r);
     347              :    endproperty
     348              :    assert_stbuf_wren_store_dccm: assert property (stbuf_wren_store_dccm) else
     349              :       $display("Illegal store buffer write");
     350              : 
     351              : `endif
     352              : 
     353              : endmodule
     354              :