Project Full coverage report
Current view: Cores-VeeR-EL2—Cores-VeeR-EL2—design—lsu—el2_lsu_stbuf.sv Coverage Hit Total
Test Date: 19-09-2024 Toggle 78.9% 60 76
Test: all Branch 100.0% 22 22

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : // Copyright 2020 Western Digital Corporation or its affiliates.
       3              : //
       4              : // Licensed under the Apache License, Version 2.0 (the "License");
       5              : // you may not use this file except in compliance with the License.
       6              : // You may obtain a copy of the License at
       7              : //
       8              : // http://www.apache.org/licenses/LICENSE-2.0
       9              : //
      10              : // Unless required by applicable law or agreed to in writing, software
      11              : // distributed under the License is distributed on an "AS IS" BASIS,
      12              : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      13              : // See the License for the specific language governing permissions and
      14              : // limitations under the License.
      15              : 
      16              : //********************************************************************************
      17              : // $Id$
      18              : //
      19              : //
      20              : // Owner:
      21              : // Function: Store Buffer
      22              : // Comments: Dual writes and single drain
      23              : //
      24              : //
      25              : // DC1 -> DC2 -> DC3 -> DC4 (Commit)
      26              : //
      27              : // //********************************************************************************
      28              : 
      29              : 
      30              : module el2_lsu_stbuf
      31              : import el2_pkg::*;
      32              : #(
      33              : `include "el2_param.vh"
      34              :  )
      35              : (
      36     61843746 :    input logic                           clk,                         // core clock
      37          316 :    input logic                           rst_l,                       // reset
      38              : 
      39     61843746 :    input logic                           lsu_stbuf_c1_clk,            // stbuf clock
      40     61843746 :    input logic                           lsu_free_c2_clk,             // free clk
      41              : 
      42              :    // Store Buffer input
      43       258930 :    input logic                           store_stbuf_reqvld_r,        // core instruction goes to stbuf
      44      2279496 :    input logic                           lsu_commit_r,                // lsu commits
      45      2276073 :    input logic                           dec_lsu_valid_raw_d,         // Speculative decode valid
      46         1716 :    input logic [pt.DCCM_DATA_WIDTH-1:0]  store_data_hi_r,             // merged data from the dccm for stores. This is used for fwding
      47        92644 :    input logic [pt.DCCM_DATA_WIDTH-1:0]  store_data_lo_r,             // merged data from the dccm for stores. This is used for fwding
      48         1920 :    input logic [pt.DCCM_DATA_WIDTH-1:0]  store_datafn_hi_r,           // merged data from the dccm for stores
      49        92644 :    input logic [pt.DCCM_DATA_WIDTH-1:0]  store_datafn_lo_r,           // merged data from the dccm for stores
      50              : 
      51              :    // Store Buffer output
      52       258446 :    output logic                          stbuf_reqvld_any,            // stbuf is draining
      53            0 :    output logic                          stbuf_reqvld_flushed_any,    // Top entry is flushed
      54        18811 :    output logic [pt.LSU_SB_BITS-1:0]     stbuf_addr_any,              // address
      55         7574 :    output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_data_any,              // stbuf data
      56              : 
      57       262888 :    input  logic                          lsu_stbuf_commit_any,        // pop the stbuf as it commits
      58        10324 :    output logic                          lsu_stbuf_full_any,          // stbuf is full
      59       258763 :    output logic                          lsu_stbuf_empty_any,         // stbuf is empty
      60       258930 :    output logic                          ldst_stbuf_reqvld_r,         // needed for clocking
      61              : 
      62       471780 :    input logic [pt.LSU_SB_BITS-1:0]      lsu_addr_d,                  // lsu address D-stage
      63       594110 :    input logic [31:0]                    lsu_addr_m,                  // lsu address M-stage
      64       594109 :    input logic [31:0]                    lsu_addr_r,                  // lsu address R-stage
      65              : 
      66       678187 :    input logic [pt.LSU_SB_BITS-1:0]      end_addr_d,                  // lsu end address D-stage - needed to check unaligned
      67       594510 :    input logic [31:0]                    end_addr_m,                  // lsu end address M-stage - needed to check unaligned
      68       594509 :    input logic [31:0]                    end_addr_r,                  // lsu end address R-stage - needed to check unaligned
      69              : 
      70        36568 :    input logic                           ldst_dual_d, ldst_dual_m, ldst_dual_r,
      71       614420 :    input logic                           addr_in_dccm_m,              // address is in dccm
      72       614420 :    input logic                           addr_in_dccm_r,              // address is in dccm
      73              : 
      74              :    // Forwarding signals
      75       614428 :    input logic                           lsu_cmpen_m,                 // needed for forwarding stbuf - load
      76       478184 :    input el2_lsu_pkt_t                  lsu_pkt_m,                   // LSU packet M-stage
      77       478181 :    input el2_lsu_pkt_t                  lsu_pkt_r,                   // LSU packet R-stage
      78              : 
      79         5218 :    output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_hi_m,          // forwarded store data, hi word (R-stage pipe or stbuf)
      80         4892 :    output logic [pt.DCCM_DATA_WIDTH-1:0] stbuf_fwddata_lo_m,          // forwarded store data, lo word (R-stage pipe or stbuf)
      81         4198 :    output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_hi_m,        // forwarding byte enables, hi word
      82         4202 :    output logic [pt.DCCM_BYTE_WIDTH-1:0] stbuf_fwdbyteen_lo_m,        // forwarding byte enables, lo word
      83              : 
      84            0 :    input  logic       scan_mode                                       // Scan mode
      85              : 
      86              : );
      87              : 
      88              : 
      89              :    localparam DEPTH      = pt.LSU_STBUF_DEPTH;
      90              :    localparam DATA_WIDTH = pt.DCCM_DATA_WIDTH;
      91              :    localparam BYTE_WIDTH = pt.DCCM_BYTE_WIDTH;
      92              :    localparam DEPTH_LOG2 = $clog2(DEPTH);
      93              : 
      94              :    // These are the fields in the store queue
      95        85454 :    logic [DEPTH-1:0]                     stbuf_vld;
      96            0 :    logic [DEPTH-1:0]                     stbuf_dma_kill;
      97         1563 :    logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addr;
      98        72904 :    logic [DEPTH-1:0][BYTE_WIDTH-1:0]     stbuf_byteen;
      99         2718 :    logic [DEPTH-1:0][DATA_WIDTH-1:0]     stbuf_data;
     100              : 
     101        85499 :    logic [DEPTH-1:0]                     sel_lo;
     102        85454 :    logic [DEPTH-1:0]                     stbuf_wr_en;
     103            0 :    logic [DEPTH-1:0]                     stbuf_dma_kill_en;
     104        85454 :    logic [DEPTH-1:0]                     stbuf_reset;
     105       655432 :    logic [DEPTH-1:0][pt.LSU_SB_BITS-1:0] stbuf_addrin;
     106        15488 :    logic [DEPTH-1:0][DATA_WIDTH-1:0]     stbuf_datain;
     107        73705 :    logic [DEPTH-1:0][BYTE_WIDTH-1:0]     stbuf_byteenin;
     108              : 
     109            0 :    logic [7:0]             store_byteen_ext_r;
     110            0 :    logic [BYTE_WIDTH-1:0]  store_byteen_hi_r;
     111       697750 :    logic [BYTE_WIDTH-1:0]  store_byteen_lo_r;
     112              : 
     113       258730 :    logic                   WrPtrEn, RdPtrEn;
     114        85460 :    logic [DEPTH_LOG2-1:0]  WrPtr, RdPtr;
     115        85465 :    logic [DEPTH_LOG2-1:0]  NxtWrPtr, NxtRdPtr;
     116        85465 :    logic [DEPTH_LOG2-1:0]  WrPtrPlus1, WrPtrPlus2, RdPtrPlus1;
     117              : 
     118            0 :    logic                   dual_stbuf_write_r;
     119              : 
     120       258930 :    logic                   isdccmst_m, isdccmst_r;
     121            0 :    logic [3:0]             stbuf_numvld_any, stbuf_specvld_any;
     122            0 :    logic [1:0]             stbuf_specvld_m, stbuf_specvld_r;
     123              : 
     124       678188 :    logic [pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] cmpaddr_hi_m, cmpaddr_lo_m;
     125              : 
     126              :    // variables to detect matching from the store queue
     127         3558 :    logic [DEPTH-1:0]                 stbuf_match_hi, stbuf_match_lo;
     128         1010 :    logic [DEPTH-1:0][BYTE_WIDTH-1:0] stbuf_fwdbyteenvec_hi, stbuf_fwdbyteenvec_lo;
     129         4106 :    logic [DATA_WIDTH-1:0]            stbuf_fwddata_hi_pre_m, stbuf_fwddata_lo_pre_m;
     130         4078 :    logic [BYTE_WIDTH-1:0]            stbuf_fwdbyteen_hi_pre_m, stbuf_fwdbyteen_lo_pre_m;
     131              : 
     132              :    // logic to detect matching from the pipe - needed for store - load forwarding
     133            0 :    logic [BYTE_WIDTH-1:0]  ld_byte_rhit_lo_lo, ld_byte_rhit_hi_lo, ld_byte_rhit_lo_hi, ld_byte_rhit_hi_hi;
     134            0 :    logic                   ld_addr_rhit_lo_lo, ld_addr_rhit_hi_lo, ld_addr_rhit_lo_hi, ld_addr_rhit_hi_hi;
     135              : 
     136          124 :    logic [BYTE_WIDTH-1:0]  ld_byte_hit_lo, ld_byte_rhit_lo;
     137          120 :    logic [BYTE_WIDTH-1:0]  ld_byte_hit_hi, ld_byte_rhit_hi;
     138              : 
     139            0 :    logic [BYTE_WIDTH-1:0]  ldst_byteen_hi_r;
     140       648029 :    logic [BYTE_WIDTH-1:0]  ldst_byteen_lo_r;
     141              :    // byte_en flowing down
     142            0 :    logic [7:0]             ldst_byteen_r;
     143            0 :    logic [7:0]             ldst_byteen_ext_r;
     144              :    // fwd data through the pipe
     145          202 :    logic [31:0]       ld_fwddata_rpipe_lo;
     146          528 :    logic [31:0]       ld_fwddata_rpipe_hi;
     147              : 
     148              :    // coalescing signals
     149            0 :    logic [DEPTH-1:0]      store_matchvec_lo_r, store_matchvec_hi_r;
     150            0 :    logic                  store_coalesce_lo_r, store_coalesce_hi_r;
     151              : 
     152              :    //----------------------------------------
     153              :    // Logic starts here
     154              :    //----------------------------------------
     155              :    // Create high/low byte enables
     156              :    assign store_byteen_ext_r[7:0]           = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
     157              :    assign store_byteen_hi_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[7:4] & {4{lsu_pkt_r.store}};
     158              :    assign store_byteen_lo_r[BYTE_WIDTH-1:0] = store_byteen_ext_r[3:0] & {4{lsu_pkt_r.store}};
     159              : 
     160              :    assign RdPtrPlus1[DEPTH_LOG2-1:0]     = RdPtr[DEPTH_LOG2-1:0] + 1'b1;
     161              :    assign WrPtrPlus1[DEPTH_LOG2-1:0]     = WrPtr[DEPTH_LOG2-1:0] + 1'b1;
     162              :    assign WrPtrPlus2[DEPTH_LOG2-1:0]     = WrPtr[DEPTH_LOG2-1:0] + 2'b10;
     163              : 
     164              :    // ecc error on both hi/lo
     165              :    assign dual_stbuf_write_r   = ldst_dual_r & store_stbuf_reqvld_r;
     166              :    assign ldst_stbuf_reqvld_r  = ((lsu_commit_r | lsu_pkt_r.dma) & store_stbuf_reqvld_r);
     167              : 
     168              :   // Store Buffer coalescing
     169              :    for (genvar i=0; i<DEPTH; i++) begin: FindMatchEntry
     170              :        assign store_matchvec_lo_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == lsu_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & ~stbuf_reset[i];
     171              :        assign store_matchvec_hi_r[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == end_addr_r[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & dual_stbuf_write_r & ~stbuf_reset[i];
     172              :    end: FindMatchEntry
     173              : 
     174              :    assign store_coalesce_lo_r = |store_matchvec_lo_r[DEPTH-1:0];
     175              :    assign store_coalesce_hi_r = |store_matchvec_hi_r[DEPTH-1:0];
     176              : 
     177              : 
     178              :    if (pt.DCCM_ENABLE == 1) begin: Gen_dccm_enable
     179              :       // Allocate new in this entry if :
     180              :       // 1. wrptr, single allocate, lo did not coalesce
     181              :       // 2. wrptr, double allocate, lo ^ hi coalesced
     182              :       // 3. wrptr + 1, double allocate, neither lo nor hi coalesced
     183              :       // Also update if there is a hi or a lo coalesce to this entry
     184              :       // Store Buffer instantiation
     185              :       for (genvar i=0; i<DEPTH; i++) begin: GenStBuf
     186              :          assign stbuf_wr_en[i] = ldst_stbuf_reqvld_r & (
     187              :                                    ( (i == WrPtr[DEPTH_LOG2-1:0])      &  ~store_coalesce_lo_r)   |                                                    // Allocate : new Lo
     188              :                                    ( (i == WrPtr[DEPTH_LOG2-1:0])      &  dual_stbuf_write_r & ~store_coalesce_hi_r) |                               // Allocate : only 1 new Write Either
     189              :                                    ( (i == WrPtrPlus1[DEPTH_LOG2-1:0]) &  dual_stbuf_write_r & ~(store_coalesce_lo_r | store_coalesce_hi_r)) |     // Allocate2 : 2 new so Write Hi
     190              :                                    store_matchvec_lo_r[i] | store_matchvec_hi_r[i]);                                                                 // Coalesced Write Lo or Hi
     191              :          assign stbuf_reset[i] = (lsu_stbuf_commit_any | stbuf_reqvld_flushed_any) & (i == RdPtr[DEPTH_LOG2-1:0]);
     192              : 
     193              :          // Mux select for start/end address
     194              :          assign sel_lo[i]                         = ((~ldst_dual_r | store_stbuf_reqvld_r) & (i == WrPtr[DEPTH_LOG2-1:0]) & ~store_coalesce_lo_r) |   // lo allocated new entry
     195              :                                                     store_matchvec_lo_r[i];                                                                                                           // lo coalesced in to this entry
     196              :          assign stbuf_addrin[i][pt.LSU_SB_BITS-1:0]  = sel_lo[i] ? lsu_addr_r[pt.LSU_SB_BITS-1:0]       : end_addr_r[pt.LSU_SB_BITS-1:0];
     197              :          assign stbuf_byteenin[i][BYTE_WIDTH-1:0] = sel_lo[i] ? (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_lo_r[BYTE_WIDTH-1:0])          : (stbuf_byteen[i][BYTE_WIDTH-1:0] | store_byteen_hi_r[BYTE_WIDTH-1:0]);
     198              :          assign stbuf_datain[i][7:0]              = sel_lo[i] ? ((~stbuf_byteen[i][0] | store_byteen_lo_r[0]) ? store_datafn_lo_r[7:0]   : stbuf_data[i][7:0])    :
     199              :                                                                 ((~stbuf_byteen[i][0] | store_byteen_hi_r[0]) ? store_datafn_hi_r[7:0]   : stbuf_data[i][7:0]);
     200              :          assign stbuf_datain[i][15:8]             = sel_lo[i] ? ((~stbuf_byteen[i][1] | store_byteen_lo_r[1]) ? store_datafn_lo_r[15:8]  : stbuf_data[i][15:8])    :
     201              :                                                                 ((~stbuf_byteen[i][1] | store_byteen_hi_r[1]) ? store_datafn_hi_r[15:8]  : stbuf_data[i][15:8]);
     202              :          assign stbuf_datain[i][23:16]            = sel_lo[i] ? ((~stbuf_byteen[i][2] | store_byteen_lo_r[2]) ? store_datafn_lo_r[23:16] : stbuf_data[i][23:16])    :
     203              :                                                                 ((~stbuf_byteen[i][2] | store_byteen_hi_r[2]) ? store_datafn_hi_r[23:16] : stbuf_data[i][23:16]);
     204              :          assign stbuf_datain[i][31:24]            = sel_lo[i] ? ((~stbuf_byteen[i][3] | store_byteen_lo_r[3]) ? store_datafn_lo_r[31:24] : stbuf_data[i][31:24])    :
     205              :                                                                 ((~stbuf_byteen[i][3] | store_byteen_hi_r[3]) ? store_datafn_hi_r[31:24] : stbuf_data[i][31:24]);
     206              : 
     207              :          rvdffsc #(.WIDTH(1))              stbuf_vldff         (.din(1'b1),                                .dout(stbuf_vld[i]),                      .en(stbuf_wr_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
     208              :          rvdffsc #(.WIDTH(1))              stbuf_killff        (.din(1'b1),                                .dout(stbuf_dma_kill[i]),                 .en(stbuf_dma_kill_en[i]), .clear(stbuf_reset[i]), .clk(lsu_free_c2_clk), .*);
     209              :          rvdffe  #(.WIDTH(pt.LSU_SB_BITS)) stbuf_addrff        (.din(stbuf_addrin[i][pt.LSU_SB_BITS-1:0]), .dout(stbuf_addr[i][pt.LSU_SB_BITS-1:0]), .en(stbuf_wr_en[i]), .*);
     210              :          rvdffsc #(.WIDTH(BYTE_WIDTH))     stbuf_byteenff      (.din(stbuf_byteenin[i][BYTE_WIDTH-1:0]),   .dout(stbuf_byteen[i][BYTE_WIDTH-1:0]),   .en(stbuf_wr_en[i]), .clear(stbuf_reset[i]), .clk(lsu_stbuf_c1_clk), .*);
     211              :          rvdffe  #(.WIDTH(DATA_WIDTH))     stbuf_dataff        (.din(stbuf_datain[i][DATA_WIDTH-1:0]),     .dout(stbuf_data[i][DATA_WIDTH-1:0]),     .en(stbuf_wr_en[i]), .*);
     212              :       end
     213              :    end else begin: Gen_dccm_disable
     214              :       assign stbuf_wr_en[DEPTH-1:0] = '0;
     215              :       assign stbuf_reset[DEPTH-1:0] = '0;
     216              :       assign stbuf_vld[DEPTH-1:0]   = '0;
     217              :       assign stbuf_dma_kill[DEPTH-1:0] = '0;
     218              :       assign stbuf_addr[DEPTH-1:0]  = '0;
     219              :       assign stbuf_byteen[DEPTH-1:0] = '0;
     220              :       assign stbuf_data[DEPTH-1:0]   = '0;
     221              :    end
     222              : 
     223              :    // Store Buffer drain logic
     224              :    assign stbuf_reqvld_flushed_any            = stbuf_vld[RdPtr] & stbuf_dma_kill[RdPtr];
     225              :    assign stbuf_reqvld_any                    = stbuf_vld[RdPtr] & ~stbuf_dma_kill[RdPtr] & ~(|stbuf_dma_kill_en[DEPTH-1:0]);  // Don't drain if some kill bit is being set this cycle
     226              :    assign stbuf_addr_any[pt.LSU_SB_BITS-1:0]  = stbuf_addr[RdPtr][pt.LSU_SB_BITS-1:0];
     227              :    assign stbuf_data_any[DATA_WIDTH-1:0]      = stbuf_data[RdPtr][DATA_WIDTH-1:0];
     228              : 
     229              :    // Update the RdPtr/WrPtr logic
     230              :    // Need to revert the WrPtr for flush cases. Also revert the pipe WrPtrs
     231              :    assign WrPtrEn                  = (ldst_stbuf_reqvld_r  & ~dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r))  |  // writing 1 and did not coalesce
     232              :                                      (ldst_stbuf_reqvld_r  &  dual_stbuf_write_r & ~(store_coalesce_hi_r & store_coalesce_lo_r));    // writing 2 and at least 1 did not coalesce
     233              :    assign NxtWrPtr[DEPTH_LOG2-1:0] = (ldst_stbuf_reqvld_r & dual_stbuf_write_r & ~(store_coalesce_hi_r | store_coalesce_lo_r)) ? WrPtrPlus2[DEPTH_LOG2-1:0] : WrPtrPlus1[DEPTH_LOG2-1:0];
     234              :    assign RdPtrEn                  = lsu_stbuf_commit_any | stbuf_reqvld_flushed_any;
     235              :    assign NxtRdPtr[DEPTH_LOG2-1:0] = RdPtrPlus1[DEPTH_LOG2-1:0];
     236              : 
     237          317 :    always_comb begin
     238          317 :       stbuf_numvld_any[3:0] = '0;
     239          317 :       for (int i=0; i<DEPTH; i++) begin
     240         1268 :          stbuf_numvld_any[3:0] += {3'b0, stbuf_vld[i]};
     241              :       end
     242              :    end
     243              : 
     244              :     // These go to store buffer to detect full
     245              :    assign isdccmst_m = lsu_pkt_m.valid & lsu_pkt_m.store & addr_in_dccm_m & ~lsu_pkt_m.dma;
     246              :    assign isdccmst_r = lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r & ~lsu_pkt_r.dma;
     247              : 
     248              :    assign stbuf_specvld_m[1:0] = {1'b0,isdccmst_m} << (isdccmst_m & ldst_dual_m);
     249              :    assign stbuf_specvld_r[1:0] = {1'b0,isdccmst_r} << (isdccmst_r & ldst_dual_r);
     250              :    assign stbuf_specvld_any[3:0] = stbuf_numvld_any[3:0] +  {2'b0, stbuf_specvld_m[1:0]} + {2'b0, stbuf_specvld_r[1:0]};
     251              : 
     252              :    assign lsu_stbuf_full_any  = (~ldst_dual_d & dec_lsu_valid_raw_d) ? (stbuf_specvld_any[3:0] >= DEPTH) : (stbuf_specvld_any[3:0] >= (DEPTH-1));
     253              :    assign lsu_stbuf_empty_any = (stbuf_numvld_any[3:0] == 4'b0);
     254              : 
     255              :    // Load forwarding logic from the store queue
     256              :    assign cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = end_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
     257              : 
     258              :    assign cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] = lsu_addr_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)];
     259              : 
     260          317 :    always_comb begin: GenLdFwd
     261          317 :       stbuf_fwdbyteen_hi_pre_m[BYTE_WIDTH-1:0]   = '0;
     262          317 :       stbuf_fwdbyteen_lo_pre_m[BYTE_WIDTH-1:0]   = '0;
     263              : 
     264          317 :       for (int i=0; i<DEPTH; i++) begin
     265         1268 :          stbuf_match_hi[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_hi_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
     266         1268 :          stbuf_match_lo[i] = (stbuf_addr[i][pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)] == cmpaddr_lo_m[pt.LSU_SB_BITS-1:$clog2(BYTE_WIDTH)]) & stbuf_vld[i] & ~stbuf_dma_kill[i] & addr_in_dccm_m;
     267              : 
     268              :          // Kill the store buffer entry if there is a dma store since it already updated the dccm
     269         1268 :          stbuf_dma_kill_en[i] = (stbuf_match_hi[i] | stbuf_match_lo[i]) & lsu_pkt_m.valid & lsu_pkt_m.dma & lsu_pkt_m.store;
     270              : 
     271         1268 :          for (int j=0; j<BYTE_WIDTH; j++) begin
     272         5072 :             stbuf_fwdbyteenvec_hi[i][j] = stbuf_match_hi[i] & stbuf_byteen[i][j] & stbuf_vld[i];
     273         5072 :             stbuf_fwdbyteen_hi_pre_m[j]  |= stbuf_fwdbyteenvec_hi[i][j];
     274              : 
     275         5072 :             stbuf_fwdbyteenvec_lo[i][j] = stbuf_match_lo[i] & stbuf_byteen[i][j] & stbuf_vld[i];
     276         5072 :             stbuf_fwdbyteen_lo_pre_m[j]  |= stbuf_fwdbyteenvec_lo[i][j];
     277              :          end
     278              :       end
     279              :    end // block: GenLdFwd
     280              : 
     281          317 :    always_comb begin: GenLdData
     282          317 :       stbuf_fwddata_hi_pre_m[31:0]   = '0;
     283          317 :       stbuf_fwddata_lo_pre_m[31:0]   = '0;
     284              : 
     285          317 :       for (int i=0; i<DEPTH; i++) begin
     286         1268 :          stbuf_fwddata_hi_pre_m[31:0] |= {32{stbuf_match_hi[i]}} & stbuf_data[i][31:0];
     287         1268 :          stbuf_fwddata_lo_pre_m[31:0] |= {32{stbuf_match_lo[i]}} & stbuf_data[i][31:0];
     288              : 
     289              :       end
     290              : 
     291              :    end // block: GenLdData
     292              : 
     293              :    // Create Hi/Lo signals - needed for the pipe forwarding
     294              :    assign ldst_byteen_r[7:0] =  ({8{lsu_pkt_r.by}}    & 8'b0000_0001) |
     295              :                                  ({8{lsu_pkt_r.half}}  & 8'b0000_0011) |
     296              :                                  ({8{lsu_pkt_r.word}}  & 8'b0000_1111) |
     297              :                                  ({8{lsu_pkt_r.dword}} & 8'b1111_1111);
     298              : 
     299              :    assign ldst_byteen_ext_r[7:0] = ldst_byteen_r[7:0] << lsu_addr_r[1:0];
     300              : 
     301              :    assign ldst_byteen_hi_r[3:0]   = ldst_byteen_ext_r[7:4];
     302              :    assign ldst_byteen_lo_r[3:0]   = ldst_byteen_ext_r[3:0];
     303              : 
     304              :    assign ld_addr_rhit_lo_lo = (lsu_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
     305              :    assign ld_addr_rhit_lo_hi = (end_addr_m[31:2] == lsu_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma;
     306              :    assign ld_addr_rhit_hi_lo = (lsu_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
     307              :    assign ld_addr_rhit_hi_hi = (end_addr_m[31:2] == end_addr_r[31:2]) & lsu_pkt_r.valid & lsu_pkt_r.store & ~lsu_pkt_r.dma & dual_stbuf_write_r;
     308              : 
     309              :    for (genvar i=0; i<BYTE_WIDTH; i++) begin
     310              :       assign ld_byte_rhit_lo_lo[i] = ld_addr_rhit_lo_lo & ldst_byteen_lo_r[i];
     311              :       assign ld_byte_rhit_lo_hi[i] = ld_addr_rhit_lo_hi & ldst_byteen_lo_r[i];
     312              :       assign ld_byte_rhit_hi_lo[i] = ld_addr_rhit_hi_lo & ldst_byteen_hi_r[i];
     313              :       assign ld_byte_rhit_hi_hi[i] = ld_addr_rhit_hi_hi & ldst_byteen_hi_r[i];
     314              : 
     315              :       assign ld_byte_rhit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
     316              :       assign ld_byte_rhit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
     317              : 
     318              :        assign ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_lo[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
     319              :                                                      ({8{ld_byte_rhit_hi_lo[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
     320              : 
     321              :        assign ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] = ({8{ld_byte_rhit_lo_hi[i]}} & store_data_lo_r[(8*i)+7:(8*i)]) |
     322              :                                                      ({8{ld_byte_rhit_hi_hi[i]}} & store_data_hi_r[(8*i)+7:(8*i)]);
     323              : 
     324              :       assign ld_byte_hit_lo[i] = ld_byte_rhit_lo_lo[i] | ld_byte_rhit_hi_lo[i];
     325              :       assign ld_byte_hit_hi[i] = ld_byte_rhit_lo_hi[i] | ld_byte_rhit_hi_hi[i];
     326              : 
     327              :       assign stbuf_fwdbyteen_hi_m[i] = ld_byte_hit_hi[i] | stbuf_fwdbyteen_hi_pre_m[i];
     328              :       assign stbuf_fwdbyteen_lo_m[i] = ld_byte_hit_lo[i] | stbuf_fwdbyteen_lo_pre_m[i];
     329              :       // Pipe vs Store Queue priority: an R-stage pipe hit on this byte overrides the store queue data
     330              :       assign stbuf_fwddata_lo_m[(8*i)+7:(8*i)] = ld_byte_rhit_lo[i]    ? ld_fwddata_rpipe_lo[(8*i)+7:(8*i)] : stbuf_fwddata_lo_pre_m[(8*i)+7:(8*i)];
     331              :       // Pipe vs Store Queue priority: an R-stage pipe hit on this byte overrides the store queue data
     332              :       assign stbuf_fwddata_hi_m[(8*i)+7:(8*i)] = ld_byte_rhit_hi[i]    ? ld_fwddata_rpipe_hi[(8*i)+7:(8*i)] : stbuf_fwddata_hi_pre_m[(8*i)+7:(8*i)];
     333              :    end
     334              : 
     335              :    // Flops
     336              :    rvdffs #(.WIDTH(DEPTH_LOG2)) WrPtrff (.din(NxtWrPtr[DEPTH_LOG2-1:0]), .dout(WrPtr[DEPTH_LOG2-1:0]), .en(WrPtrEn), .clk(lsu_stbuf_c1_clk), .*);
     337              :    rvdffs #(.WIDTH(DEPTH_LOG2)) RdPtrff (.din(NxtRdPtr[DEPTH_LOG2-1:0]), .dout(RdPtr[DEPTH_LOG2-1:0]), .en(RdPtrEn), .clk(lsu_stbuf_c1_clk), .*);
     338              : 
     339              : `ifdef RV_ASSERT_ON
     340              : 
     341              :    assert_stbuf_overflow: assert #0 (stbuf_specvld_any[2:0] <= DEPTH);
     342              :    property stbuf_wren_store_dccm;
     343              :       @(posedge clk)  disable iff(~rst_l) (|stbuf_wr_en[DEPTH-1:0]) |-> (lsu_pkt_r.valid & lsu_pkt_r.store & addr_in_dccm_r);
     344              :    endproperty
     345              :    assert_stbuf_wren_store_dccm: assert property (stbuf_wren_store_dccm) else
     346              :       $display("Illegal store buffer write");
     347              : 
     348              : `endif
     349              : 
     350              : endmodule
     351              :