Project Full coverage report
Current view: Cores-VeeR-EL2—Cores-VeeR-EL2—design—lsu—el2_lsu_lsc_ctl.sv Coverage Hit Total
Test Date: 14-11-2024 Toggle 86.9% 73 84
Test: all Branch 100.0% 17 17

            Line data    Source code
       1              : // SPDX-License-Identifier: Apache-2.0
       2              : // Copyright 2020 Western Digital Corporation or its affiliates.
       3              : //
       4              : // Licensed under the Apache License, Version 2.0 (the "License");
       5              : // you may not use this file except in compliance with the License.
       6              : // You may obtain a copy of the License at
       7              : //
       8              : // http://www.apache.org/licenses/LICENSE-2.0
       9              : //
      10              : // Unless required by applicable law or agreed to in writing, software
      11              : // distributed under the License is distributed on an "AS IS" BASIS,
      12              : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      13              : // See the License for the specific language governing permissions and
      14              : // limitations under the License.
      15              : 
      16              : //********************************************************************************
      17              : // $Id$
      18              : //
      19              : //
      20              : // Owner:
      21              : // Function: LSU control
      22              : // Comments:
      23              : //
      24              : //
      25              : // DC1 -> DC2 -> DC3 -> DC4 (Commit)
      26              : //
      27              : //********************************************************************************
      28              : module el2_lsu_lsc_ctl
      29              : import el2_pkg::*;
      30              : #(
      31              : `include "el2_param.vh"
      32              :  )(
      33          338 :    input logic                rst_l,                     // reset, active low
      34            2 :    input logic                clk_override,              // Override non-functional clock gating
       35     69830461 :    input logic                clk,                       // Clock only while core active.  Through one clock header.  For flops with a second clock header built in.  Connected to ACTIVE_L2CLK.
      36              : 
      37              :    // clocks per pipe
      38     69830461 :    input logic                lsu_c1_m_clk,
      39     69830461 :    input logic                lsu_c1_r_clk,
      40     69830461 :    input logic                lsu_c2_m_clk,
      41     69830461 :    input logic                lsu_c2_r_clk,
      42     69830461 :    input logic                lsu_store_c1_m_clk,
      43              : 
      44            0 :    input logic [31:0]         lsu_ld_data_r,             // Load data R-stage
      45        24696 :    input logic [31:0]         lsu_ld_data_corr_r,        // ECC corrected data R-stage
      46            4 :    input logic                lsu_single_ecc_error_r,    // ECC single bit error R-stage
      47            4 :    input logic                lsu_double_ecc_error_r,    // ECC double bit error R-stage
      48              : 
      49        54456 :    input logic [31:0]         lsu_ld_data_m,             // Load data M-stage
      50            4 :    input logic                lsu_single_ecc_error_m,    // ECC single bit error M-stage
      51            4 :    input logic                lsu_double_ecc_error_m,    // ECC double bit error M-stage
      52              : 
      53        59234 :    input logic                flush_m_up,                // Flush M and D stage
      54        29690 :    input logic                flush_r,                   // Flush R-stage
      55        36584 :    input logic                ldst_dual_d,               // load/store is unaligned at 32 bit boundary D-stage
      56        36584 :    input logic                ldst_dual_m,               // load/store is unaligned at 32 bit boundary M-stage
      57        36584 :    input logic                ldst_dual_r,               // load/store is unaligned at 32 bit boundary R-stage
      58              : 
      59       413533 :    input logic [31:0]         exu_lsu_rs1_d,             // address
      60        81504 :    input logic [31:0]         exu_lsu_rs2_d,             // store data
      61              : 
      62       623733 :    input el2_lsu_pkt_t       lsu_p,                     // lsu control packet
      63      2283261 :    input logic                dec_lsu_valid_raw_d,       // Raw valid for address computation
      64       270454 :    input logic [11:0]         dec_lsu_offset_d,          // 12b offset for load/store addresses
      65              : 
      66            0 :    input  logic [31:0]        picm_mask_data_m,          // PIC data M-stage
      67          200 :    input  logic [31:0]        bus_read_data_m,           // the bus return data
      68        45313 :    output logic [31:0]        lsu_result_m,              // lsu load data
      69        36052 :    output logic [31:0]        lsu_result_corr_r,         // This is the ECC corrected data going to RF
      70              :    // lsu address down the pipe
      71       593765 :    output logic [31:0]        lsu_addr_d,
      72       593764 :    output logic [31:0]        lsu_addr_m,
      73       593763 :    output logic [31:0]        lsu_addr_r,
      74              :    // lsu address down the pipe - needed to check unaligned
      75       593791 :    output logic [31:0]        end_addr_d,
      76       594164 :    output logic [31:0]        end_addr_m,
      77       594163 :    output logic [31:0]        end_addr_r,
      78              :    // store data down the pipe
      79        81504 :    output logic [31:0]        store_data_m,
      80              : 
      81            0 :    input  logic [31:0]         dec_tlu_mrac_ff,          // CSR for memory region control
      82           80 :    output logic                lsu_exc_m,                // Access or misaligned fault
       83        36448 :    output logic                is_sideeffects_m,         // is side-effects space
      84      2286394 :    output logic                lsu_commit_r,             // lsu instruction in r commits
      85            4 :    output logic                lsu_single_ecc_error_incr,// LSU inc SB error counter
      86            4 :    output el2_lsu_error_pkt_t lsu_error_pkt_r,          // lsu exception packet
      87              : 
      88        24696 :    output logic [31:1]         lsu_fir_addr,             // fast interrupt address
      89            0 :    output logic [1:0]          lsu_fir_error,            // Error during fast interrupt lookup
      90              : 
      91              :    // address in dccm/pic/external per pipe stage
      92       614422 :    output logic               addr_in_dccm_d,
      93       614422 :    output logic               addr_in_dccm_m,
      94       614422 :    output logic               addr_in_dccm_r,
      95              : 
      96           12 :    output logic               addr_in_pic_d,
      97           12 :    output logic               addr_in_pic_m,
      98           12 :    output logic               addr_in_pic_r,
      99              : 
     100       614788 :    output logic               addr_external_m,
     101              : 
     102              :    // DMA slave
     103            0 :    input logic                dma_dccm_req,
     104            0 :    input logic [31:0]         dma_mem_addr,
     105            0 :    input logic [2:0]          dma_mem_sz,
     106           22 :    input logic                dma_mem_write,
     107           12 :    input logic [63:0]         dma_mem_wdata,
     108              : 
     109              :    // Store buffer related signals
     110       478026 :    output el2_lsu_pkt_t      lsu_pkt_d,
     111       477978 :    output el2_lsu_pkt_t      lsu_pkt_m,
     112       477975 :    output el2_lsu_pkt_t      lsu_pkt_r,
     113              : 
     114       166874 :     input logic lsu_pmp_error_start,
     115       166874 :     input logic lsu_pmp_error_end,
     116              : 
     117              :    // Excluding scan_mode from coverage as its usage is determined by the integrator of the VeeR core.
     118              :    /*verilator coverage_off*/
     119              :    input  logic               scan_mode                  // Scan mode
     120              :    /*verilator coverage_on*/
     121              : 
     122              :    );
     123              : 
     124           14 :    logic [31:3]        end_addr_pre_m, end_addr_pre_r;
     125       593765 :    logic [31:0]        full_addr_d;
     126       593791 :    logic [31:0]        full_end_addr_d;
     127       475936 :    logic [31:0]        lsu_rs1_d;
     128       270072 :    logic [11:0]        lsu_offset_d;
     129       475936 :    logic [31:0]        rs1_d;
     130       270072 :    logic [11:0]        offset_d;
     131       283284 :    logic [12:0]        end_addr_offset_d;
     132            0 :    logic [2:0]         addr_offset_d;
     133              : 
     134           12 :    logic [63:0]        dma_mem_wdata_shifted;
     135       614789 :    logic               addr_external_d;
     136       614788 :    logic               addr_external_r;
     137           10 :    logic               access_fault_d, misaligned_fault_d;
     138           10 :    logic               access_fault_m, misaligned_fault_m;
     139              : 
     140            0 :    logic               fir_dccm_access_error_d, fir_nondccm_access_error_d;
     141            0 :    logic               fir_dccm_access_error_m, fir_nondccm_access_error_m;
     142              : 
     143            0 :    logic [3:0]         exc_mscause_d, exc_mscause_m;
     144       475936 :    logic [31:0]        rs1_d_raw;
     145        81504 :    logic [31:0]        store_data_d, store_data_pre_m, store_data_m_in;
     146          198 :    logic [31:0]        bus_read_data_r;
     147              : 
     148           22 :    el2_lsu_pkt_t           dma_pkt_d;
     149       477978 :    el2_lsu_pkt_t           lsu_pkt_m_in, lsu_pkt_r_in;
     150            4 :    el2_lsu_error_pkt_t     lsu_error_pkt_m;
     151              : 
     152              : 
     153              :    // Premux the rs1/offset for dma
     154              :    assign lsu_rs1_d[31:0]    = dec_lsu_valid_raw_d ? exu_lsu_rs1_d[31:0] : dma_mem_addr[31:0];
     155              :    assign lsu_offset_d[11:0] = dec_lsu_offset_d[11:0] & {12{dec_lsu_valid_raw_d}};
     156              :    assign rs1_d_raw[31:0]    = lsu_rs1_d[31:0];
     157              :    assign offset_d[11:0]     = lsu_offset_d[11:0];
     158              : 
     159              :    assign rs1_d[31:0] = (lsu_pkt_d.load_ldst_bypass_d) ? lsu_result_m[31:0] : rs1_d_raw[31:0];
     160              : 
     161              :    // generate the ls address
     162              :    rvlsadder   lsadder  (.rs1(rs1_d[31:0]),
     163              :                        .offset(offset_d[11:0]),
     164              :                        .dout(full_addr_d[31:0])
     165              :                        );
     166              : 
     167              :    // Module to generate the memory map of the address
     168              :    el2_lsu_addrcheck addrcheck (
     169              :               .start_addr_d(full_addr_d[31:0]),
     170              :               .end_addr_d(full_end_addr_d[31:0]),
     171              :               .rs1_region_d(rs1_d[31:28]),
     172              :               .*
     173              :   );
     174              : 
     175              :    // Calculate start/end address for load/store
     176              :    assign addr_offset_d[2:0]      = ({3{lsu_pkt_d.half}} & 3'b01) | ({3{lsu_pkt_d.word}} & 3'b11) | ({3{lsu_pkt_d.dword}} & 3'b111);
     177              :    assign end_addr_offset_d[12:0] = {offset_d[11],offset_d[11:0]} + {9'b0,addr_offset_d[2:0]};
     178              :    assign full_end_addr_d[31:0]   = rs1_d[31:0] + {{19{end_addr_offset_d[12]}},end_addr_offset_d[12:0]};
     179              :    assign end_addr_d[31:0]        = full_end_addr_d[31:0];
     180              :    assign lsu_exc_m               = access_fault_m | misaligned_fault_m;
     181              : 
     182              :    // Goes to TLU to increment the ECC error counter
     183              :    assign lsu_single_ecc_error_incr = (lsu_single_ecc_error_r & ~lsu_double_ecc_error_r) & (lsu_commit_r | lsu_pkt_r.dma) & lsu_pkt_r.valid;
     184              : 
     185              :    if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U_Plus1_1
     186              :       logic               access_fault_r, misaligned_fault_r;
     187              :       logic [3:0]         exc_mscause_r;
     188              :       logic               fir_dccm_access_error_r, fir_nondccm_access_error_r;
     189              : 
     190              :       // Generate exception packet
     191              :       assign lsu_error_pkt_r.exc_valid = (access_fault_r | misaligned_fault_r | lsu_double_ecc_error_r) & lsu_pkt_r.valid & ~lsu_pkt_r.dma & ~lsu_pkt_r.fast_int;
     192              :       assign lsu_error_pkt_r.single_ecc_error = lsu_single_ecc_error_r & ~lsu_error_pkt_r.exc_valid & ~lsu_pkt_r.dma;
     193              :       assign lsu_error_pkt_r.inst_type = lsu_pkt_r.store;
     194              :       assign lsu_error_pkt_r.exc_type  = ~misaligned_fault_r;
     195              :       assign lsu_error_pkt_r.mscause[3:0] = (lsu_double_ecc_error_r & ~misaligned_fault_r & ~access_fault_r) ? 4'h1 : exc_mscause_r[3:0];
     196              :       assign lsu_error_pkt_r.addr[31:0] = lsu_addr_r[31:0];
     197              : 
     198              :       assign lsu_fir_error[1:0] = fir_nondccm_access_error_r ? 2'b11 : (fir_dccm_access_error_r ? 2'b10 : ((lsu_pkt_r.fast_int & lsu_double_ecc_error_r) ? 2'b01 : 2'b00));
     199              : 
     200              :       rvdff #(1) access_fault_rff             (.din(access_fault_m),             .dout(access_fault_r),             .clk(lsu_c1_r_clk), .*);
     201              :       rvdff #(1) misaligned_fault_rff         (.din(misaligned_fault_m),         .dout(misaligned_fault_r),         .clk(lsu_c1_r_clk), .*);
     202              :       rvdff #(4) exc_mscause_rff              (.din(exc_mscause_m[3:0]),         .dout(exc_mscause_r[3:0]),         .clk(lsu_c1_r_clk), .*);
     203              :       rvdff #(1) fir_dccm_access_error_mff    (.din(fir_dccm_access_error_m),    .dout(fir_dccm_access_error_r),    .clk(lsu_c1_r_clk), .*);
     204              :       rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_m), .dout(fir_nondccm_access_error_r), .clk(lsu_c1_r_clk), .*);
     205              : 
     206              :    end else begin: L2U_Plus1_0
     207              :       logic [1:0] lsu_fir_error_m;
     208              : 
     209              :       // Generate exception packet
     210              :       assign lsu_error_pkt_m.exc_valid = (access_fault_m | misaligned_fault_m | lsu_double_ecc_error_m) & lsu_pkt_m.valid & ~lsu_pkt_m.dma & ~lsu_pkt_m.fast_int & ~flush_m_up;
     211              :       assign lsu_error_pkt_m.single_ecc_error = lsu_single_ecc_error_m & ~lsu_error_pkt_m.exc_valid & ~lsu_pkt_m.dma;
     212              :       assign lsu_error_pkt_m.inst_type = lsu_pkt_m.store;
     213              :       assign lsu_error_pkt_m.exc_type  = ~misaligned_fault_m;
     214              :       assign lsu_error_pkt_m.mscause[3:0] = (lsu_double_ecc_error_m & ~misaligned_fault_m & ~access_fault_m) ? 4'h1 : exc_mscause_m[3:0];
     215              :       assign lsu_error_pkt_m.addr[31:0] = lsu_addr_m[31:0];
     216              : 
     217              :       assign lsu_fir_error_m[1:0] = fir_nondccm_access_error_m ? 2'b11 : (fir_dccm_access_error_m ? 2'b10 : ((lsu_pkt_m.fast_int & lsu_double_ecc_error_m) ? 2'b01 : 2'b00));
     218              : 
     219              :       rvdff  #(1)                             lsu_exc_valid_rff       (.*, .din(lsu_error_pkt_m.exc_valid),                        .dout(lsu_error_pkt_r.exc_valid),                        .clk(lsu_c2_r_clk));
     220              :       rvdff  #(1)                             lsu_single_ecc_error_rff(.*, .din(lsu_error_pkt_m.single_ecc_error),                 .dout(lsu_error_pkt_r.single_ecc_error),                 .clk(lsu_c2_r_clk));
     221              :       rvdffe #($bits(el2_lsu_error_pkt_t)-2) lsu_error_pkt_rff       (.*, .din(lsu_error_pkt_m[$bits(el2_lsu_error_pkt_t)-1:2]), .dout(lsu_error_pkt_r[$bits(el2_lsu_error_pkt_t)-1:2]), .en(lsu_error_pkt_m.exc_valid | lsu_error_pkt_m.single_ecc_error | clk_override));
     222              :       rvdff #(2)                              lsu_fir_error_rff       (.*, .din(lsu_fir_error_m[1:0]),                             .dout(lsu_fir_error[1:0]),                               .clk(lsu_c2_r_clk));
     223              :    end
     224              : 
     225              :    //Create DMA packet
     226          339 :    always_comb begin
     227          339 :       dma_pkt_d = '0;
     228          339 :       dma_pkt_d.valid   = dma_dccm_req;
     229          339 :       dma_pkt_d.dma     = 1'b1;
     230          339 :       dma_pkt_d.store   = dma_mem_write;
     231          339 :       dma_pkt_d.load    = ~dma_mem_write;
     232          339 :       dma_pkt_d.by      = (dma_mem_sz[2:0] == 3'b0);
     233          339 :       dma_pkt_d.half    = (dma_mem_sz[2:0] == 3'b1);
     234          339 :       dma_pkt_d.word    = (dma_mem_sz[2:0] == 3'b10);
     235          339 :       dma_pkt_d.dword   = (dma_mem_sz[2:0] == 3'b11);
     236              :    end
     237              : 
     238          339 :    always_comb begin
     239          339 :       lsu_pkt_d = dec_lsu_valid_raw_d ? lsu_p : dma_pkt_d;
     240          339 :       lsu_pkt_m_in = lsu_pkt_d;
     241          339 :       lsu_pkt_r_in = lsu_pkt_m;
     242              : 
     243          339 :       lsu_pkt_d.valid = (lsu_p.valid & ~(flush_m_up & ~lsu_p.fast_int)) | dma_dccm_req;
     244          339 :       lsu_pkt_m_in.valid = lsu_pkt_d.valid & ~(flush_m_up & ~lsu_pkt_d.dma);
     245          339 :       lsu_pkt_r_in.valid = lsu_pkt_m.valid & ~(flush_m_up & ~lsu_pkt_m.dma) ;
     246              :    end
     247              : 
     248              :    // C2 clock for valid and C1 for other bits of packet
     249              :    rvdff #(1) lsu_pkt_vldmff (.*, .din(lsu_pkt_m_in.valid), .dout(lsu_pkt_m.valid), .clk(lsu_c2_m_clk));
     250              :    rvdff #(1) lsu_pkt_vldrff (.*, .din(lsu_pkt_r_in.valid), .dout(lsu_pkt_r.valid), .clk(lsu_c2_r_clk));
     251              : 
     252              :    rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_mff (.*, .din(lsu_pkt_m_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_m[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_m_clk));
     253              :    rvdff #($bits(el2_lsu_pkt_t)-1) lsu_pkt_rff (.*, .din(lsu_pkt_r_in[$bits(el2_lsu_pkt_t)-1:1]), .dout(lsu_pkt_r[$bits(el2_lsu_pkt_t)-1:1]), .clk(lsu_c1_r_clk));
     254              : 
     255              : 
     256              : 
     257              :    if (pt.LOAD_TO_USE_PLUS1 == 1) begin: L2U1_Plus1_1
     258              :       logic [31:0] lsu_ld_datafn_r, lsu_ld_datafn_corr_r;
     259              : 
     260              :       assign lsu_ld_datafn_r[31:0]  = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_r[31:0];
     261              :       assign lsu_ld_datafn_corr_r[31:0]  = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
     262              : 
      263              :       // this is really an R-stage signal
     264              :       assign lsu_result_m[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_r[7:0]}) |
     265              :                                                                     ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_r[15:0]}) |
     266              :                                                                     ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_r[7]}}, lsu_ld_datafn_r[7:0]}) |
     267              :                                                                     ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_r[15]}},lsu_ld_datafn_r[15:0]}) |
     268              :                                                                     ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_r[31:0]);
     269              : 
     270              :       // this signal is used for gpr update
     271              :       assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
     272              :                                                                               ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
     273              :                                                                               ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
     274              :                                                                               ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
     275              :                                                                               ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_corr_r[31:0]);
     276              : 
     277              :    end else begin: L2U1_Plus1_0 // block: L2U1_Plus1_1
     278              :       logic [31:0] lsu_ld_datafn_m, lsu_ld_datafn_corr_r;
     279              : 
     280              :       assign lsu_ld_datafn_m[31:0] = addr_external_m ? bus_read_data_m[31:0] : lsu_ld_data_m[31:0];
     281              :       assign lsu_ld_datafn_corr_r[31:0] = addr_external_r ? bus_read_data_r[31:0] : lsu_ld_data_corr_r[31:0];
     282              : 
     283              :       // this result must look at prior stores and merge them in
     284              :       assign lsu_result_m[31:0] = ({32{ lsu_pkt_m.unsign & lsu_pkt_m.by  }} & {24'b0,lsu_ld_datafn_m[7:0]}) |
     285              :                                                                     ({32{ lsu_pkt_m.unsign & lsu_pkt_m.half}} & {16'b0,lsu_ld_datafn_m[15:0]}) |
     286              :                                                                     ({32{~lsu_pkt_m.unsign & lsu_pkt_m.by  }} & {{24{  lsu_ld_datafn_m[7]}}, lsu_ld_datafn_m[7:0]}) |
     287              :                                                                     ({32{~lsu_pkt_m.unsign & lsu_pkt_m.half}} & {{16{  lsu_ld_datafn_m[15]}},lsu_ld_datafn_m[15:0]}) |
     288              :                                                                     ({32{lsu_pkt_m.word}}                     & lsu_ld_datafn_m[31:0]);
     289              : 
     290              :       // this signal is used for gpr update
     291              :       assign lsu_result_corr_r[31:0] = ({32{ lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {24'b0,lsu_ld_datafn_corr_r[7:0]}) |
     292              :                                                                               ({32{ lsu_pkt_r.unsign & lsu_pkt_r.half}} & {16'b0,lsu_ld_datafn_corr_r[15:0]}) |
     293              :                                                                               ({32{~lsu_pkt_r.unsign & lsu_pkt_r.by  }} & {{24{  lsu_ld_datafn_corr_r[7]}}, lsu_ld_datafn_corr_r[7:0]}) |
     294              :                                                                               ({32{~lsu_pkt_r.unsign & lsu_pkt_r.half}} & {{16{  lsu_ld_datafn_corr_r[15]}},lsu_ld_datafn_corr_r[15:0]}) |
     295              :                                                                               ({32{lsu_pkt_r.word}}                     & lsu_ld_datafn_corr_r[31:0]);
     296              :    end
     297              : 
     298              :    // Fast interrupt address
     299              :    assign lsu_fir_addr[31:1]    = lsu_ld_data_corr_r[31:1];
     300              : 
     301              :    // absence load/store all 0's
     302              :    assign lsu_addr_d[31:0] = full_addr_d[31:0];
     303              : 
     304              :    // Interrupt as a flush source allows the WB to occur
     305              :    assign lsu_commit_r = lsu_pkt_r.valid & (lsu_pkt_r.store | lsu_pkt_r.load) & ~flush_r & ~lsu_pkt_r.dma;
     306              : 
      307              :    assign dma_mem_wdata_shifted[63:0] = 64'(dma_mem_wdata[63:0] >> {dma_mem_addr[2:0], 3'b000});   // Shift the dma data to lower bits to make it consistent with lsu stores
     308              :    assign store_data_d[31:0] = dma_dccm_req ? dma_mem_wdata_shifted[31:0] : exu_lsu_rs2_d[31:0];  // Write to PIC still happens in r stage
     309              : 
     310              :    assign store_data_m_in[31:0] = (lsu_pkt_d.store_data_bypass_d) ? lsu_result_m[31:0] : store_data_d[31:0];
     311              : 
     312              :    assign store_data_m[31:0] = (picm_mask_data_m[31:0] | {32{~addr_in_pic_m}}) & ((lsu_pkt_m.store_data_bypass_m) ? lsu_result_m[31:0] : store_data_pre_m[31:0]);
     313              : 
     314              : 
     315              :    rvdff #(32)  sdmff (.*, .din(store_data_m_in[31:0]), .dout(store_data_pre_m[31:0]),                       .clk(lsu_store_c1_m_clk));
     316              : 
     317              :    rvdff #(32) samff (.*, .din(lsu_addr_d[31:0]), .dout(lsu_addr_m[31:0]), .clk(lsu_c1_m_clk));
     318              :    rvdff #(32) sarff (.*, .din(lsu_addr_m[31:0]), .dout(lsu_addr_r[31:0]), .clk(lsu_c1_r_clk));
     319              : 
     320              :    assign end_addr_m[31:3] = ldst_dual_m ? end_addr_pre_m[31:3] : lsu_addr_m[31:3];       // This is for power saving
     321              :    assign end_addr_r[31:3] = ldst_dual_r ? end_addr_pre_r[31:3] : lsu_addr_r[31:3];       // This is for power saving
     322              : 
     323              :    rvdffe #(29) end_addr_hi_mff (.*, .din(end_addr_d[31:3]), .dout(end_addr_pre_m[31:3]), .en((lsu_pkt_d.valid & ldst_dual_d) | clk_override));
     324              :    rvdffe #(29) end_addr_hi_rff (.*, .din(end_addr_m[31:3]), .dout(end_addr_pre_r[31:3]), .en((lsu_pkt_m.valid & ldst_dual_m) | clk_override));
     325              : 
     326              :    rvdff #(3)  end_addr_lo_mff (.*, .din(end_addr_d[2:0]), .dout(end_addr_m[2:0]), .clk(lsu_c1_m_clk));
     327              :    rvdff #(3)  end_addr_lo_rff (.*, .din(end_addr_m[2:0]), .dout(end_addr_r[2:0]), .clk(lsu_c1_r_clk));
     328              : 
     329              :    rvdff #(1) addr_in_dccm_mff(.din(addr_in_dccm_d), .dout(addr_in_dccm_m), .clk(lsu_c1_m_clk), .*);
     330              :    rvdff #(1) addr_in_dccm_rff(.din(addr_in_dccm_m), .dout(addr_in_dccm_r), .clk(lsu_c1_r_clk), .*);
     331              : 
     332              :    rvdff #(1) addr_in_pic_mff(.din(addr_in_pic_d), .dout(addr_in_pic_m), .clk(lsu_c1_m_clk), .*);
     333              :    rvdff #(1) addr_in_pic_rff(.din(addr_in_pic_m), .dout(addr_in_pic_r), .clk(lsu_c1_r_clk), .*);
     334              : 
     335              :    rvdff #(1) addr_external_mff(.din(addr_external_d), .dout(addr_external_m), .clk(lsu_c1_m_clk), .*);
     336              :    rvdff #(1) addr_external_rff(.din(addr_external_m), .dout(addr_external_r), .clk(lsu_c1_r_clk), .*);
     337              : 
     338              :    rvdff #(1) access_fault_mff     (.din(access_fault_d),     .dout(access_fault_m),     .clk(lsu_c1_m_clk), .*);
     339              :    rvdff #(1) misaligned_fault_mff (.din(misaligned_fault_d), .dout(misaligned_fault_m), .clk(lsu_c1_m_clk), .*);
     340              :    rvdff #(4) exc_mscause_mff      (.din(exc_mscause_d[3:0]), .dout(exc_mscause_m[3:0]), .clk(lsu_c1_m_clk), .*);
     341              : 
     342              :    rvdff #(1) fir_dccm_access_error_mff    (.din(fir_dccm_access_error_d),    .dout(fir_dccm_access_error_m),    .clk(lsu_c1_m_clk), .*);
     343              :    rvdff #(1) fir_nondccm_access_error_mff (.din(fir_nondccm_access_error_d), .dout(fir_nondccm_access_error_m), .clk(lsu_c1_m_clk), .*);
     344              : 
     345              :    rvdffe #(32) bus_read_data_r_ff (.*, .din(bus_read_data_m[31:0]), .dout(bus_read_data_r[31:0]), .en(addr_external_m | clk_override));
     346              : 
     347              : endmodule