Project Full coverage report
Current view: Cores-VeeR-EL2—Cores-VeeR-EL2—design—ifu—el2_ifu_bp_ctl.sv Coverage Hit Total
Test Date: 21-11-2024 Toggle 96.3% 105 109
Test: all Branch 100.0% 27 27

            Line data    Source code
       1              : //********************************************************************************
       2              : // SPDX-License-Identifier: Apache-2.0
       3              : // Copyright 2020 Western Digital Corporation or its affiliates.
       4              : //
       5              : // Licensed under the Apache License, Version 2.0 (the "License");
       6              : // you may not use this file except in compliance with the License.
       7              : // You may obtain a copy of the License at
       8              : //
       9              : // http://www.apache.org/licenses/LICENSE-2.0
      10              : //
      11              : // Unless required by applicable law or agreed to in writing, software
      12              : // distributed under the License is distributed on an "AS IS" BASIS,
      13              : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      14              : // See the License for the specific language governing permissions and
      15              : // limitations under the License.
      16              : //********************************************************************************
      17              : 
      18              : //********************************************************************************
      19              : // Function: Branch predictor
      20              : // Comments:
      21              : //
      22              : //
      23              : //  Bank3 : Bank2 : Bank1 : Bank0
      24              : //  FA  C       8       4       0
      25              : //********************************************************************************
      26              : 
      27              : module el2_ifu_bp_ctl
      28              : import el2_pkg::*;
      29              : #(
      30              : `include "el2_param.vh"
      31              :  )
      32              :   (
      33              : 
      34     69840565 :    input logic clk,
      35          338 :    input logic rst_l,
      36              : 
      37      6782653 :    input logic ic_hit_f,      // Icache hit, enables F address capture
      38              : 
      39          443 :    input logic [31:1] ifc_fetch_addr_f, // look up btb address
      40      3714536 :    input logic ifc_fetch_req_f,  // F1 valid
      41              : 
      42       782023 :    input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // BP commit update packet, includes errors
      43       366738 :    input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp
      44       187560 :    input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index
      45              : 
      46            0 :    input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative btb error index
      47              : 
      48        59220 :    input logic dec_tlu_flush_lower_wb, // used to move EX4 RS to EX1 and F
      49            2 :    input logic dec_tlu_flush_leak_one_wb, // don't hit for leak one fetches
      50              : 
      51            0 :    input logic dec_tlu_bpred_disable, // disable all branch prediction
      52              : 
      53        34486 :    input el2_predict_pkt_t  exu_mp_pkt, // mispredict packet
      54              : 
      55       300064 :    input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr (for patching fghr)
      56       378616 :    input logic [pt.BHT_GHR_SIZE-1:0]  exu_mp_fghr,                    // Mispredict fghr
      57       196152 :    input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]  exu_mp_index,         // Mispredict index
      58       115620 :    input logic [pt.BTB_BTAG_SIZE-1:0]  exu_mp_btag,                   // Mispredict btag
      59              : 
      60       673974 :    input logic exu_flush_final, // all flushes
      61              : 
      62      3170544 :    output logic ifu_bp_hit_taken_f, // btb hit, select target
      63       518479 :    output logic [31:1] ifu_bp_btb_target_f, //  predicted target PC
      64      2404958 :    output logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified
      65              : 
      66       376431 :    output logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch ghr
      67              : 
      68      2374031 :    output logic [1:0] ifu_bp_way_f, // way
      69        70446 :    output logic [1:0] ifu_bp_ret_f, // predicted ret
      70      2024478 :    output logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified
      71      1823494 :    output logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 0, right justified
      72       408365 :    output logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified
      73       956919 :    output logic [1:0] ifu_bp_valid_f, // branch valid, right justified
      74      2176718 :    output logic [11:0] ifu_bp_poffset_f, // predicted target
      75              : 
      76            0 :    output logic [1:0] [$clog2(pt.BTB_SIZE)-1:0]    ifu_bp_fa_index_f, // predicted branch index (fully associative option)
      77              : 
      78              :    // Excluding scan_mode from coverage as its usage is determined by the integrator of the VeeR core.
      79              :    /*verilator coverage_off*/
      80              :    input  logic       scan_mode
      81              :    /*verilator coverage_on*/
      82              :    );
      83              : 
      84              : 
      85              :    localparam BTB_DWIDTH =  pt.BTB_TOFFSET_SIZE+pt.BTB_BTAG_SIZE+5;
      86              :    localparam BTB_DWIDTH_TOP =  int'(pt.BTB_TOFFSET_SIZE)+int'(pt.BTB_BTAG_SIZE)+4;
      87              :    localparam BTB_FA_INDEX = $clog2(pt.BTB_SIZE)-1;
      88              :    localparam FA_CMP_LOWER = $clog2(pt.ICACHE_LN_SZ);
      89              :    localparam FA_TAG_END_UPPER= 5+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER)-1; // must cast to int or vcs build fails
      90              :    localparam FA_TAG_START_LOWER = 3+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER);
      91              :    localparam FA_TAG_END_LOWER = 5+int'(pt.BTB_TOFFSET_SIZE);
      92              : 
      93              :    localparam TAG_START=BTB_DWIDTH-1;
      94              :    localparam PC4=4;
      95              :    localparam BOFF=3;
      96              :    localparam CALL=2;
      97              :    localparam RET=1;
      98              :    localparam BV=0;
      99              : 
     100              :    localparam LRU_SIZE=pt.BTB_ARRAY_DEPTH;
     101              :    localparam NUM_BHT_LOOP = (pt.BHT_ARRAY_DEPTH > 16 ) ? 16 : pt.BHT_ARRAY_DEPTH;
     102              :    localparam NUM_BHT_LOOP_INNER_HI =  (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+3 : pt.BHT_ADDR_HI;
     103              :    localparam NUM_BHT_LOOP_OUTER_LO =  (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+4 : pt.BHT_ADDR_LO;
     104              :    localparam BHT_NO_ADDR_MATCH  = ( pt.BHT_ARRAY_DEPTH <= 16 );
     105              : 
     106              : 
     107       200596 :    logic exu_mp_valid_write;
     108       523400 :    logic exu_mp_ataken; // copy of exu_mp_pkt.ataken
     109       508792 :    logic exu_mp_valid; // mispredict (exu_mp_pkt.misp) qualified by ~leak_one_f
     110       230218 :    logic exu_mp_boffset; // branch offset
     111       273332 :    logic exu_mp_pc4; // branch is a 4B inst
     112        47300 :    logic exu_mp_call; // branch is a call inst
     113       149064 :    logic exu_mp_ret; // branch is a ret inst
     114        80332 :    logic exu_mp_ja; // branch is a jump always
     115       283182 :    logic [1:0] exu_mp_hist; // new history
     116        90270 :    logic [11:0] exu_mp_tgt; // target offset
     117       196152 :    logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address (copy of exu_mp_index)
     118      2913013 :    logic                                   dec_tlu_br0_v_wb; // WB stage history update
     119      2721663 :    logic [1:0]                             dec_tlu_br0_hist_wb; // new history
     120       187560 :    logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_tlu_br0_addr_wb; // addr (copy of exu_i0_br_index_r)
     121        28846 :    logic                                   dec_tlu_br0_error_wb; // error; invalidate bank
     122         9608 :    logic                                   dec_tlu_br0_start_error_wb; // error; invalidate all 4 banks in fg
     123       366738 :    logic [pt.BHT_GHR_SIZE-1:0]             exu_i0_br_fghr_wb; // copy of exu_i0_br_fghr_r
     124              : 
     125         5008 :    logic use_mp_way, use_mp_way_p1;
     126          122 :    logic [pt.RET_STACK_SIZE-1:0][31:0] rets_out, rets_in;
     127       230670 :    logic [pt.RET_STACK_SIZE-1:0]        rsenable;
     128              : 
     129              : 
     130      2176718 :    logic [11:0]       btb_rd_tgt_f;
     131       498032 :    logic              btb_rd_pc4_f, btb_rd_call_f, btb_rd_ret_f;
     132      1443205 :    logic [1:1]        bp_total_branch_offset_f;
     133              : 
     134          445 :    logic [31:1]       bp_btb_target_adder_f;
     135          445 :    logic [31:1]       bp_rs_call_target_f;
     136       203037 :    logic              rs_push, rs_pop, rs_hold;
     137       126304 :    logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_p1_f, btb_wr_addr, btb_rd_addr_f;
     138        12203 :    logic [pt.BTB_BTAG_SIZE-1:0] btb_wr_tag, fetch_rd_tag_f, fetch_rd_tag_p1_f;
     139        56054 :    logic [BTB_DWIDTH-1:0]        btb_wr_data;
     140       101186 :    logic               btb_wr_en_way0, btb_wr_en_way1;
     141              : 
     142              : 
     143       261199 :    logic               dec_tlu_error_wb, btb_valid, dec_tlu_br0_middle_wb;
     144       187560 :    logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]        btb_error_addr_wb;
     145         2468 :    logic branch_error_collision_f, fetch_mp_collision_f, branch_error_collision_p1_f, fetch_mp_collision_p1_f;
     146              : 
     147         1040 :    logic  branch_error_bank_conflict_f;
     148       372619 :    logic [pt.BHT_GHR_SIZE-1:0] merged_ghr, fghr_ns, fghr;
     149        74753 :    logic [1:0] num_valids;
     150          232 :    logic [LRU_SIZE-1:0] btb_lru_b0_f, btb_lru_b0_hold, btb_lru_b0_ns,
     151        15418 :                         fetch_wrindex_dec, fetch_wrindex_p1_dec, fetch_wrlru_b0, fetch_wrlru_p1_b0,
     152          366 :                         mp_wrindex_dec, mp_wrlru_b0;
     153      1641818 :    logic                btb_lru_rd_f, btb_lru_rd_p1_f, lru_update_valid_f;
     154       539554 :    logic  tag_match_way0_f, tag_match_way1_f;
     155       755464 :    logic [1:0] way_raw, bht_dir_f, btb_sel_f, wayhit_f, vwayhit_f, wayhit_p1_f;
     156       361342 :    logic [1:0] bht_valid_f, bht_force_taken_f;
     157              : 
     158            2 :    logic leak_one_f, leak_one_f_d1;
     159              : 
     160              :    logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0]  btb_bank0_rd_data_way0_out ;
     161              : 
     162              :    logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0]  btb_bank0_rd_data_way1_out ;
     163              : 
     164      2669337 :    logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_f ;
     165       461811 :    logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_f ;
     166              : 
     167      1084332 :    logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_p1_f ;
     168       459074 :    logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_p1_f ;
     169              : 
     170       278253 :    logic                [BTB_DWIDTH-1:0] btb_vbank0_rd_data_f, btb_vbank1_rd_data_f;
     171              : 
     172      3170544 :    logic                                         final_h;
     173       182762 :    logic                                         btb_fg_crossing_f;
     174       284882 :    logic                                         middle_of_bank;
     175              : 
     176              : 
     177      1781405 :    logic [1:0]                                   bht_vbank0_rd_data_f, bht_vbank1_rd_data_f;
     178          982 :    logic                                         branch_error_bank_conflict_p1_f;
     179       595710 :    logic                                         tag_match_way0_p1_f, tag_match_way1_p1_f;
     180              : 
     181       162887 :    logic [1:0]                                   btb_vlru_rd_f, fetch_start_f, tag_match_vway1_expanded_f, tag_match_way0_expanded_p1_f, tag_match_way1_expanded_p1_f;
     182          445 :    logic [31:2] fetch_addr_p1_f;
     183              : 
     184              : 
     185       203424 :    logic exu_mp_way, exu_mp_way_f, dec_tlu_br0_way_wb, dec_tlu_way_wb;
     186       179172 :    logic                [BTB_DWIDTH-1:0] btb_bank0e_rd_data_f, btb_bank0e_rd_data_p1_f;
     187              : 
     188      1809448 :    logic                [BTB_DWIDTH-1:0] btb_bank0o_rd_data_f;
     189              : 
     190       196725 :    logic [1:0] tag_match_way0_expanded_f, tag_match_way1_expanded_f;
     191              : 
     192              : 
     193      1709581 :     logic [1:0]                                  bht_bank0_rd_data_f;
     194      1958482 :     logic [1:0]                                  bht_bank1_rd_data_f;
     195      1802789 :     logic [1:0]                                  bht_bank0_rd_data_p1_f;
     196              :    genvar                                        j, i;
     197              : 
     198              :    assign exu_mp_valid = exu_mp_pkt.misp & ~leak_one_f; // mispredict from the EXU packet, suppressed while leak_one_f is set
     199              :    assign exu_mp_boffset = exu_mp_pkt.boffset;  // branch offset
     200              :    assign exu_mp_pc4 = exu_mp_pkt.pc4;  // branch is a 4B inst
     201              :    assign exu_mp_call = exu_mp_pkt.pcall;  // branch is a call inst
     202              :    assign exu_mp_ret = exu_mp_pkt.pret;  // branch is a ret inst
     203              :    assign exu_mp_ja = exu_mp_pkt.pja;  // branch is a jump always
     204              :    assign exu_mp_way = exu_mp_pkt.way;  // repl way
     205              :    assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0];  // new history
     206              :    assign exu_mp_tgt[11:0]  = exu_mp_pkt.toffset[11:0] ;  // target offset
     207              :    assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]  = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ;  // BTB/BHT address, taken from the exu_mp_index port rather than the packet
     208              :    assign exu_mp_ataken = exu_mp_pkt.ataken; // ataken field of the packet (presumably "actually taken" - confirm against el2_predict_pkt_t)
     209              : 
     210              : 
     211              :    assign dec_tlu_br0_v_wb = dec_tlu_br0_r_pkt.valid; // commit update packet valid
     212              :    assign dec_tlu_br0_hist_wb[1:0]  = dec_tlu_br0_r_pkt.hist[1:0]; // new history from the commit packet
     213              :    assign dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; // update index comes from the exu_i0_br_index_r port, not from the packet
     214              :    assign dec_tlu_br0_error_wb = dec_tlu_br0_r_pkt.br_error; // br_error field of the commit packet
     215              :    assign dec_tlu_br0_middle_wb = dec_tlu_br0_r_pkt.middle; // middle field of the commit packet
     216              :    assign dec_tlu_br0_way_wb = dec_tlu_br0_r_pkt.way; // way field of the commit packet
     217              :    assign dec_tlu_br0_start_error_wb = dec_tlu_br0_r_pkt.br_start_error; // br_start_error field of the commit packet
     218              :    assign exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0] = exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0]; // fghr supplied alongside the commit packet
     219              : 
     220              : 
     221              : 
     222              : 
     223              :    // ----------------------------------------------------------------------
     224              :    // READ
     225              :    // ----------------------------------------------------------------------
     226              : 
     227              :    // hash the incoming fetch PC, first guess at hashing algorithm
     228              :    el2_btb_addr_hash #(.pt(pt)) f1hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
     229              : 
     230              :    // "p1" index: fetch address bits [31:2] plus one (the next 4-byte word), hashed the same way as the base fetch address
     231              :    assign fetch_addr_p1_f[31:2] = ifc_fetch_addr_f[31:2] + 30'b1;
     232              :    el2_btb_addr_hash #(.pt(pt)) f1hash_p1(.pc(fetch_addr_p1_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
     233              :    // one-hot select between the two virtual banks: [0] when bht_dir_f[0] is set, otherwise [1]
     234              :    assign btb_sel_f[1] = ~bht_dir_f[0];
     235              :    assign btb_sel_f[0] =  bht_dir_f[0];
     236              :    // one-hot decode of fetch address bit 1: [0] when the bit is 0, [1] when it is 1; used below to rotate physical data into virtual-bank order
     237              :    assign fetch_start_f[1:0] = {ifc_fetch_addr_f[1], ~ifc_fetch_addr_f[1]};
     238              : 
     239              :    // Errors colliding with fetches must kill the btb/bht hit.
     240              :    // A collision is flagged when dec_tlu_error_wb is set and btb_error_addr_wb equals the fetch (or p1) BTB read index.
     241              :    assign branch_error_collision_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]);
     242              :    assign branch_error_collision_p1_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]);
     243              :    // The extra AND with dec_tlu_error_wb below is redundant: both collision terms above already include it.
     244              :    assign branch_error_bank_conflict_f = branch_error_collision_f & dec_tlu_error_wb;
     245              :    assign branch_error_bank_conflict_p1_f = branch_error_collision_p1_f & dec_tlu_error_wb;
     246              : 
     247              :    // set on leak one, hold until next flush without leak one
     248              :    assign leak_one_f = (dec_tlu_flush_leak_one_wb & dec_tlu_flush_lower_wb) | (leak_one_f_d1 & ~dec_tlu_flush_lower_wb); // leak_one_f_d1 is presumably the registered copy of leak_one_f (flop not in view)
     249              : 
     250       673974 : logic exu_flush_final_d1;
     251              : 
     252              :  if(!pt.BTB_FULLYA) begin : genblock1 // generated only when pt.BTB_FULLYA is 0: way-hit detection, read-data muxing and LRU for the 2-way BTB
     253              :    assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
     254              :                                     exu_mp_valid & ifc_fetch_req_f &
     255              :                                     (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])
     256              :                                     );
     257              :    assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
     258              :                                        exu_mp_valid & ifc_fetch_req_f &
     259              :                                        (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])
     260              :                                        );
     261              :    // 2 -way SA, figure out the way hit and mux accordingly
     262              :    assign tag_match_way0_f = btb_bank0_rd_data_way0_f[BV] & (btb_bank0_rd_data_way0_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
     263              :                               ~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f;
     264              :    // NOTE(review): way0 and way1 matches (base and p1) are all killed by the same (dec_tlu_way_wb & conflict) term - confirm way0 is not meant to use ~dec_tlu_way_wb
     265              :    assign tag_match_way1_f = btb_bank0_rd_data_way1_f[BV] & (btb_bank0_rd_data_way1_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
     266              :                               ~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f;
     267              : 
     268              :    assign tag_match_way0_p1_f = btb_bank0_rd_data_way0_p1_f[BV] & (btb_bank0_rd_data_way0_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
     269              :                               ~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f;
     270              : 
     271              :    assign tag_match_way1_p1_f = btb_bank0_rd_data_way1_p1_f[BV] & (btb_bank0_rd_data_way1_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
     272              :                               ~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f;
     273              :    // NOTE(review): BTB_DWIDTH = BTB_TOFFSET_SIZE + BTB_BTAG_SIZE + 5, so the literal 17 in [TAG_START:17] and in the
     274              :    // {17+pt.BTB_BTAG_SIZE{..}} replications below matches the entry layout only when pt.BTB_TOFFSET_SIZE == 12 - confirm that is the only supported value
     275              :    // Both ways could hit, use the offset bit to reorder
     276              :    // expanded[1] is set when the matching entry has BOFF ^ PC4 == 1, expanded[0] when BOFF ^ PC4 == 0
     277              :    assign tag_match_way0_expanded_f[1:0] = {tag_match_way0_f &  (btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4]),
     278              :                                              tag_match_way0_f & ~(btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4])};
     279              : 
     280              :    assign tag_match_way1_expanded_f[1:0] = {tag_match_way1_f &  (btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4]),
     281              :                                              tag_match_way1_f & ~(btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4])};
     282              : 
     283              :    assign tag_match_way0_expanded_p1_f[1:0] = {tag_match_way0_p1_f &  (btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4]),
     284              :                                                 tag_match_way0_p1_f & ~(btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4])};
     285              : 
     286              :    assign tag_match_way1_expanded_p1_f[1:0] = {tag_match_way1_p1_f &  (btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4]),
     287              :                                                 tag_match_way1_p1_f & ~(btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4])};
     288              : 
     289              :    assign wayhit_f[1:0] = tag_match_way0_expanded_f[1:0] | tag_match_way1_expanded_f[1:0];
     290              :    assign wayhit_p1_f[1:0] = tag_match_way0_expanded_p1_f[1:0] | tag_match_way1_expanded_p1_f[1:0];
     291              :    // AND-OR muxes: bank0o takes the entry whose expanded bit [1] matched, bank0e (and bank0e_p1) the entry whose expanded bit [0] matched
     292              :    assign btb_bank0o_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[1]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) |
     293              :                                                             ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[1]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) );
     294              :    assign btb_bank0e_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[0]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) |
     295              :                                                             ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[0]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) );
     296              : 
     297              :    assign btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_p1_f[0]}} & btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0]) |
     298              :                                                                ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_p1_f[0]}} & btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0]) );
     299              : 
     300              :    // virtual bank order
     301              :    // fetch_start_f[0] (fetch addr bit 1 == 0): vbank0 = e, vbank1 = o; fetch_start_f[1]: vbank0 = o, vbank1 = e of the p1 read
     302              :    assign btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} &  btb_bank0e_rd_data_f[BTB_DWIDTH-1:0]) |
     303              :                                                             ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} &  btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) );
     304              :    assign btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} &  btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) |
     305              :                                                             ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} &  btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0]) );
     306              :    // per virtual bank: 1 on a way-1 hit; with no hit (vwayhit_f, driven outside this block) fall back to the LRU read bit
     307              :    assign way_raw[1:0] =  tag_match_vway1_expanded_f[1:0] | (~vwayhit_f[1:0] & btb_vlru_rd_f[1:0]);
     308              : 
     309              :    // --------------------------------------------------------------------------------
     310              :    // --------------------------------------------------------------------------------
     311              :    // update lru
     312              :    // mp
     313              : 
     314              :    // create a onehot lru write vector
     315              :    assign mp_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} <<  exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     316              : 
     317              :    // fetch
     318              :    assign fetch_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} <<  btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     319              :    assign fetch_wrindex_p1_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} <<  btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     320              : 
     321              :    assign mp_wrlru_b0[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{exu_mp_valid}};
     322              :    // hold mask: entries written neither by the mispredict nor by the base fetch index keep their stored bit
     323              :    // NOTE(review): fetch_wrlru_p1_b0 is not part of the hold mask, so a p1 update can only OR a 1 into the held bit - confirm intended
     324              :    assign btb_lru_b0_hold[LRU_SIZE-1:0] = ~mp_wrlru_b0[LRU_SIZE-1:0] & ~fetch_wrlru_b0[LRU_SIZE-1:0];
     325              : 
     326              :    // Forward the mp lru information to the fetch, avoids multiple way hits later
     327              :    assign use_mp_way = fetch_mp_collision_f;
     328              :    assign use_mp_way_p1 = fetch_mp_collision_p1_f;
     329              :    // fetch-side LRU update happens only on a virtual-bank way hit during a fetch request outside leak-one
     330              :    assign lru_update_valid_f = (vwayhit_f[0] | vwayhit_f[1]) & ifc_fetch_req_f & ~leak_one_f;
     331              : 
     332              : 
     333              :    assign fetch_wrlru_b0[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] &
     334              :                                          {LRU_SIZE{lru_update_valid_f}};
     335              :    assign fetch_wrlru_p1_b0[LRU_SIZE-1:0] = fetch_wrindex_p1_dec[LRU_SIZE-1:0] &
     336              :                                          {LRU_SIZE{lru_update_valid_f}};
     337              :    // next LRU state: held bits, OR mispredict entry written with ~exu_mp_way, OR fetch entries written with their way-0 tag match
     338              :    assign btb_lru_b0_ns[LRU_SIZE-1:0] = ( (btb_lru_b0_hold[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]) |
     339              :                                           (mp_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) |
     340              :                                           (fetch_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f}}) |
     341              :                                           (fetch_wrlru_p1_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_p1_f}}) );
     342              : 
     343              : 
     344              :    // LRU read: on a fetch/mispredict collision use exu_mp_way_f (driven outside this block), otherwise the stored bit at the read index
     345              :    assign btb_lru_rd_f = use_mp_way ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]);
     346              : 
     347              :    assign btb_lru_rd_p1_f = use_mp_way_p1 ? exu_mp_way_f : |(fetch_wrindex_p1_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]);
     348              : 
     349              :    // rotated
     350              :    assign btb_vlru_rd_f[1:0] = ( ({2{fetch_start_f[0]}} & {btb_lru_rd_f, btb_lru_rd_f}) |
     351              :                                   ({2{fetch_start_f[1]}} & {btb_lru_rd_p1_f, btb_lru_rd_f}));
     352              : 
     353              :    assign tag_match_vway1_expanded_f[1:0] = ( ({2{fetch_start_f[0]}} & {tag_match_way1_expanded_f[1:0]}) |
     354              :                                                ({2{fetch_start_f[1]}} & {tag_match_way1_expanded_p1_f[0], tag_match_way1_expanded_f[1]}) );
     355              : 
     356              :    // LRU state register, LRU_SIZE bits wide; loaded on any fetch request or valid mispredict (rvdffe is defined elsewhere)
     357              :    rvdffe #(LRU_SIZE) btb_lru_ff (.*, .en(ifc_fetch_req_f | exu_mp_valid),
     358              :                                     .din(btb_lru_b0_ns[(LRU_SIZE)-1:0]),
     359              :                                    .dout(btb_lru_b0_f[(LRU_SIZE)-1:0]));
     360              : 
     361              :  end // if (!pt.BTB_FULLYA)
     362              :    // Detect end of cache line and mask as needed
     363       752417 :    logic eoc_near; // 1 when every fetch address bit in [pt.ICACHE_BEAT_ADDR_HI:3] is 1
     364       200327 :    logic eoc_mask; // 0 only when eoc_near is set and fetch address bits [2:1] are both 1; not referenced in the visible part of this listing
     365              :    assign eoc_near = &ifc_fetch_addr_f[pt.ICACHE_BEAT_ADDR_HI:3];
     366              :    assign eoc_mask = ~eoc_near| (|(~ifc_fetch_addr_f[2:1]));
     367              : 
     368              : 
     369              : 
     370              :    // --------------------------------------------------------------------------------
     371              :    // --------------------------------------------------------------------------------
     372              : 
     373              :    // mux out critical hit bank for pc computation
     374              :    // This is only useful for the first taken branch in the fetch group
     375      1992323 :    logic [16:1] btb_sel_data_f; // bits [16:1] of the selected virtual bank entry; bit 0 (BV) is not carried
     376              :    // field extraction uses literal positions that line up with the PC4=4, CALL=2, RET=1 localparams; bit 3 (BOFF) is not extracted here
     377              :    assign btb_rd_tgt_f[11:0] = btb_sel_data_f[16:5];
     378              :    assign btb_rd_pc4_f       = btb_sel_data_f[4];
     379              :    assign btb_rd_call_f      = btb_sel_data_f[2];
     380              :    assign btb_rd_ret_f       = btb_sel_data_f[1];
     381              :    // AND-OR mux on the one-hot btb_sel_f: [1] picks virtual bank 1, [0] picks virtual bank 0
     382              :    assign btb_sel_data_f[16:1] = ( ({16{btb_sel_f[1]}} & btb_vbank1_rd_data_f[16:1]) |
     383              :                                     ({16{btb_sel_f[0]}} & btb_vbank0_rd_data_f[16:1]) );
     384              : 
     385              : 
     386        70446 :    logic [1:0] hist0_raw, hist1_raw, pc4_raw, pret_raw;
     387              : 
     388              :    // a valid taken target needs to kill the next fetch as we compute the target address
     389              :    assign ifu_bp_hit_taken_f = |(vwayhit_f[1:0] & hist1_raw[1:0]) & ifc_fetch_req_f & ~leak_one_f_d1 & ~dec_tlu_bpred_disable; // any virtual bank with a way hit and hist1 set; note this uses leak_one_f_d1, not leak_one_f
     390              : 
     391              :    // NOTE(review): the expression below tests only the CALL and RET bits of each entry; no separate jump-always bit is visible here
     392              :    // Don't put calls/rets/ja in the predictor, force the bht taken instead
     393              :    assign bht_force_taken_f[1:0] = {(btb_vbank1_rd_data_f[CALL] | btb_vbank1_rd_data_f[RET]),
     394              :                                      (btb_vbank0_rd_data_f[CALL] | btb_vbank0_rd_data_f[RET])};
     395              : 
     396              : 
     397              :    // taken and valid, otherwise, branch errors must clear the bht
     398              :    assign bht_valid_f[1:0] = vwayhit_f[1:0];
     399              :    // rotate the BHT read data into virtual-bank order with fetch_start_f, mirroring the BTB virtual-bank muxes
     400              :    assign bht_vbank0_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank0_rd_data_f[1:0]) |
     401              :                                          ({2{fetch_start_f[1]}} & bht_bank1_rd_data_f[1:0]) );
     402              : 
     403              :    assign bht_vbank1_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank1_rd_data_f[1:0]) |
     404              :                                          ({2{fetch_start_f[1]}} & bht_bank0_rd_data_p1_f[1:0]) );
     405              : 
     406              :    // per virtual bank direction: (forced taken OR BHT data bit 1) gated by a valid way hit
     407              :    assign bht_dir_f[1:0] = {(bht_force_taken_f[1] | bht_vbank1_rd_data_f[1]) & bht_valid_f[1],
     408              :                              (bht_force_taken_f[0] | bht_vbank0_rd_data_f[1]) & bht_valid_f[0]};
     409              :    // mask is 1 when there is no taken hit, or when the taken hit is selected from virtual bank 1; 0 only for a taken hit selected from virtual bank 0
     410              :    assign ifu_bp_inst_mask_f = (ifu_bp_hit_taken_f & btb_sel_f[1]) | ~ifu_bp_hit_taken_f;
     411              : 
     412              : 
     413              : 
     414              : 
     415              :    // Branch prediction info is sent with the 2byte lane associated with the end of the branch.
     416              :    // Cases
     417              :    //       BANK1         BANK0
     418              :    // -------------------------------
     419              :    // |      :       |      :       |
     420              :    // -------------------------------
     421              :    //         <------------>                   : PC4 branch, offset, should be in B1 (indicated on [2])
     422              :    //                <------------>            : PC4 branch, no offset, indicate PC4, VALID, HIST on [1]
     423              :    //                       <------------>     : PC4 branch, offset, indicate PC4, VALID, HIST on [0]
     424              :    //                <------>                  : PC2 branch, offset, indicate VALID, HIST on [1]
     425              :    //                       <------>           : PC2 branch, no offset, indicate VALID, HIST on [0]
     426              :    //
     427              : 
     428              :    // Raw per-lane prediction fields: index [1] is built from the vbank1 data, index [0] from the vbank0 data.
     429              :    // hist1: bit [1] of the lane's BHT data, forced to 1 when bht_force_taken_f is set for that lane
     430              :    assign hist1_raw[1:0] = bht_force_taken_f[1:0] | {bht_vbank1_rd_data_f[1],
     431              :                                                       bht_vbank0_rd_data_f[1]};
     432              :    // hist0: bit [0] of the lane's BHT data, passed through unmodified
     433              :    assign hist0_raw[1:0] = {bht_vbank1_rd_data_f[0],
     434              :                             bht_vbank0_rd_data_f[0]};
     435              : 
     436              :    // pc4: the BTB entry's PC4 flag, reported only for lanes that hit
     437              :    assign pc4_raw[1:0] = {vwayhit_f[1] & btb_vbank1_rd_data_f[PC4],
     438              :                           vwayhit_f[0] & btb_vbank0_rd_data_f[PC4]};
     439              :    // pret: lane hit with RET set and CALL clear (an entry with both flags set is not reported as a return)
     440              :    assign pret_raw[1:0] = {vwayhit_f[1] & ~btb_vbank1_rd_data_f[CALL] & btb_vbank1_rd_data_f[RET],
     441              :                            vwayhit_f[0] & ~btb_vbank0_rd_data_f[CALL] & btb_vbank0_rd_data_f[RET]};
     442              : 
     443              :    // GHR
     444              :    // fghr is the registered fetch-side global history; fghr_ns is its next value
     445              :    // NOTE(review): only bht_valid_f is counted here; any first-taken masking must already be folded into it - confirm
     446              :   // count the valids with masking based on first taken
     447              :    assign num_valids[1:0] = countones(bht_valid_f[1:0]);
     448              : 
     449              :    // Note that the following property holds
     450              :    // P: prior ghr, H: history bit of last valid branch in line (could be 1 or 0)
     451              :    // Num valid branches   What new GHR must be
     452              :    // 2                    0H
     453              :    // 1                    PH
     454              :    // 0                    PP
     455              :    // final_h: direction (bht_dir_f) of the lane selected by btb_sel_f
     456              :    assign final_h = |(btb_sel_f[1:0] & bht_dir_f[1:0]);
     457              :    // Shift two, one or zero new bits into the low end of fghr according to num_valids (see table above)
     458              :    assign merged_ghr[pt.BHT_GHR_SIZE-1:0] = (
     459              :                                             ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h2}} & {fghr[pt.BHT_GHR_SIZE-3:0], 1'b0, final_h}) | // 0H
     460              :                                             ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h1}} & {fghr[pt.BHT_GHR_SIZE-2:0], final_h}) | // PH
     461              :                                             ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h0}} & {fghr[pt.BHT_GHR_SIZE-1:0]}) ); // PP
     462              :    // GHR value loaded on a flush; taken directly from exu_mp_fghr
     463       378616 :    logic [pt.BHT_GHR_SIZE-1:0] exu_flush_ghr;
     464              :    assign exu_flush_ghr[pt.BHT_GHR_SIZE-1:0] = exu_mp_fghr[pt.BHT_GHR_SIZE-1:0];
     465              :    // Next GHR: flush value when exu_flush_final_d1; merged_ghr on a fetch request with icache hit and no leak_one_f_d1; otherwise hold fghr
     466              :    assign fghr_ns[pt.BHT_GHR_SIZE-1:0] = ( ({pt.BHT_GHR_SIZE{exu_flush_final_d1}} & exu_flush_ghr[pt.BHT_GHR_SIZE-1:0]) |
     467              :                                          ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1}} & merged_ghr[pt.BHT_GHR_SIZE-1:0]) |
     468              :                                          ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ~(ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1)}} & fghr[pt.BHT_GHR_SIZE-1:0]));
     469              :    // One register holds fghr together with the one-cycle-delayed exu_flush_final, exu_mp_way and leak_one_f
     470              :    rvdffie #(.WIDTH(pt.BHT_GHR_SIZE+3),.OVERRIDE(1)) fetchghr (.*,
     471              :                                           .din ({exu_flush_final, exu_mp_way, leak_one_f, fghr_ns[pt.BHT_GHR_SIZE-1:0]}),
     472              :                                           .dout({exu_flush_final_d1, exu_mp_way_f, leak_one_f_d1, fghr[pt.BHT_GHR_SIZE-1:0]}));
     473              :    // The registered GHR is also driven out as ifu_bp_fghr_f
     474              :    assign ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] = fghr[pt.BHT_GHR_SIZE-1:0];
     475              : 
     476              :    // Drive the per-lane prediction outputs from the raw fields (way, hist1, hist0, pc4 and ret are passed through unchanged)
     477              :    assign ifu_bp_way_f[1:0] = way_raw[1:0];
     478              :    assign ifu_bp_hist1_f[1:0]    = hist1_raw[1:0];
     479              :    assign ifu_bp_hist0_f[1:0]    = hist0_raw[1:0];
     480              :    assign ifu_bp_pc4_f[1:0]     = pc4_raw[1:0];
     481              :    // valid is the lane hit vector, forced low on both lanes while dec_tlu_bpred_disable is set
     482              :    assign ifu_bp_valid_f[1:0]   = vwayhit_f[1:0] & ~{2{dec_tlu_bpred_disable}};
     483              :    assign ifu_bp_ret_f[1:0]     = pret_raw[1:0];
     484              : 
     485              : 
     486              :    // compute target
     487              :    // Form the fetch group offset based on the btb hit location and the location of the branch within the 4 byte chunk
     488              :    // The table below is in PLA (espresso-style) format, '-' being don't-care; presumably the source of the bloc_f / use_fa_plus equations - confirm
     489              : //  .i 5
     490              : //  .o 3
     491              : //  .ilb bht_dir_f[1] bht_dir_f[0] fetch_start_f[1] fetch_start_f[0] btb_rd_pc4_f
     492              : //  .ob bloc_f[1] bloc_f[0] use_fa_plus
     493              : //  .type fr
     494              : //
     495              : //
     496              : //  ## rotdir[1:0]  fs   pc4  off fapl
     497              : //    -1            01 -  01  0
     498              : //    10            01 -  10  0
     499              : //
     500              : //    -1            10 -  10  0
     501              : //    10            10 0  01  1
     502              : //    10            10 1  01  0
     503      2583121 : logic [1:0] bloc_f;
     504      2358836 : logic use_fa_plus;
     505              : assign bloc_f[1] = (bht_dir_f[0] & ~fetch_start_f[0]) | (~bht_dir_f[0]
     506              :      & fetch_start_f[0]);
     507              : assign bloc_f[0] = (bht_dir_f[0] & fetch_start_f[0]) | (~bht_dir_f[0]
     508              :      & ~fetch_start_f[0]);
     509              : assign use_fa_plus = (~bht_dir_f[0] & ~fetch_start_f[0] & ~btb_rd_pc4_f);
     510              : 
     511              : 
     512              : 
     513              :    // Fetch-group crossing: lane 0 is selected, the fetch starts at position 0 and the branch is flagged PC4
     514              :     assign btb_fg_crossing_f = fetch_start_f[0] & btb_sel_f[0] & btb_rd_pc4_f;
     515              :    // Bit [1] of the pc fed to both adders below
     516              :    assign bp_total_branch_offset_f =  bloc_f[1] ^ btb_rd_pc4_f;
     517              :    // ifc_fetch_adder_prior: fetch address captured on a fetch request with icache hit that is not a predicted-taken hit
     518          340 :    logic [31:2] adder_pc_in_f, ifc_fetch_adder_prior;
     519              :    rvdfflie #(.WIDTH(30), .LEFT(19)) faddrf_ff (.*, .en(ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f), .din(ifc_fetch_addr_f[31:2]), .dout(ifc_fetch_adder_prior[31:2]));
     520              : 
     521              :    // The predicted offset output is the low 12 bits of the BTB target field
     522              :    assign ifu_bp_poffset_f[11:0] = btb_rd_tgt_f[11:0];
     523              :    // Adder base pc: fetch_addr_p1_f when use_fa_plus, the prior fetch address on a fetch-group crossing, otherwise the current fetch address
     524              :    assign adder_pc_in_f[31:2] = ( ({30{ use_fa_plus}} & fetch_addr_p1_f[31:2]) |
     525              :                                    ({30{ btb_fg_crossing_f}} & ifc_fetch_adder_prior[31:2]) |
     526              :                                    ({30{~btb_fg_crossing_f & ~use_fa_plus}} & ifc_fetch_addr_f[31:2]));
     527              :    // Target adder: combines the base pc with the BTB offset (see rvbradder for the exact arithmetic)
     528              :    rvbradder predtgt_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}),
     529              :                          .offset(btb_rd_tgt_f[11:0]),
     530              :                          .dout(bp_btb_target_adder_f[31:1])
     531              :                          );
     532              :    // mux in the return stack address here for a predicted return assuming the RS is valid, quiet (all zeros) if no prediction
     533              :    assign ifu_bp_btb_target_f[31:1] = (({31{btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0] & ifu_bp_hit_taken_f}} & rets_out[0][31:1]) |
     534              :                                        ({31{~(btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0]) & ifu_bp_hit_taken_f}} & bp_btb_target_adder_f[31:1]) );
     535              : 
     536              : 
     537              :    // ----------------------------------------------------------------------
     538              :    // Return Stack
     539              :    // ----------------------------------------------------------------------
     540              :    // Address pushed for a call: same pc as the target adder, with an offset whose only non-zero bit is ~btb_rd_pc4_f (see rvbradder for the exact arithmetic)
     541              :    rvbradder rs_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}),
     542              :                     .offset({11'b0,  ~btb_rd_pc4_f}),
     543              :                     .dout(bp_rs_call_target_f[31:1])
     544              :                          );
     545              :    // Push on a predicted-taken call, pop on a predicted-taken return; an entry with both CALL and RET set does neither
     546              :    assign rs_push = (btb_rd_call_f & ~btb_rd_ret_f & ifu_bp_hit_taken_f);
     547              :    assign rs_pop = (btb_rd_ret_f & ~btb_rd_call_f & ifu_bp_hit_taken_f);
     548              :    assign rs_hold = ~rs_push & ~rs_pop;
     549              : 
     550              : 
     551              :    // Entry 0 (top of stack): takes the new call return address on a push, or entry 1 on a pop
     552              :    // Fetch based (bit 0 is a valid)
     553              :    assign rets_in[0][31:0] = ( ({32{rs_push}} & {bp_rs_call_target_f[31:1], 1'b1}) | // target[31:1], valid
     554              :                                ({32{rs_pop}}  & rets_out[1][31:0]) );
     555              : 
     556              :    assign rsenable[0] = ~rs_hold;
     557              :    // One 32-bit register per stack entry; entries shift away from 0 on push and toward 0 on pop
     558              :    for (i=0; i<pt.RET_STACK_SIZE; i++) begin : retstack
     559              : 
     560              :       // for the last entry in the stack, we don't have a pop position
     561              :       if(i==pt.RET_STACK_SIZE-1) begin
     562              :          assign rets_in[i][31:0] = rets_out[i-1][31:0];
     563              :          assign rsenable[i] = rs_push;
     564              :       end
     565              :       else if(i>0) begin
     566              :         assign rets_in[i][31:0] = ( ({32{rs_push}} & rets_out[i-1][31:0]) |
     567              :                                     ({32{rs_pop}}  & rets_out[i+1][31:0]) );
     568              :          assign rsenable[i] = rs_push | rs_pop;
     569              :       end
     570              :       rvdffe #(32) rets_ff (.*, .en(rsenable[i]), .din(rets_in[i][31:0]), .dout(rets_out[i][31:0]));
     571              : 
     572              :    end : retstack
     573              : 
     574              :    // ----------------------------------------------------------------------
     575              :    // WRITE
     576              :    // ----------------------------------------------------------------------
     577              : 
     578              :    // Either kind of branch error reported at writeback (start error or error) counts as an error
     579              :    assign dec_tlu_error_wb = dec_tlu_br0_start_error_wb | dec_tlu_br0_error_wb;
     580              :    // BTB index of the branch reported in error
     581              :    assign btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     582              : 
     583              :    assign dec_tlu_way_wb = dec_tlu_br0_way_wb;
     584              :    // Valid bit written into the BTB entry: low whenever an error is being reported, so an error write stores an invalid entry
     585              :    assign btb_valid = exu_mp_valid & ~dec_tlu_error_wb;
     586              :    // Tag written into the BTB entry comes from exu_mp_btag
     587              :    assign btb_wr_tag[pt.BTB_BTAG_SIZE-1:0] = exu_mp_btag[pt.BTB_BTAG_SIZE-1:0];
     588              : 
     589              :    if(!pt.BTB_FULLYA) begin
     590              :       // Set-associative BTB only: read tags for the fetch address and the p1 address; the hash module is chosen by pt.BTB_BTAG_FOLD
     591              :       if(pt.BTB_BTAG_FOLD) begin : btbfold
     592              :          el2_btb_tag_hash_fold #(.pt(pt)) rdtagf  (.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]),
     593              :                                                     .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
     594              :          el2_btb_tag_hash_fold #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]),
     595              :                                                     .pc({fetch_addr_p1_f[ pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
     596              :       end
     597              :       else begin : btbfold
     598              :          el2_btb_tag_hash #(.pt(pt)) rdtagf(.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]),
     599              :                                              .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
     600              :          el2_btb_tag_hash #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]),
     601              :                                                .pc({fetch_addr_p1_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
     602              :       end
     603              :       // Way write enables: a normal write (exu_mp_valid_write, no error) goes to the way given by exu_mp_way; an error write goes to dec_tlu_way_wb
     604              :       assign btb_wr_en_way0 = ( ({{~exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) |
     605              :                                 ({{~dec_tlu_way_wb & dec_tlu_error_wb}}));
     606              : 
     607              :       assign btb_wr_en_way1 = ( ({{exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) |
     608              :                                 ({{dec_tlu_way_wb & dec_tlu_error_wb}}));
     609              :       assign btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_error_wb ? btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     610              : 
     611              :       // Lane hits are picked from wayhit_f / wayhit_p1_f according to fetch_start_f; lane 1 is additionally masked by eoc_mask
     612              :       assign vwayhit_f[1:0] = ( ({2{fetch_start_f[0]}} & {wayhit_f[1:0]}) |
     613              :                                 ({2{fetch_start_f[1]}} & {wayhit_p1_f[0], wayhit_f[1]})) & {eoc_mask, 1'b1};
     614              : 
     615              :    end // if (!pt.BTB_FULLYA)
     616              :    // BTB write entry, MSB to LSB: tag, target offset, pc4, boffset, call, ret, valid; exu_mp_ja sets both the call and the ret bit
     617              :    assign btb_wr_data[BTB_DWIDTH-1:0] = {btb_wr_tag[pt.BTB_BTAG_SIZE-1:0], exu_mp_tgt[pt.BTB_TOFFSET_SIZE-1:0], exu_mp_pc4, exu_mp_boffset,
     618              :                                                 exu_mp_call | exu_mp_ja, exu_mp_ret | exu_mp_ja, btb_valid} ;
     619              :    // A normal BTB write requires exu_mp_valid with exu_mp_ataken set and exu_mp_pkt.valid clear
     620              :    assign exu_mp_valid_write = exu_mp_valid & exu_mp_ataken & ~exu_mp_pkt.valid;
     621       263744 :    logic [1:0] bht_wr_data0, bht_wr_data2;
     622      1652695 :    logic [1:0] bht_wr_en0, bht_wr_en2;
     623              :    // BHT write enables are one-hot per bank: bit [1] when "middle", bit [0] otherwise; the exu_mp port never writes for call/ret/ja
     624              :    assign middle_of_bank = exu_mp_pc4 ^ exu_mp_boffset;
     625              :    assign bht_wr_en0[1:0] = {2{exu_mp_valid & ~exu_mp_call & ~exu_mp_ret & ~exu_mp_ja}} & {middle_of_bank, ~middle_of_bank};
     626              :    assign bht_wr_en2[1:0] = {2{dec_tlu_br0_v_wb}} & {dec_tlu_br0_middle_wb, ~dec_tlu_br0_middle_wb} ;
     627              : 
     628              :    // Experiments show this is the best priority scheme for same bank/index writes at the same time.
     629              :    assign bht_wr_data0[1:0] = exu_mp_hist[1:0]; // lowest priority
     630              :    assign bht_wr_data2[1:0] = dec_tlu_br0_hist_wb[1:0]; // highest priority
     631              : 
     632              : 
     633              :    // BHT read/write addresses: each is the el2_btb_ghr_hash of a BTB index with a global-history value
     634       151741 :    logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] bht_rd_addr_f, bht_rd_addr_p1_f, bht_wr_addr0, bht_wr_addr2;
     635              : 
     636       151741 :    logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] mp_hashed, br0_hashed_wb, bht_rd_addr_hashed_f, bht_rd_addr_hashed_p1_f;
     637              :    el2_btb_ghr_hash #(.pt(pt)) mpghrhs  (.hashin(exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_mp_eghr[pt.BHT_GHR_SIZE-1:0]), .hash(mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
     638              :    el2_btb_ghr_hash #(.pt(pt)) br0ghrhs (.hashin(dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0]), .hash(br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
     639              :    el2_btb_ghr_hash #(.pt(pt)) fghrhs (.hashin(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
     640              :    el2_btb_ghr_hash #(.pt(pt)) fghrhs_p1 (.hashin(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
     641              :    // Write port 0 uses the exu_mp hash, write port 2 the dec_tlu_br0 hash; both read addresses hash with the current fghr
     642              :    assign bht_wr_addr0[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
     643              :    assign bht_wr_addr2[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
     644              :    assign bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
     645              :    assign bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
     646              : 
     647              : 
     648              :    // ----------------------------------------------------------------------
     649              :    // Structures. Using FLOPS
     650              :    // ----------------------------------------------------------------------
     651              :    // BTB
     652              :    // Entry -> tag[pt.BTB_BTAG_SIZE-1:0], toffset[pt.BTB_TOFFSET_SIZE-1:0], pc4, boffset, call, ret, valid
     653              :    // Set-associative storage: one BTB_DWIDTH-wide register per set and way, sized to match btb_wr_data exactly
     654              :    if(!pt.BTB_FULLYA) begin
     655              :       // The register for set j is enabled only when the write index equals j and that way's write enable is set
     656              :       for (j=0 ; j<LRU_SIZE ; j++) begin : BTB_FLOPS
     657              :          // Way 0
     658              :          rvdffe #(BTB_DWIDTH) btb_bank0_way0 (.*,
     659              :                     .en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way0)),
     660              :                     .din        (btb_wr_data[BTB_DWIDTH-1:0]),
     661              :                     .dout       (btb_bank0_rd_data_way0_out[j]));
     662              : 
     663              :          // Way 1
     664              :          rvdffe #(BTB_DWIDTH) btb_bank0_way1 (.*,
     665              :                     .en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way1)),
     666              :                     .din        (btb_wr_data[BTB_DWIDTH-1:0]),
     667              :                     .dout       (btb_bank0_rd_data_way1_out[j]));
     668              : 
     669              :       end
     670              : 
     671              :     // Read muxes: both ways of the set addressed by btb_rd_addr_f and of the set addressed by btb_rd_addr_p1_f; outputs default to zero
     672          339 :     always_comb begin : BTB_rd_mux
     673          339 :         btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0] = '0 ;
     674          339 :         btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0] = '0 ;
     675          339 :         btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0] = '0 ;
     676          339 :         btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0] = '0 ;
     677              : 
     678          339 :         for (int j=0; j< LRU_SIZE; j++) begin
     679     28265884 :           if (btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin
     680              : 
     681     28265884 :            btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way0_out[j];
     682     28265884 :            btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way1_out[j];
     683              : 
     684              :           end
     685              :         end
     686          339 :         for (int j=0; j< LRU_SIZE; j++) begin
     687     28265884 :           if (btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin
     688              : 
     689     28265884 :            btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way0_out[j];
     690     28265884 :            btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way1_out[j];
     691              : 
     692              :           end
     693              :         end
     694              :     end
     695              : end // if (!pt.BTB_FULLYA)
     696              : 
     697              : 
     698              : 
     699              : 
     700              : 
     701              :       if(pt.BTB_FULLYA) begin : fa
     702              :          // Fully associative BTB: every entry is compared against the fetch address in parallel
     703              :          logic found1, hit0, hit1;
     704              :          logic btb_used_reset, write_used;
     705              :          logic [$clog2(pt.BTB_SIZE)-1:0] btb_fa_wr_addr0, hit0_index, hit1_index;
     706              : 
     707              :          logic [pt.BTB_SIZE-1:0]         btb_tag_hit, btb_offset_0, btb_offset_1, btb_used_ns, btb_used,
     708              :                                          wr0_en, btb_upper_hit;
     709              :          logic [pt.BTB_SIZE-1:0][BTB_DWIDTH-1:0] btbdata;
     710              : 
     711              :          // Fully Associative tag hash uses bits 31:3. Bits 2:1 are the offset bits used for the 4 tag comp banks
     712              :          // Full tag used to speed up lookup. There is one 31:3 cmp per entry, and 4 2:1 cmps per entry.
     713              : 
     714              :          logic [FA_CMP_LOWER-1:1]  ifc_fetch_addr_p1_f;
     715              : 
     716              :          // Low address bits of the next 2-byte lane; only the bits below FA_CMP_LOWER are incremented (no carry into the upper bits)
     717              :          assign ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1] = ifc_fetch_addr_f[FA_CMP_LOWER-1:1] + 1'b1;
     718              :          // Collision: a valid exu_mp write (exu_mp_pkt.way clear) whose tag equals the address being fetched (or its p1 lane); used below to forward btb_wr_data
     719              :          assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == ifc_fetch_addr_f[31:1]) &
     720              :                                       exu_mp_valid & ifc_fetch_req_f & ~exu_mp_pkt.way);
     721              :          assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == {ifc_fetch_addr_f[31:FA_CMP_LOWER], ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]}) &
     722              :                                       exu_mp_valid & ifc_fetch_req_f & ~exu_mp_pkt.way);
     723              :       // Combinational lookup over all entries: lane hits, hit indices, muxed read data and the allocation index
     724              :       always_comb begin
     725              :          btb_vbank0_rd_data_f = '0;
     726              :          btb_vbank1_rd_data_f = '0;
     727              : //       btb_tag_hit = '0;
     728              : //       btb_upper_hit = '0;
     729              : //       btb_offset_0 = '0;
     730              : //       btb_offset_1 = '0;
     731              : 
     732              :          found1 = 1'b0;
     733              :          hit0 = 1'b0;
     734              :          hit1 = 1'b0;
     735              :          hit0_index = '0;
     736              :          hit1_index = '0;
     737              :          btb_fa_wr_addr0 = '0;
     738              : 
     739              :          for(int i=0; i<pt.BTB_SIZE; i++) begin
     740              :             logic upper_hit, offset_0, offset_1;
     741              : 
     742              :             // Break the cmp into chunks for lower area.
     743              :             // Chunk1: FA 31:6 or 31:5 depending on icache line size
     744              :             // Chunk2: FA 5:1 or 4:1 depending on icache line size
     745              : //          btb_upper_hit[i] = (btbdata[i][BTB_DWIDTH_TOP:FA_TAG_END_UPPER] == ifc_fetch_addr_f[31:FA_CMP_LOWER]) & btbdata[i][0] & ~wr0_en[i];
     746              : //          btb_offset_0[i] = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_f[FA_CMP_LOWER-1:1]) & btb_upper_hit[i];
     747              : //          btb_offset_1[i] = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]) & btb_upper_hit[i];
     748              :             // An entry whose valid bit (bit 0) is clear, or that is being written this cycle (wr0_en[i]), never matches
     749              :             upper_hit = (btbdata[i][BTB_DWIDTH_TOP:FA_TAG_END_UPPER] == ifc_fetch_addr_f[31:FA_CMP_LOWER]) & btbdata[i][0] & ~wr0_en[i];
     750              :             offset_0 = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_f[FA_CMP_LOWER-1:1]) & upper_hit;
     751              :             offset_1 = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]) & upper_hit;
     752              :             // Record the index of the first (lowest-numbered) matching entry for each lane
     753              :             if(~hit0) begin
     754              :                if(offset_0) begin
     755              :                   hit0_index[BTB_FA_INDEX:0] = (BTB_FA_INDEX+1)'(i);
     756              :                   // hit unless we are also writing this entry at the same time
     757              :                   hit0 = 1'b1;
     758              :                end
     759              :             end
     760              :             if(~hit1) begin
     761              :                if(offset_1) begin
     762              :                   hit1_index[BTB_FA_INDEX:0] = (BTB_FA_INDEX+1)'(i);
     763              :                   hit1 = 1'b1;
     764              :                end
     765              :             end
     766              : 
     767              :             // On a collision the data being written (btb_wr_data) is forwarded in place of the stored entry
     768              :             // Mux out the 2 potential branches
     769              :             if(offset_0)
     770              :               btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = fetch_mp_collision_f ? btb_wr_data : btbdata[i];
     771              :             if(offset_1)
     772              :               btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = fetch_mp_collision_p1_f ? btb_wr_data : btbdata[i];
     773              : 
     774              :             // find the first zero from bit zero in the used vector, this is the write address
     775              :             if(~found1 & ((exu_mp_valid_write & ~exu_mp_pkt.way) | dec_tlu_error_wb)) begin
     776              :                if(~btb_used[i]) begin
     777              :                   btb_fa_wr_addr0[BTB_FA_INDEX:0] = i[BTB_FA_INDEX:0];
     778              :                   found1 = 1'b1;
     779              :                end
     780              :             end
     781              :          end
     782              :       end // always_comb begin
     783              : 
     784              : //`ifdef RV_ASSERT_ON
     785              : //   btbhitonehot0: assert #0 ($onehot0(btb_offset_0));
     786              : //   btbhitonehot1: assert #0 ($onehot0(btb_offset_1));
     787              : //`endif
     788              :    // Lane hits; lane 1 is additionally masked by eoc_mask
     789              :    assign vwayhit_f[1:0] = {hit1, hit0} & {eoc_mask, 1'b1};
     790              : 
     791              :    // way bit is reused as the predicted bit
     792              :    assign way_raw[1:0] =  vwayhit_f[1:0] | {fetch_mp_collision_p1_f, fetch_mp_collision_f};
     793              :    // One register per entry; written at the allocation index on a normal write, or at dec_fa_error_index on an error
     794              :    for (j=0 ; j<pt.BTB_SIZE ; j++) begin : BTB_FAFLOPS
     795              : 
     796              :       assign wr0_en[j] = ((btb_fa_wr_addr0[BTB_FA_INDEX:0] == j) & (exu_mp_valid_write & ~exu_mp_pkt.way)) |
     797              :                          ((dec_fa_error_index == j) & dec_tlu_error_wb);
     798              : 
     799              :       rvdffe #(BTB_DWIDTH) btb_fa (.*, .clk(clk),
     800              :                                    .en  (wr0_en[j]),
     801              :                                    .din (btb_wr_data[BTB_DWIDTH-1:0]),
     802              :                                    .dout(btbdata[j]));
     803              :    end // block: BTB_FAFLOPS
     804              :    // Hit entry index reported per lane; zero when that lane did not hit
     805              :    assign ifu_bp_fa_index_f[1] = hit1 ? hit1_index : '0;
     806              :    assign ifu_bp_fa_index_f[0] = hit0 ? hit0_index : '0;
     807              :    // Used vector: bits are set for hit and newly written entries and cleared for the entry in error; once all bits are set, only bits set this cycle are kept
     808              :    assign btb_used_reset = &btb_used[pt.BTB_SIZE-1:0];
     809              :    assign btb_used_ns[pt.BTB_SIZE-1:0] = ({pt.BTB_SIZE{vwayhit_f[1]}} & (32'b1 << hit1_index[BTB_FA_INDEX:0])) |
     810              :                                          ({pt.BTB_SIZE{vwayhit_f[0]}} & (32'b1 << hit0_index[BTB_FA_INDEX:0])) |
     811              :                                          ({pt.BTB_SIZE{exu_mp_valid_write & ~exu_mp_pkt.way & ~dec_tlu_error_wb}} & (32'b1 << btb_fa_wr_addr0[BTB_FA_INDEX:0])) |
     812              :                                          ({pt.BTB_SIZE{btb_used_reset}} & {pt.BTB_SIZE{1'b0}}) |
     813              :                                          ({pt.BTB_SIZE{~btb_used_reset & dec_tlu_error_wb}} & (btb_used[pt.BTB_SIZE-1:0] & ~(32'b1 << dec_fa_error_index[BTB_FA_INDEX:0]))) |
     814              :                                          (~{pt.BTB_SIZE{btb_used_reset | dec_tlu_error_wb}} & btb_used[pt.BTB_SIZE-1:0]);
     815              :    // The used vector is only clocked on events that can change it (NOTE(review): the one-hot masks above are 32 bits wide regardless of pt.BTB_SIZE)
     816              :    assign write_used = btb_used_reset | ifu_bp_hit_taken_f | exu_mp_valid_write | dec_tlu_error_wb;
     817              : 
     818              : 
     819              :    rvdffe #(pt.BTB_SIZE) btb_usedf (.*, .clk(clk),
     820              :                     .en  (write_used),
     821              :                     .din (btb_used_ns[pt.BTB_SIZE-1:0]),
     822              :                     .dout(btb_used[pt.BTB_SIZE-1:0]));
     823              : 
     824              : end // block: fa
     825              : 
     826              : 
     827              :    //-----------------------------------------------------------------------------
     828              :    // BHT
     829              :    // 2 bit Entry -> direction, strength
     830              :    //
     831              :    //-----------------------------------------------------------------------------
     832              :    // Storage is two banks of pt.BHT_ARRAY_DEPTH 2-bit entries, built from flops in clock-gated groups of NUM_BHT_LOOP entries
     833              : //   logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0][1:0]      bht_bank_wr_data ;
     834              :    logic [1:0] [pt.BHT_ARRAY_DEPTH-1:0] [1:0]                bht_bank_rd_data_out ;
     835        11506 :    logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0]                 bht_bank_clken ;
     836            0 :    logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0]                 bht_bank_clk   ;
     837              : //   logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0]           bht_bank_sel   ;
     838              :    // wr0 / wr1: one-hot (or all-zero) entry selects for write port 0 (exu_mp) and write port 2 (dec_tlu_br0)
     839              :    for ( i=0; i<2; i++) begin : BANKS
     840              :      wire[pt.BHT_ARRAY_DEPTH-1:0] wr0, wr1;
     841              :      assign wr0 = pt.BHT_ARRAY_DEPTH'(bht_wr_en0[i] << bht_wr_addr0);
     842              :      assign wr1 = pt.BHT_ARRAY_DEPTH'(bht_wr_en2[i] << bht_wr_addr2);
     843              :      for (genvar k=0 ; k < (pt.BHT_ARRAY_DEPTH)/NUM_BHT_LOOP ; k++) begin : BHT_CLK_GROUP
     844              :      assign bht_bank_clken[i][k]  = (bht_wr_en0[i] & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) |  BHT_NO_ADDR_MATCH)) |
     845              :                                     (bht_wr_en2[i] & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) |  BHT_NO_ADDR_MATCH));
     846              : `ifndef RV_FPGA_OPTIMIZE
     847              :      rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); // ifndef RV_FPGA_OPTIMIZE
     848              : `endif
     849              :      // Entry NUM_BHT_LOOP*k+j of bank i: the same index is used for the write select and for the stored data
     850              :      for (j=0 ; j<NUM_BHT_LOOP ; j++) begin : BHT_FLOPS
     851              :      wire[1:0] wdata;
     852              :      wire  bank_sel = wr1[NUM_BHT_LOOP*k+j] | wr0[NUM_BHT_LOOP*k+j];
     853              : 
     854              : //       assign   bht_bank_sel[i][k][j]    = (bht_wr_en0[i] & (bht_wr_addr0[NUM_BHT_LOOP_INNER_HI :pt.BHT_ADDR_LO] == j) & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) |
     855              : //                                           (bht_wr_en2[i] & (bht_wr_addr2[NUM_BHT_LOOP_INNER_HI :pt.BHT_ADDR_LO] == j) & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) ;
     856              : 
     857              : //       assign bht_bank_wr_data[i][k][j]  = (bht_wr_en2[i] & (bht_wr_addr2[NUM_BHT_LOOP_INNER_HI:pt.BHT_ADDR_LO] == j) & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) ? bht_wr_data2[1:0] :
     858              : //                                                                                                                      bht_wr_data0[1:0]   ;
     859              :        assign wdata  = wr1[NUM_BHT_LOOP*k+j] ? bht_wr_data2[1:0] :bht_wr_data0;
     860              : 
     861              :        // wdata: port 2 data wins when both ports select this entry in the same cycle
     862              : 
     863              :           rvdffs_fpga #(2) bht_bank (.*,
     864              :                     .clk        (bht_bank_clk[i][k]),
     865              :                     .en         (bank_sel),
     866              :                     .rawclk     (clk),
     867              :                     .clken      (bank_sel),
     868              :                     .din        (wdata),
     869              :                     .dout       (bht_bank_rd_data_out[i][(NUM_BHT_LOOP*k)+j]));
     870              : 
     871              :       end // block: BHT_FLOPS
     872              :    end // block: BHT_CLK_GROUP
     873              :  end // block: BANKS
     874              : 
     875          339 :     always_comb begin : BHT_rd_mux // read mux over the BHT flop outputs, one pass over every entry
     876          339 :      bht_bank0_rd_data_f[1:0]    = 2'b00 ; // defaults; they stay in place if no entry index matches
     877          339 :      bht_bank1_rd_data_f[1:0]    = 2'b00 ;
     878          339 :      bht_bank0_rd_data_p1_f[1:0] = 2'b00 ;
     879          339 :      for (int entry=0; entry < pt.BHT_ARRAY_DEPTH; entry++) begin
     880     28265884 :        if ((pt.BHT_ADDR_HI-pt.BHT_ADDR_LO+1)'(entry) == bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]) begin // entry selected by bht_rd_addr_f: read both banks
     881     28265884 :          bht_bank0_rd_data_f[1:0] = bht_bank_rd_data_out[0][entry];
     882     28265884 :          bht_bank1_rd_data_f[1:0] = bht_bank_rd_data_out[1][entry];
     883              :        end
     884     28265884 :        if ((pt.BHT_ADDR_HI-pt.BHT_ADDR_LO+1)'(entry) == bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]) begin // entry selected by bht_rd_addr_p1_f
     885     28265884 :          bht_bank0_rd_data_p1_f[1:0] = bht_bank_rd_data_out[0][entry]; // only bank 0 is read at this address
     886              :        end
     887              :       end
     888              :     end // block: BHT_rd_mux
     889              : 
     890              : 
     891          339 : function [1:0] countones;
     892              :       input [1:0] valid;
     893              :       // Returns the number of bits set in the 2-bit input `valid`: 0, 1 or 2.
     894          339 :       begin
     895              :          // Each bit is zero-extended to 2 bits before the add so the carry (both bits set) is kept.
     896          339 :          countones = 2'(valid[0]) +
     897          339 :                      2'(valid[1]);
     898              :       end
     899              :    endfunction
     900              : endmodule // el2_ifu_bp_ctl
     901              :