Project Full coverage report
Current view: Cores-VeeR-EL2—Cores-VeeR-EL2—design—ifu—el2_ifu_bp_ctl.sv Coverage Hit Total
Test Date: 08-11-2024 Toggle 93.6% 103 110
Test: all Branch 100.0% 27 27

            Line data    Source code
       1              : //********************************************************************************
       2              : // SPDX-License-Identifier: Apache-2.0
       3              : // Copyright 2020 Western Digital Corporation or its affiliates.
       4              : //
       5              : // Licensed under the Apache License, Version 2.0 (the "License");
       6              : // you may not use this file except in compliance with the License.
       7              : // You may obtain a copy of the License at
       8              : //
       9              : // http://www.apache.org/licenses/LICENSE-2.0
      10              : //
      11              : // Unless required by applicable law or agreed to in writing, software
      12              : // distributed under the License is distributed on an "AS IS" BASIS,
      13              : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      14              : // See the License for the specific language governing permissions and
      15              : // limitations under the License.
      16              : //********************************************************************************
      17              : 
      18              : //********************************************************************************
      19              : // Function: Branch predictor
      20              : // Comments:
      21              : //
      22              : //
      23              : //  Bank3 : Bank2 : Bank1 : Bank0
      24              : //  FA  C       8       4       0
      25              : //********************************************************************************
      26              : 
      27              : module el2_ifu_bp_ctl
      28              : import el2_pkg::*;
      29              : #(
      30              : `include "el2_param.vh"
      31              :  )
      32              :   (
      33              : 
      34     69890155 :    input logic clk,
      35          338 :    input logic rst_l,
      36              : 
      37      6789925 :    input logic ic_hit_f,      // Icache hit, enables F address capture
      38              : 
      39          444 :    input logic [31:1] ifc_fetch_addr_f, // look up btb address
      40      3717922 :    input logic ifc_fetch_req_f,  // F1 valid
      41              : 
      42       782419 :    input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // BP commit update packet, includes errors
      43       367244 :    input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp
      44       187591 :    input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index
      45              : 
      46            0 :    input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative btb error index
      47              : 
      48        59238 :    input logic dec_tlu_flush_lower_wb, // used to move EX4 RS to EX1 and F
      49            0 :    input logic dec_tlu_flush_leak_one_wb, // don't hit for leak one fetches
      50              : 
      51            0 :    input logic dec_tlu_bpred_disable, // disable all branch prediction
      52              : 
      53        34482 :    input el2_predict_pkt_t  exu_mp_pkt, // mispredict packet
      54              : 
      55       300192 :    input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr (for patching fghr)
      56       379120 :    input logic [pt.BHT_GHR_SIZE-1:0]  exu_mp_fghr,                    // Mispredict fghr
      57       196224 :    input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]  exu_mp_index,         // Mispredict index
      58       115620 :    input logic [pt.BTB_BTAG_SIZE-1:0]  exu_mp_btag,                   // Mispredict btag
      59              : 
      60       674202 :    input logic exu_flush_final, // all flushes
      61              : 
      62      3171757 :    output logic ifu_bp_hit_taken_f, // btb hit, select target
      63       518791 :    output logic [31:1] ifu_bp_btb_target_f, //  predicted target PC
      64      2405225 :    output logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified
      65              : 
      66       376935 :    output logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch ghr
      67              : 
      68      2375527 :    output logic [1:0] ifu_bp_way_f, // way
      69        70466 :    output logic [1:0] ifu_bp_ret_f, // predicted ret
      70      2025808 :    output logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified
      71      1824638 :    output logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 0, right justified
      72       408833 :    output logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified
      73       957477 :    output logic [1:0] ifu_bp_valid_f, // branch valid, right justified
      74      2177331 :    output logic [11:0] ifu_bp_poffset_f, // predicted target
      75              : 
      76            0 :    output logic [1:0] [$clog2(pt.BTB_SIZE)-1:0]    ifu_bp_fa_index_f, // predicted branch index (fully associative option)
      77              : 
      78            0 :    input  logic       scan_mode
      79              :    );
      80              : 
      81              : 
      82              :    localparam BTB_DWIDTH =  pt.BTB_TOFFSET_SIZE+pt.BTB_BTAG_SIZE+5; // BTB entry width: target offset + tag + 5 low flag bits (PC4, BOFF, CALL, RET, BV)
      83              :    localparam BTB_DWIDTH_TOP =  int'(pt.BTB_TOFFSET_SIZE)+int'(pt.BTB_BTAG_SIZE)+4;
      84              :    localparam BTB_FA_INDEX = $clog2(pt.BTB_SIZE)-1;
      85              :    localparam FA_CMP_LOWER = $clog2(pt.ICACHE_LN_SZ);
      86              :    localparam FA_TAG_END_UPPER= 5+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER)-1; // must cast to int or vcs build fails
      87              :    localparam FA_TAG_START_LOWER = 3+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER);
      88              :    localparam FA_TAG_END_LOWER = 5+int'(pt.BTB_TOFFSET_SIZE);
      89              :    // Bit positions inside one BTB entry
      90              :    localparam TAG_START=BTB_DWIDTH-1; // MSB of the entry; upper bound of the [TAG_START:17] tag slice used by the tag compares
      91              :    localparam PC4=4; // pc4 flag bit
      92              :    localparam BOFF=3; // offset flag bit (XORed with PC4 to order way hits)
      93              :    localparam CALL=2; // call flag bit
      94              :    localparam RET=1; // ret flag bit
      95              :    localparam BV=0; // entry valid bit; gates every tag match
      96              : 
      97              :    localparam LRU_SIZE=pt.BTB_ARRAY_DEPTH; // width of the LRU vectors: one bit per BTB array index
      98              :    localparam NUM_BHT_LOOP = (pt.BHT_ARRAY_DEPTH > 16 ) ? 16 : pt.BHT_ARRAY_DEPTH;
      99              :    localparam NUM_BHT_LOOP_INNER_HI =  (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+3 : pt.BHT_ADDR_HI;
     100              :    localparam NUM_BHT_LOOP_OUTER_LO =  (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+4 : pt.BHT_ADDR_LO;
     101              :    localparam BHT_NO_ADDR_MATCH  = ( pt.BHT_ARRAY_DEPTH <= 16 );
     102              : 
     103              : 
     104       200610 :    logic exu_mp_valid_write;
     105       523560 :    logic exu_mp_ataken; // copy of exu_mp_pkt.ataken
     106       508970 :    logic exu_mp_valid; // mispredict valid: exu_mp_pkt.misp, suppressed while leak_one_f is set
     107       230336 :    logic exu_mp_boffset; // branch offset (exu_mp_pkt.boffset)
     108       273420 :    logic exu_mp_pc4; // branch is a 4B inst
     109        47310 :    logic exu_mp_call; // branch is a call inst
     110       149094 :    logic exu_mp_ret; // branch is a ret inst
     111        80332 :    logic exu_mp_ja; // branch is a jump always
     112       283388 :    logic [1:0] exu_mp_hist; // new history
     113        90330 :    logic [11:0] exu_mp_tgt; // target offset
     114       196224 :    logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address (copy of exu_mp_index)
     115      2914516 :    logic                                   dec_tlu_br0_v_wb; // WB stage history update valid (dec_tlu_br0_r_pkt.valid)
     116      2722695 :    logic [1:0]                             dec_tlu_br0_hist_wb; // new history
     117       187591 :    logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_tlu_br0_addr_wb; // addr (copy of exu_i0_br_index_r)
     118        28846 :    logic                                   dec_tlu_br0_error_wb; // error; invalidate bank
     119         9608 :    logic                                   dec_tlu_br0_start_error_wb; // error; invalidate all 4 banks in fg
     120       367244 :    logic [pt.BHT_GHR_SIZE-1:0]             exu_i0_br_fghr_wb; // copy of exu_i0_br_fghr_r
     121              : 
     122         5008 :    logic use_mp_way, use_mp_way_p1;
     123          128 :    logic [pt.RET_STACK_SIZE-1:0][31:0] rets_out, rets_in;
     124       230738 :    logic [pt.RET_STACK_SIZE-1:0]        rsenable;
     125              : 
     126              : 
     127      2177331 :    logic [11:0]       btb_rd_tgt_f;
     128       498218 :    logic              btb_rd_pc4_f, btb_rd_call_f, btb_rd_ret_f;
     129      1444556 :    logic [1:1]        bp_total_branch_offset_f;
     130              : 
     131          446 :    logic [31:1]       bp_btb_target_adder_f;
     132          446 :    logic [31:1]       bp_rs_call_target_f;
     133       203102 :    logic              rs_push, rs_pop, rs_hold;
     134       126343 :    logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_p1_f, btb_wr_addr, btb_rd_addr_f;
     135        12203 :    logic [pt.BTB_BTAG_SIZE-1:0] btb_wr_tag, fetch_rd_tag_f, fetch_rd_tag_p1_f;
     136        56058 :    logic [BTB_DWIDTH-1:0]        btb_wr_data;
     137       101184 :    logic               btb_wr_en_way0, btb_wr_en_way1;
     138              : 
     139              : 
     140       261365 :    logic               dec_tlu_error_wb, btb_valid, dec_tlu_br0_middle_wb;
     141       187591 :    logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]        btb_error_addr_wb;
     142         2468 :    logic branch_error_collision_f, fetch_mp_collision_f, branch_error_collision_p1_f, fetch_mp_collision_p1_f;
     143              : 
     144         1040 :    logic  branch_error_bank_conflict_f;
     145       373123 :    logic [pt.BHT_GHR_SIZE-1:0] merged_ghr, fghr_ns, fghr;
     146        74755 :    logic [1:0] num_valids;
     147          232 :    logic [LRU_SIZE-1:0] btb_lru_b0_f, btb_lru_b0_hold, btb_lru_b0_ns,
     148        15530 :                         fetch_wrindex_dec, fetch_wrindex_p1_dec, fetch_wrlru_b0, fetch_wrlru_p1_b0,
     149          366 :                         mp_wrindex_dec, mp_wrlru_b0;
     150      1643851 :    logic                btb_lru_rd_f, btb_lru_rd_p1_f, lru_update_valid_f;
     151       539542 :    logic  tag_match_way0_f, tag_match_way1_f;
     152       755742 :    logic [1:0] way_raw, bht_dir_f, btb_sel_f, wayhit_f, vwayhit_f, wayhit_p1_f;
     153       361416 :    logic [1:0] bht_valid_f, bht_force_taken_f;
     154              : 
     155            0 :    logic leak_one_f, leak_one_f_d1;
     156              : 
     157              :    logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0]  btb_bank0_rd_data_way0_out ;
     158              : 
     159              :    logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0]  btb_bank0_rd_data_way1_out ;
     160              : 
     161      2668982 :    logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_f ;
     162       461797 :    logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_f ;
     163              : 
     164      1085126 :    logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_p1_f ;
     165       459058 :    logic                [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_p1_f ;
     166              : 
     167       278385 :    logic                [BTB_DWIDTH-1:0] btb_vbank0_rd_data_f, btb_vbank1_rd_data_f;
     168              : 
     169      3171757 :    logic                                         final_h;
     170       183506 :    logic                                         btb_fg_crossing_f;
     171       284964 :    logic                                         middle_of_bank;
     172              : 
     173              : 
     174      1782553 :    logic [1:0]                                   bht_vbank0_rd_data_f, bht_vbank1_rd_data_f;
     175          982 :    logic                                         branch_error_bank_conflict_p1_f;
     176       595704 :    logic                                         tag_match_way0_p1_f, tag_match_way1_p1_f;
     177              : 
     178       162873 :    logic [1:0]                                   btb_vlru_rd_f, fetch_start_f, tag_match_vway1_expanded_f, tag_match_way0_expanded_p1_f, tag_match_way1_expanded_p1_f;
     179          446 :    logic [31:2] fetch_addr_p1_f;
     180              : 
     181              : 
     182       204706 :    logic exu_mp_way, exu_mp_way_f, dec_tlu_br0_way_wb, dec_tlu_way_wb;
     183       179784 :    logic                [BTB_DWIDTH-1:0] btb_bank0e_rd_data_f, btb_bank0e_rd_data_p1_f;
     184              : 
     185      1809009 :    logic                [BTB_DWIDTH-1:0] btb_bank0o_rd_data_f;
     186              : 
     187       196713 :    logic [1:0] tag_match_way0_expanded_f, tag_match_way1_expanded_f;
     188              : 
     189              : 
     190      1711067 :     logic [1:0]                                  bht_bank0_rd_data_f;
     191      1959832 :     logic [1:0]                                  bht_bank1_rd_data_f;
     192      1804447 :     logic [1:0]                                  bht_bank0_rd_data_p1_f;
     193              :    genvar                                        j, i;
     194              : 
     195              :    assign exu_mp_valid = exu_mp_pkt.misp & ~leak_one_f; // mispredict valid; suppressed while leak_one_f is set
     196              :    assign exu_mp_boffset = exu_mp_pkt.boffset;  // branch offset
     197              :    assign exu_mp_pc4 = exu_mp_pkt.pc4;  // branch is a 4B inst
     198              :    assign exu_mp_call = exu_mp_pkt.pcall;  // branch is a call inst
     199              :    assign exu_mp_ret = exu_mp_pkt.pret;  // branch is a ret inst
     200              :    assign exu_mp_ja = exu_mp_pkt.pja;  // branch is a jump always
     201              :    assign exu_mp_way = exu_mp_pkt.way;  // replacement way
     202              :    assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0];  // new history
     203              :    assign exu_mp_tgt[11:0]  = exu_mp_pkt.toffset[11:0] ;  // target offset
     204              :    assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]  = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ;  // BTB/BHT address
     205              :    assign exu_mp_ataken = exu_mp_pkt.ataken;
     206              : 
     207              :    // Unpack the commit update packet; the index and fghr arrive on separate ports
     208              :    assign dec_tlu_br0_v_wb = dec_tlu_br0_r_pkt.valid;
     209              :    assign dec_tlu_br0_hist_wb[1:0]  = dec_tlu_br0_r_pkt.hist[1:0];
     210              :    assign dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     211              :    assign dec_tlu_br0_error_wb = dec_tlu_br0_r_pkt.br_error;
     212              :    assign dec_tlu_br0_middle_wb = dec_tlu_br0_r_pkt.middle;
     213              :    assign dec_tlu_br0_way_wb = dec_tlu_br0_r_pkt.way;
     214              :    assign dec_tlu_br0_start_error_wb = dec_tlu_br0_r_pkt.br_start_error;
     215              :    assign exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0] = exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0];
     216              : 
     217              : 
     218              : 
     219              : 
     220              :    // ----------------------------------------------------------------------
     221              :    // READ
     222              :    // ----------------------------------------------------------------------
     223              : 
     224              :    // hash the incoming fetch PC to form the BTB read index (first guess at hashing algorithm)
     225              :    el2_btb_addr_hash #(.pt(pt)) f1hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
     226              : 
     227              :    // p1 = fetch address bits [31:2] plus one, hashed with the same module to form a second read index
     228              :    assign fetch_addr_p1_f[31:2] = ifc_fetch_addr_f[31:2] + 30'b1;
     229              :    el2_btb_addr_hash #(.pt(pt)) f1hash_p1(.pc(fetch_addr_p1_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
     230              :    // one-hot select: [0] when bht_dir_f[0] is set, otherwise [1]
     231              :    assign btb_sel_f[1] = ~bht_dir_f[0];
     232              :    assign btb_sel_f[0] =  bht_dir_f[0];
     233              :    // one-hot fetch start: [0] when fetch address bit 1 is clear, [1] when it is set
     234              :    assign fetch_start_f[1:0] = {ifc_fetch_addr_f[1], ~ifc_fetch_addr_f[1]};
     235              : 
     236              :    // Errors colliding with fetches must kill the btb/bht hit.
     237              : 
     238              :    assign branch_error_collision_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]);
     239              :    assign branch_error_collision_p1_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]);
     240              :    // NOTE(review): the collision terms above already include dec_tlu_error_wb, so the AND below is redundant
     241              :    assign branch_error_bank_conflict_f = branch_error_collision_f & dec_tlu_error_wb;
     242              :    assign branch_error_bank_conflict_p1_f = branch_error_collision_p1_f & dec_tlu_error_wb;
     243              : 
     244              :    // set on leak one, hold until next flush without leak one
     245              :    assign leak_one_f = (dec_tlu_flush_leak_one_wb & dec_tlu_flush_lower_wb) | (leak_one_f_d1 & ~dec_tlu_flush_lower_wb);
     246              : 
     247       674202 : logic exu_flush_final_d1;
     248              : 
     249              :  if(!pt.BTB_FULLYA) begin : genblock1 // generated only when pt.BTB_FULLYA is not set
     250              :    assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
     251              :                                     exu_mp_valid & ifc_fetch_req_f &
     252              :                                     (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])
     253              :                                     );
     254              :    assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
     255              :                                        exu_mp_valid & ifc_fetch_req_f &
     256              :                                        (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])
     257              :                                        );
     258              :    // 2-way set associative: figure out the way hit and mux accordingly
     259              :    assign tag_match_way0_f = btb_bank0_rd_data_way0_f[BV] & (btb_bank0_rd_data_way0_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
     260              :                               ~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f;
     261              :    // NOTE(review): the [TAG_START:17] slice is pt.BTB_BTAG_SIZE bits wide only when pt.BTB_TOFFSET_SIZE == 12
     262              :    assign tag_match_way1_f = btb_bank0_rd_data_way1_f[BV] & (btb_bank0_rd_data_way1_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
     263              :                               ~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f;
     264              :    // NOTE(review): way0 and way1 matches are masked with the same dec_tlu_way_wb polarity -- confirm this is intended
     265              :    assign tag_match_way0_p1_f = btb_bank0_rd_data_way0_p1_f[BV] & (btb_bank0_rd_data_way0_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
     266              :                               ~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f;
     267              : 
     268              :    assign tag_match_way1_p1_f = btb_bank0_rd_data_way1_p1_f[BV] & (btb_bank0_rd_data_way1_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
     269              :                               ~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f;
     270              : 
     271              : 
     272              :    // Both ways could hit; BOFF ^ PC4 steers each way's hit to slot [1] (XOR set) or slot [0] (XOR clear)
     273              : 
     274              :    assign tag_match_way0_expanded_f[1:0] = {tag_match_way0_f &  (btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4]),
     275              :                                              tag_match_way0_f & ~(btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4])};
     276              : 
     277              :    assign tag_match_way1_expanded_f[1:0] = {tag_match_way1_f &  (btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4]),
     278              :                                              tag_match_way1_f & ~(btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4])};
     279              : 
     280              :    assign tag_match_way0_expanded_p1_f[1:0] = {tag_match_way0_p1_f &  (btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4]),
     281              :                                                 tag_match_way0_p1_f & ~(btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4])};
     282              : 
     283              :    assign tag_match_way1_expanded_p1_f[1:0] = {tag_match_way1_p1_f &  (btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4]),
     284              :                                                 tag_match_way1_p1_f & ~(btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4])};
     285              : 
     286              :    assign wayhit_f[1:0] = tag_match_way0_expanded_f[1:0] | tag_match_way1_expanded_f[1:0];
     287              :    assign wayhit_p1_f[1:0] = tag_match_way0_expanded_p1_f[1:0] | tag_match_way1_expanded_p1_f[1:0];
     288              :    // AND-OR mux of the two ways per slot. NOTE(review): the 17+pt.BTB_BTAG_SIZE replication count equals BTB_DWIDTH only when pt.BTB_TOFFSET_SIZE == 12
     289              :    assign btb_bank0o_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[1]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) |
     290              :                                                             ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[1]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) );
     291              :    assign btb_bank0e_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[0]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) |
     292              :                                                             ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[0]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) );
     293              : 
     294              :    assign btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_p1_f[0]}} & btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0]) |
     295              :                                                                ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_p1_f[0]}} & btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0]) );
     296              : 
     297              :    // virtual bank order: fetch_start_f[0] gives {vbank1, vbank0} = {odd, even}; fetch_start_f[1] gives {even of p1, odd}
     298              : 
     299              :    assign btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} &  btb_bank0e_rd_data_f[BTB_DWIDTH-1:0]) |
     300              :                                                             ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} &  btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) );
     301              :    assign btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} &  btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) |
     302              :                                                             ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} &  btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0]) );
     303              :    // way per slot: set on a way-1 tag match; where there is no way hit, the rotated LRU read value is used
     304              :    assign way_raw[1:0] =  tag_match_vway1_expanded_f[1:0] | (~vwayhit_f[1:0] & btb_vlru_rd_f[1:0]);
     305              : 
     306              :    // --------------------------------------------------------------------------------
     307              :    // --------------------------------------------------------------------------------
     308              :    // update lru
     309              :    // mp
     310              : 
     311              :    // create a onehot lru write vector
     312              :    assign mp_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} <<  exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     313              : 
     314              :    // fetch
     315              :    assign fetch_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} <<  btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     316              :    assign fetch_wrindex_p1_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} <<  btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     317              : 
     318              :    assign mp_wrlru_b0[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{exu_mp_valid}};
     319              : 
     320              :    // NOTE(review): fetch_wrlru_p1_b0 is not part of this hold mask, so a p1 update can set an LRU bit but cannot clear it -- confirm intended
     321              :    assign btb_lru_b0_hold[LRU_SIZE-1:0] = ~mp_wrlru_b0[LRU_SIZE-1:0] & ~fetch_wrlru_b0[LRU_SIZE-1:0];
     322              : 
     323              :    // Forward the mp lru information to the fetch, avoids multiple way hits later
     324              :    assign use_mp_way = fetch_mp_collision_f;
     325              :    assign use_mp_way_p1 = fetch_mp_collision_p1_f;
     326              : 
     327              :    assign lru_update_valid_f = (vwayhit_f[0] | vwayhit_f[1]) & ifc_fetch_req_f & ~leak_one_f;
     328              : 
     329              : 
     330              :    assign fetch_wrlru_b0[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] &
     331              :                                          {LRU_SIZE{lru_update_valid_f}};
     332              :    assign fetch_wrlru_p1_b0[LRU_SIZE-1:0] = fetch_wrindex_p1_dec[LRU_SIZE-1:0] &
     333              :                                          {LRU_SIZE{lru_update_valid_f}};
     334              :    // next LRU: held entries keep their value; a mispredict write stores ~exu_mp_way; fetch writes store the way-0 tag match
     335              :    assign btb_lru_b0_ns[LRU_SIZE-1:0] = ( (btb_lru_b0_hold[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]) |
     336              :                                           (mp_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) |
     337              :                                           (fetch_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f}}) |
     338              :                                           (fetch_wrlru_p1_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_p1_f}}) );
     339              : 
     340              : 
     341              :    // LRU read: the indexed LRU bit, overridden by exu_mp_way_f when the fetch collides with a mispredict
     342              :    assign btb_lru_rd_f = use_mp_way ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]);
     343              : 
     344              :    assign btb_lru_rd_p1_f = use_mp_way_p1 ? exu_mp_way_f : |(fetch_wrindex_p1_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]);
     345              : 
     346              :    // rotated into virtual bank order using fetch_start_f
     347              :    assign btb_vlru_rd_f[1:0] = ( ({2{fetch_start_f[0]}} & {btb_lru_rd_f, btb_lru_rd_f}) |
     348              :                                   ({2{fetch_start_f[1]}} & {btb_lru_rd_p1_f, btb_lru_rd_f}));
     349              : 
     350              :    assign tag_match_vway1_expanded_f[1:0] = ( ({2{fetch_start_f[0]}} & {tag_match_way1_expanded_f[1:0]}) |
     351              :                                                ({2{fetch_start_f[1]}} & {tag_match_way1_expanded_p1_f[0], tag_match_way1_expanded_f[1]}) );
     352              : 
     353              :    // LRU flops are enabled only on a fetch request or a valid mispredict
     354              :    rvdffe #(LRU_SIZE) btb_lru_ff (.*, .en(ifc_fetch_req_f | exu_mp_valid),
     355              :                                     .din(btb_lru_b0_ns[(LRU_SIZE)-1:0]),
     356              :                                    .dout(btb_lru_b0_f[(LRU_SIZE)-1:0]));
     357              : 
     358              :  end // if (!pt.BTB_FULLYA)
     359              :    // Detect end of cache line and mask as needed
     360       753630 :    logic eoc_near; // set when fetch address bits [pt.ICACHE_BEAT_ADDR_HI:3] are all ones
     361       201286 :    logic eoc_mask; // clear only when eoc_near is set and fetch address bits [2:1] are both ones
     362              :    assign eoc_near = &ifc_fetch_addr_f[pt.ICACHE_BEAT_ADDR_HI:3];
     363              :    assign eoc_mask = ~eoc_near| (|(~ifc_fetch_addr_f[2:1]));
     364              : 
     365              : 
     366              : 
     367              :    // --------------------------------------------------------------------------------
     368              :    // --------------------------------------------------------------------------------
     369              : 
     370              :    // mux out critical hit bank for pc computation
     371              :    // This is only useful for the first taken branch in the fetch group
     372      1992484 :    logic [16:1] btb_sel_data_f;
     373              :    // literal bit positions match the PC4 (4), CALL (2) and RET (1) localparams; [16:5] is a fixed 12-bit target offset
     374              :    assign btb_rd_tgt_f[11:0] = btb_sel_data_f[16:5];
     375              :    assign btb_rd_pc4_f       = btb_sel_data_f[4];
     376              :    assign btb_rd_call_f      = btb_sel_data_f[2];
     377              :    assign btb_rd_ret_f       = btb_sel_data_f[1];
     378              :    // btb_sel_f is one-hot, so this AND-OR picks exactly one virtual bank's entry bits [16:1]
     379              :    assign btb_sel_data_f[16:1] = ( ({16{btb_sel_f[1]}} & btb_vbank1_rd_data_f[16:1]) |
     380              :                                     ({16{btb_sel_f[0]}} & btb_vbank0_rd_data_f[16:1]) );
     381              : 
     382              : 
     383        70466 :    logic [1:0] hist0_raw, hist1_raw, pc4_raw, pret_raw;
     384              : 
     385              :    // a valid taken target needs to kill the next fetch as we compute the target address
     386              :    assign ifu_bp_hit_taken_f = |(vwayhit_f[1:0] & hist1_raw[1:0]) & ifc_fetch_req_f & ~leak_one_f_d1 & ~dec_tlu_bpred_disable;
     387              :    // NOTE(review): gated with leak_one_f_d1 here, while the set-associative tag matches are gated with leak_one_f
     388              : 
     389              :    // Don't put calls/rets/ja in the predictor, force the bht taken instead
     390              :    assign bht_force_taken_f[1:0] = {(btb_vbank1_rd_data_f[CALL] | btb_vbank1_rd_data_f[RET]),
     391              :                                      (btb_vbank0_rd_data_f[CALL] | btb_vbank0_rd_data_f[RET])};
     392              :    // (only the CALL and RET entry bits feed the force-taken term)
     393              : 
     394              :    // BHT data is valid only for slots with a way hit (vwayhit_f); original note: branch errors must clear the bht
     395              :    assign bht_valid_f[1:0] = vwayhit_f[1:0];
     396              :    // rotate the BHT reads into virtual bank order with fetch_start_f
     397              :    assign bht_vbank0_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank0_rd_data_f[1:0]) |
     398              :                                          ({2{fetch_start_f[1]}} & bht_bank1_rd_data_f[1:0]) );
     399              : 
     400              :    assign bht_vbank1_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank1_rd_data_f[1:0]) |
     401              :                                          ({2{fetch_start_f[1]}} & bht_bank0_rd_data_p1_f[1:0]) );
     402              : 
     403              :    // direction per slot: (force-taken or counter bit 1) and the slot is valid
     404              :    assign bht_dir_f[1:0] = {(bht_force_taken_f[1] | bht_vbank1_rd_data_f[1]) & bht_valid_f[1],
     405              :                              (bht_force_taken_f[0] | bht_vbank0_rd_data_f[1]) & bht_valid_f[0]};
     406              :    // mask is clear only when a taken hit selects virtual bank 0 (btb_sel_f[1] clear)
     407              :    assign ifu_bp_inst_mask_f = (ifu_bp_hit_taken_f & btb_sel_f[1]) | ~ifu_bp_hit_taken_f;
     408              : 
     409              : 
     410              : 
     411              : 
     412              :    // Branch prediction info is sent with the 2byte lane associated with the end of the branch.
     413              :    // Cases
     414              :    //       BANK1         BANK0
     415              :    // -------------------------------
     416              :    // |      :       |      :       |
     417              :    // -------------------------------
     418              :    //         <------------>                   : PC4 branch, offset, should be in B1 (indicated on [2])
     419              :    //                <------------>            : PC4 branch, no offset, indicate PC4, VALID, HIST on [1]
     420              :    //                       <------------>     : PC4 branch, offset, indicate PC4, VALID, HIST on [0]
     421              :    //                <------>                  : PC2 branch, offset, indicate VALID, HIST on [1]
     422              :    //                       <------>           : PC2 branch, no offset, indicate VALID, HIST on [0]
     423              :    //
     424              : 
     425              : 
     426              :    // hist1 = counter bit 1 or force-taken; hist0 = counter bit 0 (both in virtual bank order)
     427              :    assign hist1_raw[1:0] = bht_force_taken_f[1:0] | {bht_vbank1_rd_data_f[1],
     428              :                                                       bht_vbank0_rd_data_f[1]};
     429              : 
     430              :    assign hist0_raw[1:0] = {bht_vbank1_rd_data_f[0],
     431              :                             bht_vbank0_rd_data_f[0]};
     432              : 
     433              :    // pc4 flag per slot, qualified by a way hit
     434              :    assign pc4_raw[1:0] = {vwayhit_f[1] & btb_vbank1_rd_data_f[PC4],
     435              :                           vwayhit_f[0] & btb_vbank0_rd_data_f[PC4]};
     436              :    // predicted return per slot: RET set and CALL clear, qualified by a way hit
     437              :    assign pret_raw[1:0] = {vwayhit_f[1] & ~btb_vbank1_rd_data_f[CALL] & btb_vbank1_rd_data_f[RET],
     438              :                            vwayhit_f[0] & ~btb_vbank0_rd_data_f[CALL] & btb_vbank0_rd_data_f[RET]};
     439              : 
     440              :    // GHR
     441              : 
     442              : 
     443              :   // count the valids with masking based on first taken
     444              :    assign num_valids[1:0] = countones(bht_valid_f[1:0]);
     445              : 
     446              :    // Note that the following property holds
     447              :    // P: prior ghr, H: history bit of last valid branch in line (could be 1 or 0)
     448              :    // Num valid branches   What new GHR must be
     449              :    // 2                    0H
     450              :    // 1                    PH
     451              :    // 0                    PP
     452              : 
     453              :    assign final_h = |(btb_sel_f[1:0] & bht_dir_f[1:0]);
     454              : 
     455              :    assign merged_ghr[pt.BHT_GHR_SIZE-1:0] = (
     456              :                                             ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h2}} & {fghr[pt.BHT_GHR_SIZE-3:0], 1'b0, final_h}) | // 0H
     457              :                                             ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h1}} & {fghr[pt.BHT_GHR_SIZE-2:0], final_h}) | // PH
     458              :                                             ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h0}} & {fghr[pt.BHT_GHR_SIZE-1:0]}) ); // PP
     459              : 
     460       379120 :    logic [pt.BHT_GHR_SIZE-1:0] exu_flush_ghr;
     461              :    assign exu_flush_ghr[pt.BHT_GHR_SIZE-1:0] = exu_mp_fghr[pt.BHT_GHR_SIZE-1:0];
     462              : 
     463              :    assign fghr_ns[pt.BHT_GHR_SIZE-1:0] = ( ({pt.BHT_GHR_SIZE{exu_flush_final_d1}} & exu_flush_ghr[pt.BHT_GHR_SIZE-1:0]) |
     464              :                                          ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1}} & merged_ghr[pt.BHT_GHR_SIZE-1:0]) |
     465              :                                          ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ~(ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1)}} & fghr[pt.BHT_GHR_SIZE-1:0]));
     466              : 
     467              :    rvdffie #(.WIDTH(pt.BHT_GHR_SIZE+3),.OVERRIDE(1)) fetchghr (.*,
     468              :                                           .din ({exu_flush_final, exu_mp_way, leak_one_f, fghr_ns[pt.BHT_GHR_SIZE-1:0]}),
     469              :                                           .dout({exu_flush_final_d1, exu_mp_way_f, leak_one_f_d1, fghr[pt.BHT_GHR_SIZE-1:0]}));
     470              : 
     471              :    assign ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] = fghr[pt.BHT_GHR_SIZE-1:0];
     472              : 
     473              : 
     474              :    assign ifu_bp_way_f[1:0] = way_raw[1:0];
     475              :    assign ifu_bp_hist1_f[1:0]    = hist1_raw[1:0];
     476              :    assign ifu_bp_hist0_f[1:0]    = hist0_raw[1:0];
     477              :    assign ifu_bp_pc4_f[1:0]     = pc4_raw[1:0];
     478              : 
     479              :    assign ifu_bp_valid_f[1:0]   = vwayhit_f[1:0] & ~{2{dec_tlu_bpred_disable}};
     480              :    assign ifu_bp_ret_f[1:0]     = pret_raw[1:0];
     481              : 
     482              : 
     483              :    // compute target
     484              :    // Form the fetch group offset based on the btb hit location and the location of the branch within the 4 byte chunk
     485              : 
     486              : //  .i 5
     487              : //  .o 3
     488              : //  .ilb bht_dir_f[1] bht_dir_f[0] fetch_start_f[1] fetch_start_f[0] btb_rd_pc4_f
     489              : //  .ob bloc_f[1] bloc_f[0] use_fa_plus
     490              : //  .type fr
     491              : //
     492              : //
     493              : //  ## rotdir[1:0]  fs   pc4  off fapl
     494              : //    -1            01 -  01  0
     495              : //    10            01 -  10  0
     496              : //
     497              : //    -1            10 -  10  0
     498              : //    10            10 0  01  1
     499              : //    10            10 1  01  0
     500      2583795 : logic [1:0] bloc_f;
     501      2359714 : logic use_fa_plus;
     502              : assign bloc_f[1] = (bht_dir_f[0] & ~fetch_start_f[0]) | (~bht_dir_f[0]
     503              :      & fetch_start_f[0]);
     504              : assign bloc_f[0] = (bht_dir_f[0] & fetch_start_f[0]) | (~bht_dir_f[0]
     505              :      & ~fetch_start_f[0]);
     506              : assign use_fa_plus = (~bht_dir_f[0] & ~fetch_start_f[0] & ~btb_rd_pc4_f);
     507              : 
     508              : 
     509              : 
     510              : 
     511              :     assign btb_fg_crossing_f = fetch_start_f[0] & btb_sel_f[0] & btb_rd_pc4_f;
     512              : 
     513              :    assign bp_total_branch_offset_f =  bloc_f[1] ^ btb_rd_pc4_f;
     514              : 
     515          341 :    logic [31:2] adder_pc_in_f, ifc_fetch_adder_prior;
     516              :    rvdfflie #(.WIDTH(30), .LEFT(19)) faddrf_ff (.*, .en(ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f), .din(ifc_fetch_addr_f[31:2]), .dout(ifc_fetch_adder_prior[31:2]));
     517              : 
     518              : 
     519              :    assign ifu_bp_poffset_f[11:0] = btb_rd_tgt_f[11:0];
     520              : 
     521              :    assign adder_pc_in_f[31:2] = ( ({30{ use_fa_plus}} & fetch_addr_p1_f[31:2]) |
     522              :                                    ({30{ btb_fg_crossing_f}} & ifc_fetch_adder_prior[31:2]) |
     523              :                                    ({30{~btb_fg_crossing_f & ~use_fa_plus}} & ifc_fetch_addr_f[31:2]));
     524              : 
     525              :    rvbradder predtgt_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}),
     526              :                          .offset(btb_rd_tgt_f[11:0]),
     527              :                          .dout(bp_btb_target_adder_f[31:1])
     528              :                          );
     529              :    // mux in the return stack address here for a predicted return assuming the RS is valid, quiet if no prediction
     530              :    assign ifu_bp_btb_target_f[31:1] = (({31{btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0] & ifu_bp_hit_taken_f}} & rets_out[0][31:1]) |
     531              :                                        ({31{~(btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0]) & ifu_bp_hit_taken_f}} & bp_btb_target_adder_f[31:1]) );
     532              : 
     533              : 
     534              :    // ----------------------------------------------------------------------
     535              :    // Return Stack
     536              :    // ----------------------------------------------------------------------
     537              : 
     538              :    rvbradder rs_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}),
     539              :                     .offset({11'b0,  ~btb_rd_pc4_f}),
     540              :                     .dout(bp_rs_call_target_f[31:1])
     541              :                          );
     542              : 
     543              :    assign rs_push = (btb_rd_call_f & ~btb_rd_ret_f & ifu_bp_hit_taken_f);
     544              :    assign rs_pop = (btb_rd_ret_f & ~btb_rd_call_f & ifu_bp_hit_taken_f);
     545              :    assign rs_hold = ~rs_push & ~rs_pop;
     546              : 
     547              : 
     548              : 
     549              :    // Fetch based (bit 0 is the valid bit)
     550              :    assign rets_in[0][31:0] = ( ({32{rs_push}} & {bp_rs_call_target_f[31:1], 1'b1}) | // target[31:1], valid
     551              :                                ({32{rs_pop}}  & rets_out[1][31:0]) );
     552              : 
     553              :    assign rsenable[0] = ~rs_hold;
     554              : 
     555              :    for (i=0; i<pt.RET_STACK_SIZE; i++) begin : retstack
     556              : 
     557              :       // for the last entry in the stack, we don't have a pop position
     558              :       if(i==pt.RET_STACK_SIZE-1) begin
     559              :          assign rets_in[i][31:0] = rets_out[i-1][31:0];
     560              :          assign rsenable[i] = rs_push;
     561              :       end
     562              :       else if(i>0) begin
     563              :         assign rets_in[i][31:0] = ( ({32{rs_push}} & rets_out[i-1][31:0]) |
     564              :                                     ({32{rs_pop}}  & rets_out[i+1][31:0]) );
     565              :          assign rsenable[i] = rs_push | rs_pop;
     566              :       end
     567              :       rvdffe #(32) rets_ff (.*, .en(rsenable[i]), .din(rets_in[i][31:0]), .dout(rets_out[i][31:0]));
     568              : 
     569              :    end : retstack
     570              : 
     571              :    // ----------------------------------------------------------------------
     572              :    // WRITE
     573              :    // ----------------------------------------------------------------------
     574              : 
     575              : 
     576              :    assign dec_tlu_error_wb = dec_tlu_br0_start_error_wb | dec_tlu_br0_error_wb;
     577              : 
     578              :    assign btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     579              : 
     580              :    assign dec_tlu_way_wb = dec_tlu_br0_way_wb;
     581              : 
     582              :    assign btb_valid = exu_mp_valid & ~dec_tlu_error_wb;
     583              : 
     584              :    assign btb_wr_tag[pt.BTB_BTAG_SIZE-1:0] = exu_mp_btag[pt.BTB_BTAG_SIZE-1:0];
     585              : 
     586              :    if(!pt.BTB_FULLYA) begin
     587              : 
     588              :       if(pt.BTB_BTAG_FOLD) begin : btbfold
     589              :          el2_btb_tag_hash_fold #(.pt(pt)) rdtagf  (.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]),
     590              :                                                     .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
     591              :          el2_btb_tag_hash_fold #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]),
     592              :                                                     .pc({fetch_addr_p1_f[ pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
     593              :       end
     594              :       else begin : btbfold
     595              :          el2_btb_tag_hash #(.pt(pt)) rdtagf(.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]),
     596              :                                              .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
     597              :          el2_btb_tag_hash #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]),
     598              :                                                .pc({fetch_addr_p1_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
     599              :       end
     600              : 
     601              :       assign btb_wr_en_way0 = ( ({{~exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) |
     602              :                                 ({{~dec_tlu_way_wb & dec_tlu_error_wb}}));
     603              : 
     604              :       assign btb_wr_en_way1 = ( ({{exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) |
     605              :                                 ({{dec_tlu_way_wb & dec_tlu_error_wb}}));
     606              :       assign btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_error_wb ? btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
     607              : 
     608              : 
     609              :       assign vwayhit_f[1:0] = ( ({2{fetch_start_f[0]}} & {wayhit_f[1:0]}) |
     610              :                                 ({2{fetch_start_f[1]}} & {wayhit_p1_f[0], wayhit_f[1]})) & {eoc_mask, 1'b1};
     611              : 
     612              :    end // if (!pt.BTB_FULLYA)
     613              : 
     614              :    assign btb_wr_data[BTB_DWIDTH-1:0] = {btb_wr_tag[pt.BTB_BTAG_SIZE-1:0], exu_mp_tgt[pt.BTB_TOFFSET_SIZE-1:0], exu_mp_pc4, exu_mp_boffset,
     615              :                                                 exu_mp_call | exu_mp_ja, exu_mp_ret | exu_mp_ja, btb_valid} ;
     616              : 
     617              :    assign exu_mp_valid_write = exu_mp_valid & exu_mp_ataken & ~exu_mp_pkt.valid;
     618       263972 :    logic [1:0] bht_wr_data0, bht_wr_data2;
     619      1652136 :    logic [1:0] bht_wr_en0, bht_wr_en2;
     620              : 
     621              :    assign middle_of_bank = exu_mp_pc4 ^ exu_mp_boffset;
     622              :    assign bht_wr_en0[1:0] = {2{exu_mp_valid & ~exu_mp_call & ~exu_mp_ret & ~exu_mp_ja}} & {middle_of_bank, ~middle_of_bank};
     623              :    assign bht_wr_en2[1:0] = {2{dec_tlu_br0_v_wb}} & {dec_tlu_br0_middle_wb, ~dec_tlu_br0_middle_wb} ;
     624              : 
     625              :    // Experiments show this is the best priority scheme for same bank/index writes at the same time.
     626              :    assign bht_wr_data0[1:0] = exu_mp_hist[1:0]; // lowest priority
     627              :    assign bht_wr_data2[1:0] = dec_tlu_br0_hist_wb[1:0]; // highest priority
     628              : 
     629              : 
     630              : 
     631       151813 :    logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] bht_rd_addr_f, bht_rd_addr_p1_f, bht_wr_addr0, bht_wr_addr2;
     632              : 
     633       151813 :    logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] mp_hashed, br0_hashed_wb, bht_rd_addr_hashed_f, bht_rd_addr_hashed_p1_f;
     634              :    el2_btb_ghr_hash #(.pt(pt)) mpghrhs  (.hashin(exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_mp_eghr[pt.BHT_GHR_SIZE-1:0]), .hash(mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
     635              :    el2_btb_ghr_hash #(.pt(pt)) br0ghrhs (.hashin(dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0]), .hash(br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
     636              :    el2_btb_ghr_hash #(.pt(pt)) fghrhs (.hashin(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
     637              :    el2_btb_ghr_hash #(.pt(pt)) fghrhs_p1 (.hashin(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
     638              : 
     639              :    assign bht_wr_addr0[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
     640              :    assign bht_wr_addr2[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
     641              :    assign bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
     642              :    assign bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
     643              : 
     644              : 
     645              :    // ----------------------------------------------------------------------
     646              :    // Structures. Using FLOPS
     647              :    // ----------------------------------------------------------------------
     648              :    // BTB
     649              :    // Entry -> tag[pt.BTB_BTAG_SIZE-1:0], toffset[11:0], pc4, boffset, call, ret, valid
     650              : 
     651              :    if(!pt.BTB_FULLYA) begin
     652              : 
     653              :       for (j=0 ; j<LRU_SIZE ; j++) begin : BTB_FLOPS
     654              :          // Way 0
     655              :          rvdffe #(17+pt.BTB_BTAG_SIZE) btb_bank0_way0 (.*,
     656              :                     .en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way0)),
     657              :                     .din        (btb_wr_data[BTB_DWIDTH-1:0]),
     658              :                     .dout       (btb_bank0_rd_data_way0_out[j]));
     659              : 
     660              :          // Way 1
     661              :          rvdffe #(17+pt.BTB_BTAG_SIZE) btb_bank0_way1 (.*,
     662              :                     .en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way1)),
     663              :                     .din        (btb_wr_data[BTB_DWIDTH-1:0]),
     664              :                     .dout       (btb_bank0_rd_data_way1_out[j]));
     665              : 
     666              :       end
     667              : 
     668              : 
     669          339 :     always_comb begin : BTB_rd_mux
     670          339 :         btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0] = '0 ;
     671          339 :         btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0] = '0 ;
     672          339 :         btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0] = '0 ;
     673          339 :         btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0] = '0 ;
     674              : 
     675          339 :         for (int j=0; j< LRU_SIZE; j++) begin
     676     28151601 :           if (btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin
     677              : 
     678     28151601 :            btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way0_out[j];
     679     28151601 :            btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way1_out[j];
     680              : 
     681              :           end
     682              :         end
     683          339 :         for (int j=0; j< LRU_SIZE; j++) begin
     684     28151601 :           if (btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin
     685              : 
     686     28151601 :            btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way0_out[j];
     687     28151601 :            btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0] =  btb_bank0_rd_data_way1_out[j];
     688              : 
     689              :           end
     690              :         end
     691              :     end
     692              : end // if (!pt.BTB_FULLYA)
     693              : 
     694              : 
     695              : 
     696              : 
     697              : 
     698              :       if(pt.BTB_FULLYA) begin : fa
     699              : 
     700              :          logic found1, hit0, hit1;
     701              :          logic btb_used_reset, write_used;
     702              :          logic [$clog2(pt.BTB_SIZE)-1:0] btb_fa_wr_addr0, hit0_index, hit1_index;
     703              : 
     704              :          logic [pt.BTB_SIZE-1:0]         btb_tag_hit, btb_offset_0, btb_offset_1, btb_used_ns, btb_used,
     705              :                                          wr0_en, btb_upper_hit;
     706              :          logic [pt.BTB_SIZE-1:0][BTB_DWIDTH-1:0] btbdata;
     707              : 
     708              :          // Fully Associative tag hash uses bits 31:3. Bits 2:1 are the offset bits used for the 4 tag comp banks
     709              :          // Full tag used to speed up lookup. There is one 31:3 cmp per entry, and 4 2:1 cmps per entry.
     710              : 
     711              :          logic [FA_CMP_LOWER-1:1]  ifc_fetch_addr_p1_f;
     712              : 
     713              : 
     714              :          assign ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1] = ifc_fetch_addr_f[FA_CMP_LOWER-1:1] + 1'b1;
     715              : 
     716              :          assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == ifc_fetch_addr_f[31:1]) &
     717              :                                       exu_mp_valid & ifc_fetch_req_f & ~exu_mp_pkt.way);
     718              :          assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == {ifc_fetch_addr_f[31:FA_CMP_LOWER], ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]}) &
     719              :                                       exu_mp_valid & ifc_fetch_req_f & ~exu_mp_pkt.way);
     720              : 
     721              :       always_comb begin
     722              :          btb_vbank0_rd_data_f = '0;
     723              :          btb_vbank1_rd_data_f = '0;
     724              : //       btb_tag_hit = '0;
     725              : //       btb_upper_hit = '0;
     726              : //       btb_offset_0 = '0;
     727              : //       btb_offset_1 = '0;
     728              : 
     729              :          found1 = 1'b0;
     730              :          hit0 = 1'b0;
     731              :          hit1 = 1'b0;
     732              :          hit0_index = '0;
     733              :          hit1_index = '0;
     734              :          btb_fa_wr_addr0 = '0;
     735              : 
     736              :          for(int i=0; i<pt.BTB_SIZE; i++) begin
     737              :             logic upper_hit, offset_0, offset_1;
     738              : 
     739              :             // Break the cmp into chunks for lower area.
     740              :             // Chunk1: FA 31:6 or 31:5 depending on icache line size
     741              :             // Chunk2: FA 5:1 or 4:1 depending on icache line size
     742              : //          btb_upper_hit[i] = (btbdata[i][BTB_DWIDTH_TOP:FA_TAG_END_UPPER] == ifc_fetch_addr_f[31:FA_CMP_LOWER]) & btbdata[i][0] & ~wr0_en[i];
     743              : //          btb_offset_0[i] = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_f[FA_CMP_LOWER-1:1]) & btb_upper_hit[i];
     744              : //          btb_offset_1[i] = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]) & btb_upper_hit[i];
     745              : 
     746              :             upper_hit = (btbdata[i][BTB_DWIDTH_TOP:FA_TAG_END_UPPER] == ifc_fetch_addr_f[31:FA_CMP_LOWER]) & btbdata[i][0] & ~wr0_en[i];
     747              :             offset_0 = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_f[FA_CMP_LOWER-1:1]) & upper_hit;
     748              :             offset_1 = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]) & upper_hit;
     749              : 
     750              :             if(~hit0) begin
     751              :                if(offset_0) begin
     752              :                   hit0_index[BTB_FA_INDEX:0] = (BTB_FA_INDEX+1)'(i);
     753              :                   // hit unless we are also writing this entry at the same time
     754              :                   hit0 = 1'b1;
     755              :                end
     756              :             end
     757              :             if(~hit1) begin
     758              :                if(offset_1) begin
     759              :                   hit1_index[BTB_FA_INDEX:0] = (BTB_FA_INDEX+1)'(i);
     760              :                   hit1 = 1'b1;
     761              :                end
     762              :             end
     763              : 
     764              : 
     765              :             // Mux out the 2 potential branches
     766              :             if(offset_0)
     767              :               btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = fetch_mp_collision_f ? btb_wr_data : btbdata[i];
     768              :             if(offset_1)
     769              :               btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = fetch_mp_collision_p1_f ? btb_wr_data : btbdata[i];
     770              : 
     771              :             // find the first zero in the used vector, scanning up from bit zero; this is the write address
     772              :             if(~found1 & ((exu_mp_valid_write & ~exu_mp_pkt.way) | dec_tlu_error_wb)) begin
     773              :                if(~btb_used[i]) begin
     774              :                   btb_fa_wr_addr0[BTB_FA_INDEX:0] = i[BTB_FA_INDEX:0];
     775              :                   found1 = 1'b1;
     776              :                end
     777              :             end
     778              :          end
     779              :       end // always_comb begin
     780              : 
     781              : //`ifdef RV_ASSERT_ON
     782              : //   btbhitonehot0: assert #0 ($onehot0(btb_offset_0));
     783              : //   btbhitonehot1: assert #0 ($onehot0(btb_offset_1));
     784              : //`endif
     785              : 
     786              :    assign vwayhit_f[1:0] = {hit1, hit0} & {eoc_mask, 1'b1};
     787              : 
     788              :    // way bit is reused as the predicted bit
     789              :    assign way_raw[1:0] =  vwayhit_f[1:0] | {fetch_mp_collision_p1_f, fetch_mp_collision_f};
     790              : 
     791              :    for (j=0 ; j<pt.BTB_SIZE ; j++) begin : BTB_FAFLOPS
     792              : 
     793              :       assign wr0_en[j] = ((btb_fa_wr_addr0[BTB_FA_INDEX:0] == j) & (exu_mp_valid_write & ~exu_mp_pkt.way)) |
     794              :                          ((dec_fa_error_index == j) & dec_tlu_error_wb);
     795              : 
     796              :       rvdffe #(BTB_DWIDTH) btb_fa (.*, .clk(clk),
     797              :                                    .en  (wr0_en[j]),
     798              :                                    .din (btb_wr_data[BTB_DWIDTH-1:0]),
     799              :                                    .dout(btbdata[j]));
     800              :    end // block: BTB_FAFLOPS
     801              : 
     802              :    assign ifu_bp_fa_index_f[1] = hit1 ? hit1_index : '0;
     803              :    assign ifu_bp_fa_index_f[0] = hit0 ? hit0_index : '0;
     804              : 
     805              :    assign btb_used_reset = &btb_used[pt.BTB_SIZE-1:0];
     806              :    assign btb_used_ns[pt.BTB_SIZE-1:0] = ({pt.BTB_SIZE{vwayhit_f[1]}} & (32'b1 << hit1_index[BTB_FA_INDEX:0])) |
     807              :                                          ({pt.BTB_SIZE{vwayhit_f[0]}} & (32'b1 << hit0_index[BTB_FA_INDEX:0])) |
     808              :                                          ({pt.BTB_SIZE{exu_mp_valid_write & ~exu_mp_pkt.way & ~dec_tlu_error_wb}} & (32'b1 << btb_fa_wr_addr0[BTB_FA_INDEX:0])) |
     809              :                                          ({pt.BTB_SIZE{btb_used_reset}} & {pt.BTB_SIZE{1'b0}}) |
     810              :                                          ({pt.BTB_SIZE{~btb_used_reset & dec_tlu_error_wb}} & (btb_used[pt.BTB_SIZE-1:0] & ~(32'b1 << dec_fa_error_index[BTB_FA_INDEX:0]))) |
     811              :                                          (~{pt.BTB_SIZE{btb_used_reset | dec_tlu_error_wb}} & btb_used[pt.BTB_SIZE-1:0]);
     812              : 
     813              :    assign write_used = btb_used_reset | ifu_bp_hit_taken_f | exu_mp_valid_write | dec_tlu_error_wb;
     814              : 
     815              : 
     816              :    rvdffe #(pt.BTB_SIZE) btb_usedf (.*, .clk(clk),
     817              :                     .en  (write_used),
     818              :                     .din (btb_used_ns[pt.BTB_SIZE-1:0]),
     819              :                     .dout(btb_used[pt.BTB_SIZE-1:0]));
     820              : 
     821              : end // block: fa
     822              : 
     823              : 
     824              :    //-----------------------------------------------------------------------------
     825              :    // BHT
     826              :    // 2 bit Entry -> direction, strength
     827              :    //
     828              :    //-----------------------------------------------------------------------------
     829              : 
     830              : //   logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0][1:0]      bht_bank_wr_data ;
     831              :    logic [1:0] [pt.BHT_ARRAY_DEPTH-1:0] [1:0]                bht_bank_rd_data_out ;
     832        11518 :    logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0]                 bht_bank_clken ;
     833            0 :    logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0]                 bht_bank_clk   ;
     834              : //   logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0]           bht_bank_sel   ;
     835              : 
     836              :    for ( i=0; i<2; i++) begin : BANKS  // BHT storage: two banks, each written through port 0 (bht_wr_*0) or port 2 (bht_wr_*2)
     837              :      wire[pt.BHT_ARRAY_DEPTH-1:0] wr0, wr1;  // per-entry write strobes for bank i: wr0 from port 0, wr1 from port 2
     838              :      assign wr0 = pt.BHT_ARRAY_DEPTH'(bht_wr_en0[i] << bht_wr_addr0);  // bank enable shifted left by the write address, sized to BHT_ARRAY_DEPTH bits (at most one bit set)
     839              :      assign wr1 = pt.BHT_ARRAY_DEPTH'(bht_wr_en2[i] << bht_wr_addr2);
     840              :      for (genvar k=0 ; k < (pt.BHT_ARRAY_DEPTH)/NUM_BHT_LOOP ; k++) begin : BHT_CLK_GROUP  // entries are handled in groups of NUM_BHT_LOOP flops; each group has one clock enable / gated clock
     841              :      assign bht_bank_clken[i][k]  = (bht_wr_en0[i] & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) |  BHT_NO_ADDR_MATCH)) |
     842              :                                     (bht_wr_en2[i] & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) |  BHT_NO_ADDR_MATCH));  // group k is clocked when either port writes bank i and its upper address bits equal k (or BHT_NO_ADDR_MATCH is set)
     843              : `ifndef RV_FPGA_OPTIMIZE
     844              :      rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); // ifndef RV_FPGA_OPTIMIZE
     845              : `endif
     846              : // One 2-bit enabled flop per entry; the entry index within bank i is NUM_BHT_LOOP*k+j.
     847              :      for (j=0 ; j<NUM_BHT_LOOP ; j++) begin : BHT_FLOPS
     848              :      wire[1:0] wdata;  // write data selected for this entry
     849              :      wire  bank_sel = wr1[NUM_BHT_LOOP*k+j] | wr0[NUM_BHT_LOOP*k+j];  // entry is written when either port's strobe hits it
     850              : // The commented-out lines below are an explicit address-decode form of the entry select / write-data mux (not compiled).
     851              : //       assign   bht_bank_sel[i][k][j]    = (bht_wr_en0[i] & (bht_wr_addr0[NUM_BHT_LOOP_INNER_HI :pt.BHT_ADDR_LO] == j) & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) |
     852              : //                                           (bht_wr_en2[i] & (bht_wr_addr2[NUM_BHT_LOOP_INNER_HI :pt.BHT_ADDR_LO] == j) & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) ;
     853              : 
     854              : //       assign bht_bank_wr_data[i][k][j]  = (bht_wr_en2[i] & (bht_wr_addr2[NUM_BHT_LOOP_INNER_HI:pt.BHT_ADDR_LO] == j) & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) ? bht_wr_data2[1:0] :
     855              : //                                                                                                                      bht_wr_data0[1:0]   ;
     856              :        assign wdata  = wr1[NUM_BHT_LOOP*k+j] ? bht_wr_data2[1:0] :bht_wr_data0;  // port 2 data wins when its strobe hits this entry, otherwise port 0 data
     857              : 
     858              : // dout is indexed with NUM_BHT_LOOP*k+j, the same entry index used for the wr0/wr1 strobes above,
     859              : // so the write strobe and the storage location always refer to the same entry (no hard-coded group size).
     860              :           rvdffs_fpga #(2) bht_bank (.*,
     861              :                     .clk        (bht_bank_clk[i][k]),
     862              :                     .en         (bank_sel),
     863              :                     .rawclk     (clk),  // NOTE(review): rawclk/clken presumably serve the RV_FPGA_OPTIMIZE build, where no gated clock is generated - confirm in rvdffs_fpga
     864              :                     .clken      (bank_sel),
     865              :                     .din        (wdata),
     866              :                     .dout       (bht_bank_rd_data_out[i][(NUM_BHT_LOOP*k)+j]));
     867              : 
     868              :       end // block: BHT_FLOPS
     869              :    end // block: BHT_CLK_GROUP
     870              :  end // block: BANKS
     871              : 
     872          339 :     always_comb begin : BHT_rd_mux  // combinational read mux: selects BHT flop outputs by bht_rd_addr_f and bht_rd_addr_p1_f
     873          339 :      bht_bank0_rd_data_f[1:0] = '0 ;  // default values, kept when no entry index matches the address
     874          339 :      bht_bank1_rd_data_f[1:0] = '0 ;
     875          339 :      bht_bank0_rd_data_p1_f[1:0] = '0 ;
     876          339 :      for (int j=0; j< pt.BHT_ARRAY_DEPTH; j++) begin  // compare every entry index j against both read addresses
     877     28151601 :        if (bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] == (pt.BHT_ADDR_HI-pt.BHT_ADDR_LO+1)'(j)) begin  // j cast to the address width; entry j is addressed by bht_rd_addr_f
     878     28151601 :          bht_bank0_rd_data_f[1:0] = bht_bank_rd_data_out[0][j];  // same entry index is read from both banks
     879     28151601 :          bht_bank1_rd_data_f[1:0] = bht_bank_rd_data_out[1][j];
     880              :        end
     881     28151601 :        if (bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] == (pt.BHT_ADDR_HI-pt.BHT_ADDR_LO+1)'(j)) begin  // entry j is addressed by bht_rd_addr_p1_f
     882     28151601 :          bht_bank0_rd_data_p1_f[1:0] = bht_bank_rd_data_out[0][j];  // only bank 0 is read for the p1 address
     883              :        end
     884              :       end
     885              :     end // block: BHT_rd_mux
     886              : 
     887              : 
     888          339 : function [1:0] countones;  // returns the number of set bits (0, 1 or 2) in the 2-bit input
     889              :       input [1:0] valid;  // 2-bit vector whose set bits are counted
     890              : 
     891          339 :       begin
     892              : // each bit is zero-extended to 2 bits before the add so a result of 2 (2'b10) is representable
     893          339 : countones[1:0] = {1'b0, valid[1]} +
     894          339 :                  {1'b0, valid[0]};
     895              :       end
     896              :    endfunction
     897              : endmodule // el2_ifu_bp_ctl
     898              :