Line data Source code
1 : //********************************************************************************
2 : // SPDX-License-Identifier: Apache-2.0
3 : // Copyright 2020 Western Digital Corporation or its affiliates.
4 : //
5 : // Licensed under the Apache License, Version 2.0 (the "License");
6 : // you may not use this file except in compliance with the License.
7 : // You may obtain a copy of the License at
8 : //
9 : // http://www.apache.org/licenses/LICENSE-2.0
10 : //
11 : // Unless required by applicable law or agreed to in writing, software
12 : // distributed under the License is distributed on an "AS IS" BASIS,
13 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 : // See the License for the specific language governing permissions and
15 : // limitations under the License.
16 : //********************************************************************************
17 :
18 : //********************************************************************************
19 : // Function: Branch predictor
20 : // Comments:
21 : //
22 : //
23 : // Bank3 : Bank2 : Bank1 : Bank0
24 : // FA C 8 4 0
25 : //********************************************************************************
26 :
27 : module el2_ifu_bp_ctl
28 : import el2_pkg::*;
29 : #(
30 : `include "el2_param.vh"
31 : )
32 : (
33 :
34 69840565 : input logic clk,
35 338 : input logic rst_l,
36 :
37 6782653 : input logic ic_hit_f, // Icache hit, enables F address capture
38 :
39 443 : input logic [31:1] ifc_fetch_addr_f, // look up btb address
40 3714536 : input logic ifc_fetch_req_f, // F1 valid
41 :
42 782023 : input el2_br_tlu_pkt_t dec_tlu_br0_r_pkt, // BP commit update packet, includes errors
43 366738 : input logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_r, // fghr to bp
44 187560 : input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_i0_br_index_r, // bp index
45 :
46 0 : input logic [$clog2(pt.BTB_SIZE)-1:0] dec_fa_error_index, // Fully associative btb error index
47 :
48 59220 : input logic dec_tlu_flush_lower_wb, // used to move EX4 RS to EX1 and F
49 2 : input logic dec_tlu_flush_leak_one_wb, // don't hit for leak one fetches
50 :
51 0 : input logic dec_tlu_bpred_disable, // disable all branch prediction
52 :
53 34486 : input el2_predict_pkt_t exu_mp_pkt, // mispredict packet
54 :
55 300064 : input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_eghr, // execute ghr (for patching fghr)
56 378616 : input logic [pt.BHT_GHR_SIZE-1:0] exu_mp_fghr, // Mispredict fghr
57 196152 : input logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_index, // Mispredict index
58 115620 : input logic [pt.BTB_BTAG_SIZE-1:0] exu_mp_btag, // Mispredict btag
59 :
60 673974 : input logic exu_flush_final, // all flushes
61 :
62 3170544 : output logic ifu_bp_hit_taken_f, // btb hit, select target
63 518479 : output logic [31:1] ifu_bp_btb_target_f, // predicted target PC
64 2404958 : output logic ifu_bp_inst_mask_f, // tell ic which valids to kill because of a taken branch, right justified
65 :
66 376431 : output logic [pt.BHT_GHR_SIZE-1:0] ifu_bp_fghr_f, // fetch ghr
67 :
68 2374031 : output logic [1:0] ifu_bp_way_f, // way
69 70446 : output logic [1:0] ifu_bp_ret_f, // predicted ret
70 2024478 : output logic [1:0] ifu_bp_hist1_f, // history counters for all 4 potential branches, bit 1, right justified
71 1823494 : output logic [1:0] ifu_bp_hist0_f, // history counters for all 4 potential branches, bit 0, right justified
72 408365 : output logic [1:0] ifu_bp_pc4_f, // pc4 indication, right justified
73 956919 : output logic [1:0] ifu_bp_valid_f, // branch valid, right justified
74 2176718 : output logic [11:0] ifu_bp_poffset_f, // predicted target
75 :
76 0 : output logic [1:0] [$clog2(pt.BTB_SIZE)-1:0] ifu_bp_fa_index_f, // predicted branch index (fully associative option)
77 :
78 : // Excluding scan_mode from coverage as its usage is determined by the integrator of the VeeR core.
79 : /*verilator coverage_off*/
80 : input logic scan_mode
81 : /*verilator coverage_on*/
82 : );
83 :
84 :
85 : localparam BTB_DWIDTH = pt.BTB_TOFFSET_SIZE+pt.BTB_BTAG_SIZE+5;
86 : localparam BTB_DWIDTH_TOP = int'(pt.BTB_TOFFSET_SIZE)+int'(pt.BTB_BTAG_SIZE)+4;
87 : localparam BTB_FA_INDEX = $clog2(pt.BTB_SIZE)-1;
88 : localparam FA_CMP_LOWER = $clog2(pt.ICACHE_LN_SZ);
89 : localparam FA_TAG_END_UPPER= 5+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER)-1; // must cast to int or vcs build fails
90 : localparam FA_TAG_START_LOWER = 3+int'(pt.BTB_TOFFSET_SIZE)+int'(FA_CMP_LOWER);
91 : localparam FA_TAG_END_LOWER = 5+int'(pt.BTB_TOFFSET_SIZE);
92 :
93 : localparam TAG_START=BTB_DWIDTH-1;
94 : localparam PC4=4;
95 : localparam BOFF=3;
96 : localparam CALL=2;
97 : localparam RET=1;
98 : localparam BV=0;
99 :
100 : localparam LRU_SIZE=pt.BTB_ARRAY_DEPTH;
101 : localparam NUM_BHT_LOOP = (pt.BHT_ARRAY_DEPTH > 16 ) ? 16 : pt.BHT_ARRAY_DEPTH;
102 : localparam NUM_BHT_LOOP_INNER_HI = (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+3 : pt.BHT_ADDR_HI;
103 : localparam NUM_BHT_LOOP_OUTER_LO = (pt.BHT_ARRAY_DEPTH > 16 ) ?pt.BHT_ADDR_LO+4 : pt.BHT_ADDR_LO;
104 : localparam BHT_NO_ADDR_MATCH = ( pt.BHT_ARRAY_DEPTH <= 16 );
105 :
106 :
107 200596 : logic exu_mp_valid_write; // BTB write qualifier: exu_mp_valid & exu_mp_ataken & ~exu_mp_pkt.valid
108 523400 : logic exu_mp_ataken; // copy of exu_mp_pkt.ataken
109 508792 : logic exu_mp_valid; // mispredict (exu_mp_pkt.misp), suppressed while leak_one_f is set
110 230218 : logic exu_mp_boffset; // branch offset
111 273332 : logic exu_mp_pc4; // branch is a 4B inst
112 47300 : logic exu_mp_call; // branch is a call inst
113 149064 : logic exu_mp_ret; // branch is a ret inst
114 80332 : logic exu_mp_ja; // branch is a jump always
115 283182 : logic [1:0] exu_mp_hist; // new history
116 90270 : logic [11:0] exu_mp_tgt; // target offset
117 196152 : logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] exu_mp_addr; // BTB/BHT address
118 2913013 : logic dec_tlu_br0_v_wb; // WB stage history update
119 2721663 : logic [1:0] dec_tlu_br0_hist_wb; // new history
120 187560 : logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] dec_tlu_br0_addr_wb; // addr (driven from exu_i0_br_index_r)
121 28846 : logic dec_tlu_br0_error_wb; // error; invalidate bank
122 9608 : logic dec_tlu_br0_start_error_wb; // error; invalidate all 4 banks in fg
123 366738 : logic [pt.BHT_GHR_SIZE-1:0] exu_i0_br_fghr_wb; // copy of exu_i0_br_fghr_r, hashed into the commit-time BHT write address
124 :
125 5008 : logic use_mp_way, use_mp_way_p1;
126 122 : logic [pt.RET_STACK_SIZE-1:0][31:0] rets_out, rets_in;
127 230670 : logic [pt.RET_STACK_SIZE-1:0] rsenable;
128 :
129 :
130 2176718 : logic [11:0] btb_rd_tgt_f;
131 498032 : logic btb_rd_pc4_f, btb_rd_call_f, btb_rd_ret_f;
132 1443205 : logic [1:1] bp_total_branch_offset_f;
133 :
134 445 : logic [31:1] bp_btb_target_adder_f;
135 445 : logic [31:1] bp_rs_call_target_f;
136 203037 : logic rs_push, rs_pop, rs_hold;
137 126304 : logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_rd_addr_p1_f, btb_wr_addr, btb_rd_addr_f;
138 12203 : logic [pt.BTB_BTAG_SIZE-1:0] btb_wr_tag, fetch_rd_tag_f, fetch_rd_tag_p1_f;
139 56054 : logic [BTB_DWIDTH-1:0] btb_wr_data;
140 101186 : logic btb_wr_en_way0, btb_wr_en_way1;
141 :
142 :
143 261199 : logic dec_tlu_error_wb, btb_valid, dec_tlu_br0_middle_wb;
144 187560 : logic [pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] btb_error_addr_wb;
145 2468 : logic branch_error_collision_f, fetch_mp_collision_f, branch_error_collision_p1_f, fetch_mp_collision_p1_f;
146 :
147 1040 : logic branch_error_bank_conflict_f;
148 372619 : logic [pt.BHT_GHR_SIZE-1:0] merged_ghr, fghr_ns, fghr;
149 74753 : logic [1:0] num_valids;
150 232 : logic [LRU_SIZE-1:0] btb_lru_b0_f, btb_lru_b0_hold, btb_lru_b0_ns,
151 15418 : fetch_wrindex_dec, fetch_wrindex_p1_dec, fetch_wrlru_b0, fetch_wrlru_p1_b0,
152 366 : mp_wrindex_dec, mp_wrlru_b0;
153 1641818 : logic btb_lru_rd_f, btb_lru_rd_p1_f, lru_update_valid_f;
154 539554 : logic tag_match_way0_f, tag_match_way1_f;
155 755464 : logic [1:0] way_raw, bht_dir_f, btb_sel_f, wayhit_f, vwayhit_f, wayhit_p1_f;
156 361342 : logic [1:0] bht_valid_f, bht_force_taken_f;
157 :
158 2 : logic leak_one_f, leak_one_f_d1;
159 :
160 : logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_out ;
161 :
162 : logic [LRU_SIZE-1:0][BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_out ;
163 :
164 2669337 : logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_f ;
165 461811 : logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_f ;
166 :
167 1084332 : logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way0_p1_f ;
168 459074 : logic [BTB_DWIDTH-1:0] btb_bank0_rd_data_way1_p1_f ;
169 :
170 278253 : logic [BTB_DWIDTH-1:0] btb_vbank0_rd_data_f, btb_vbank1_rd_data_f;
171 :
172 3170544 : logic final_h;
173 182762 : logic btb_fg_crossing_f;
174 284882 : logic middle_of_bank;
175 :
176 :
177 1781405 : logic [1:0] bht_vbank0_rd_data_f, bht_vbank1_rd_data_f;
178 982 : logic branch_error_bank_conflict_p1_f;
179 595710 : logic tag_match_way0_p1_f, tag_match_way1_p1_f;
180 :
181 162887 : logic [1:0] btb_vlru_rd_f, fetch_start_f, tag_match_vway1_expanded_f, tag_match_way0_expanded_p1_f, tag_match_way1_expanded_p1_f;
182 445 : logic [31:2] fetch_addr_p1_f;
183 :
184 :
185 203424 : logic exu_mp_way, exu_mp_way_f, dec_tlu_br0_way_wb, dec_tlu_way_wb;
186 179172 : logic [BTB_DWIDTH-1:0] btb_bank0e_rd_data_f, btb_bank0e_rd_data_p1_f;
187 :
188 1809448 : logic [BTB_DWIDTH-1:0] btb_bank0o_rd_data_f;
189 :
190 196725 : logic [1:0] tag_match_way0_expanded_f, tag_match_way1_expanded_f;
191 :
192 :
193 1709581 : logic [1:0] bht_bank0_rd_data_f;
194 1958482 : logic [1:0] bht_bank1_rd_data_f;
195 1802789 : logic [1:0] bht_bank0_rd_data_p1_f;
196 : genvar j, i;
197 :
198 : assign exu_mp_valid = exu_mp_pkt.misp & ~leak_one_f; // mispredict, ignored while leak_one_f is set
199 : assign exu_mp_boffset = exu_mp_pkt.boffset; // branch offset
200 : assign exu_mp_pc4 = exu_mp_pkt.pc4; // branch is a 4B inst
201 : assign exu_mp_call = exu_mp_pkt.pcall; // branch is a call inst
202 : assign exu_mp_ret = exu_mp_pkt.pret; // branch is a ret inst
203 : assign exu_mp_ja = exu_mp_pkt.pja; // branch is a jump always
204 : assign exu_mp_way = exu_mp_pkt.way; // repl way
205 : assign exu_mp_hist[1:0] = exu_mp_pkt.hist[1:0]; // new history
206 : assign exu_mp_tgt[11:0] = exu_mp_pkt.toffset[11:0] ; // target offset
207 : assign exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_mp_index[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] ; // BTB/BHT address
208 : assign exu_mp_ataken = exu_mp_pkt.ataken; // ataken flag from the mispredict packet
209 :
210 : // Unpack the commit update packet (dec_tlu_br0_r_pkt); the index and fghr arrive on separate exu ports
211 : assign dec_tlu_br0_v_wb = dec_tlu_br0_r_pkt.valid;
212 : assign dec_tlu_br0_hist_wb[1:0] = dec_tlu_br0_r_pkt.hist[1:0];
213 : assign dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = exu_i0_br_index_r[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]; // from exu_i0_br_index_r, not the packet
214 : assign dec_tlu_br0_error_wb = dec_tlu_br0_r_pkt.br_error;
215 : assign dec_tlu_br0_middle_wb = dec_tlu_br0_r_pkt.middle;
216 : assign dec_tlu_br0_way_wb = dec_tlu_br0_r_pkt.way;
217 : assign dec_tlu_br0_start_error_wb = dec_tlu_br0_r_pkt.br_start_error;
218 : assign exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0] = exu_i0_br_fghr_r[pt.BHT_GHR_SIZE-1:0]; // from exu_i0_br_fghr_r, not the packet
219 :
220 :
221 :
222 :
223 : // ----------------------------------------------------------------------
224 : // READ
225 : // ----------------------------------------------------------------------
226 :
227 : // hash the incoming fetch PC, first guess at hashing algorithm
228 : el2_btb_addr_hash #(.pt(pt)) f1hash(.pc(ifc_fetch_addr_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
229 :
230 :
231 : assign fetch_addr_p1_f[31:2] = ifc_fetch_addr_f[31:2] + 30'b1;
232 : el2_btb_addr_hash #(.pt(pt)) f1hash_p1(.pc(fetch_addr_p1_f[pt.BTB_INDEX3_HI:pt.BTB_INDEX1_LO]), .hash(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]));
233 :
234 : assign btb_sel_f[1] = ~bht_dir_f[0];
235 : assign btb_sel_f[0] = bht_dir_f[0];
236 :
237 : assign fetch_start_f[1:0] = {ifc_fetch_addr_f[1], ~ifc_fetch_addr_f[1]};
238 :
239 : // Errors colliding with fetches must kill the btb/bht hit.
240 :
241 : assign branch_error_collision_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]);
242 : assign branch_error_collision_p1_f = dec_tlu_error_wb & (btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]);
243 :
244 : assign branch_error_bank_conflict_f = branch_error_collision_f & dec_tlu_error_wb;
245 : assign branch_error_bank_conflict_p1_f = branch_error_collision_p1_f & dec_tlu_error_wb;
246 :
247 : // set on leak one, hold until next flush without leak one
248 : assign leak_one_f = (dec_tlu_flush_leak_one_wb & dec_tlu_flush_lower_wb) | (leak_one_f_d1 & ~dec_tlu_flush_lower_wb);
249 :
250 673974 : logic exu_flush_final_d1;
251 :
252 : if(!pt.BTB_FULLYA) begin : genblock1 // read path and LRU used when the BTB is not fully associative
253 : assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
254 : exu_mp_valid & ifc_fetch_req_f &
255 : (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])
256 : );
257 : assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
258 : exu_mp_valid & ifc_fetch_req_f &
259 : (exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO])
260 : );
261 : // 2-way SA, figure out the way hit and mux accordingly. NOTE(review): the literal 17 used as the tag LSB equals pt.BTB_TOFFSET_SIZE+5 only when pt.BTB_TOFFSET_SIZE is 12
262 : assign tag_match_way0_f = btb_bank0_rd_data_way0_f[BV] & (btb_bank0_rd_data_way0_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
263 : ~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f;
264 : // NOTE(review): way0 and way1 are both gated with dec_tlu_way_wb (way0 does not use ~dec_tlu_way_wb, unlike btb_wr_en_way0) - confirm this is intended
265 : assign tag_match_way1_f = btb_bank0_rd_data_way1_f[BV] & (btb_bank0_rd_data_way1_f[TAG_START:17] == fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]) &
266 : ~(dec_tlu_way_wb & branch_error_bank_conflict_f) & ifc_fetch_req_f & ~leak_one_f;
267 :
268 : assign tag_match_way0_p1_f = btb_bank0_rd_data_way0_p1_f[BV] & (btb_bank0_rd_data_way0_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
269 : ~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f;
270 :
271 : assign tag_match_way1_p1_f = btb_bank0_rd_data_way1_p1_f[BV] & (btb_bank0_rd_data_way1_p1_f[TAG_START:17] == fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]) &
272 : ~(dec_tlu_way_wb & branch_error_bank_conflict_p1_f) & ifc_fetch_req_f & ~leak_one_f;
273 :
274 :
275 : // Both ways could hit, use the offset bit to reorder: bit [1] is the match when (BOFF ^ PC4) is 1, bit [0] when it is 0
276 :
277 : assign tag_match_way0_expanded_f[1:0] = {tag_match_way0_f & (btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4]),
278 : tag_match_way0_f & ~(btb_bank0_rd_data_way0_f[BOFF] ^ btb_bank0_rd_data_way0_f[PC4])};
279 :
280 : assign tag_match_way1_expanded_f[1:0] = {tag_match_way1_f & (btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4]),
281 : tag_match_way1_f & ~(btb_bank0_rd_data_way1_f[BOFF] ^ btb_bank0_rd_data_way1_f[PC4])};
282 :
283 : assign tag_match_way0_expanded_p1_f[1:0] = {tag_match_way0_p1_f & (btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4]),
284 : tag_match_way0_p1_f & ~(btb_bank0_rd_data_way0_p1_f[BOFF] ^ btb_bank0_rd_data_way0_p1_f[PC4])};
285 :
286 : assign tag_match_way1_expanded_p1_f[1:0] = {tag_match_way1_p1_f & (btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4]),
287 : tag_match_way1_p1_f & ~(btb_bank0_rd_data_way1_p1_f[BOFF] ^ btb_bank0_rd_data_way1_p1_f[PC4])};
288 :
289 : assign wayhit_f[1:0] = tag_match_way0_expanded_f[1:0] | tag_match_way1_expanded_f[1:0];
290 : assign wayhit_p1_f[1:0] = tag_match_way0_expanded_p1_f[1:0] | tag_match_way1_expanded_p1_f[1:0];
291 : // AND-OR muxes: "o" data is selected by expanded bit [1], "e" data by expanded bit [0]; the result is all zeros when neither way matches
292 : assign btb_bank0o_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[1]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) |
293 : ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[1]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) );
294 : assign btb_bank0e_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_f[0]}} & btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0]) |
295 : ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_f[0]}} & btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0]) );
296 :
297 : assign btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{tag_match_way0_expanded_p1_f[0]}} & btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0]) |
298 : ({17+pt.BTB_BTAG_SIZE{tag_match_way1_expanded_p1_f[0]}} & btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0]) );
299 :
300 : // virtual bank order: rotate by fetch_start_f; when ifc_fetch_addr_f[1] is set, vbank0 takes the "o" data and vbank1 takes the "e" data of the p1 read
301 :
302 : assign btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} & btb_bank0e_rd_data_f[BTB_DWIDTH-1:0]) |
303 : ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} & btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) );
304 : assign btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = ( ({17+pt.BTB_BTAG_SIZE{fetch_start_f[0]}} & btb_bank0o_rd_data_f[BTB_DWIDTH-1:0]) |
305 : ({17+pt.BTB_BTAG_SIZE{fetch_start_f[1]}} & btb_bank0e_rd_data_p1_f[BTB_DWIDTH-1:0]) );
306 : // reported way per lane: the way1 tag match on a hit, otherwise the LRU read value
307 : assign way_raw[1:0] = tag_match_vway1_expanded_f[1:0] | (~vwayhit_f[1:0] & btb_vlru_rd_f[1:0]);
308 :
309 : // --------------------------------------------------------------------------------
310 : // --------------------------------------------------------------------------------
311 : // update lru
312 : // mp
313 :
314 : // create a onehot lru write vector
315 : assign mp_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
316 :
317 : // fetch
318 : assign fetch_wrindex_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
319 : assign fetch_wrindex_p1_dec[LRU_SIZE-1:0] = {{LRU_SIZE-1{1'b0}},1'b1} << btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
320 :
321 : assign mp_wrlru_b0[LRU_SIZE-1:0] = mp_wrindex_dec[LRU_SIZE-1:0] & {LRU_SIZE{exu_mp_valid}};
322 :
323 : // hold mask: entries updated by neither the mispredict nor the fetch. NOTE(review): fetch_wrlru_p1_b0 is not excluded here, so a p1 update ORs with the held bit - confirm intended
324 : assign btb_lru_b0_hold[LRU_SIZE-1:0] = ~mp_wrlru_b0[LRU_SIZE-1:0] & ~fetch_wrlru_b0[LRU_SIZE-1:0];
325 :
326 : // Forward the mp lru information to the fetch, avoids multiple way hits later
327 : assign use_mp_way = fetch_mp_collision_f;
328 : assign use_mp_way_p1 = fetch_mp_collision_p1_f;
329 :
330 : assign lru_update_valid_f = (vwayhit_f[0] | vwayhit_f[1]) & ifc_fetch_req_f & ~leak_one_f;
331 :
332 :
333 : assign fetch_wrlru_b0[LRU_SIZE-1:0] = fetch_wrindex_dec[LRU_SIZE-1:0] &
334 : {LRU_SIZE{lru_update_valid_f}};
335 : assign fetch_wrlru_p1_b0[LRU_SIZE-1:0] = fetch_wrindex_p1_dec[LRU_SIZE-1:0] &
336 : {LRU_SIZE{lru_update_valid_f}};
337 : // next LRU bit per entry: held value, or ~exu_mp_way on a mispredict update, or the way0 tag match on a fetch update
338 : assign btb_lru_b0_ns[LRU_SIZE-1:0] = ( (btb_lru_b0_hold[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]) |
339 : (mp_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{~exu_mp_way}}) |
340 : (fetch_wrlru_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_f}}) |
341 : (fetch_wrlru_p1_b0[LRU_SIZE-1:0] & {LRU_SIZE{tag_match_way0_p1_f}}) );
342 :
343 :
344 : // LRU bit read at the fetch index; when a mispredict collides with the fetch, exu_mp_way_f is used instead
345 : assign btb_lru_rd_f = use_mp_way ? exu_mp_way_f : |(fetch_wrindex_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]);
346 :
347 : assign btb_lru_rd_p1_f = use_mp_way_p1 ? exu_mp_way_f : |(fetch_wrindex_p1_dec[LRU_SIZE-1:0] & btb_lru_b0_f[LRU_SIZE-1:0]);
348 :
349 : // rotated into virtual bank order using fetch_start_f
350 : assign btb_vlru_rd_f[1:0] = ( ({2{fetch_start_f[0]}} & {btb_lru_rd_f, btb_lru_rd_f}) |
351 : ({2{fetch_start_f[1]}} & {btb_lru_rd_p1_f, btb_lru_rd_f}));
352 :
353 : assign tag_match_vway1_expanded_f[1:0] = ( ({2{fetch_start_f[0]}} & {tag_match_way1_expanded_f[1:0]}) |
354 : ({2{fetch_start_f[1]}} & {tag_match_way1_expanded_p1_f[0], tag_match_way1_expanded_f[1]}) );
355 :
356 : // LRU state flop, one bit per BTB index, enabled on a fetch request or a valid mispredict
357 : rvdffe #(LRU_SIZE) btb_lru_ff (.*, .en(ifc_fetch_req_f | exu_mp_valid),
358 : .din(btb_lru_b0_ns[(LRU_SIZE)-1:0]),
359 : .dout(btb_lru_b0_f[(LRU_SIZE)-1:0]));
360 :
361 : end // if (!pt.BTB_FULLYA)
362 : // Detect end of cache line and mask as needed (eoc_mask is ANDed into vwayhit_f[1] in the non-fully-associative path)
363 752417 : logic eoc_near; // every bit of ifc_fetch_addr_f[pt.ICACHE_BEAT_ADDR_HI:3] is 1
364 200327 : logic eoc_mask; // low only when eoc_near is set and ifc_fetch_addr_f[2:1] == 2'b11
365 : assign eoc_near = &ifc_fetch_addr_f[pt.ICACHE_BEAT_ADDR_HI:3];
366 : assign eoc_mask = ~eoc_near| (|(~ifc_fetch_addr_f[2:1]));
367 :
368 :
369 :
370 : // --------------------------------------------------------------------------------
371 : // --------------------------------------------------------------------------------
372 :
373 : // mux out critical hit bank for pc computation
374 : // This is only useful for the first taken branch in the fetch group
375 1992323 : logic [16:1] btb_sel_data_f;
376 :
377 : assign btb_rd_tgt_f[11:0] = btb_sel_data_f[16:5];
378 : assign btb_rd_pc4_f = btb_sel_data_f[4];
379 : assign btb_rd_call_f = btb_sel_data_f[2];
380 : assign btb_rd_ret_f = btb_sel_data_f[1];
381 :
382 : assign btb_sel_data_f[16:1] = ( ({16{btb_sel_f[1]}} & btb_vbank1_rd_data_f[16:1]) |
383 : ({16{btb_sel_f[0]}} & btb_vbank0_rd_data_f[16:1]) );
384 :
385 :
386 70446 : logic [1:0] hist0_raw, hist1_raw, pc4_raw, pret_raw;
387 :
388 : // a valid taken target needs to kill the next fetch as we compute the target address
389 : assign ifu_bp_hit_taken_f = |(vwayhit_f[1:0] & hist1_raw[1:0]) & ifc_fetch_req_f & ~leak_one_f_d1 & ~dec_tlu_bpred_disable;
390 :
391 :
392 : // Don't put calls/rets/ja in the predictor, force the bht taken instead
393 : assign bht_force_taken_f[1:0] = {(btb_vbank1_rd_data_f[CALL] | btb_vbank1_rd_data_f[RET]),
394 : (btb_vbank0_rd_data_f[CALL] | btb_vbank0_rd_data_f[RET])};
395 :
396 :
397 : // taken and valid, otherwise, branch errors must clear the bht
398 : assign bht_valid_f[1:0] = vwayhit_f[1:0];
399 :
400 : assign bht_vbank0_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank0_rd_data_f[1:0]) |
401 : ({2{fetch_start_f[1]}} & bht_bank1_rd_data_f[1:0]) );
402 :
403 : assign bht_vbank1_rd_data_f[1:0] = ( ({2{fetch_start_f[0]}} & bht_bank1_rd_data_f[1:0]) |
404 : ({2{fetch_start_f[1]}} & bht_bank0_rd_data_p1_f[1:0]) );
405 :
406 :
407 : assign bht_dir_f[1:0] = {(bht_force_taken_f[1] | bht_vbank1_rd_data_f[1]) & bht_valid_f[1],
408 : (bht_force_taken_f[0] | bht_vbank0_rd_data_f[1]) & bht_valid_f[0]};
409 :
410 : assign ifu_bp_inst_mask_f = (ifu_bp_hit_taken_f & btb_sel_f[1]) | ~ifu_bp_hit_taken_f;
411 :
412 :
413 :
414 :
415 : // Branch prediction info is sent with the 2byte lane associated with the end of the branch.
416 : // Cases
417 : // BANK1 BANK0
418 : // -------------------------------
419 : // | : | : |
420 : // -------------------------------
421 : // <------------> : PC4 branch, offset, should be in B1 (indicated on [2])
422 : // <------------> : PC4 branch, no offset, indicate PC4, VALID, HIST on [1]
423 : // <------------> : PC4 branch, offset, indicate PC4, VALID, HIST on [0]
424 : // <------> : PC2 branch, offset, indicate VALID, HIST on [1]
425 : // <------> : PC2 branch, no offset, indicate VALID, HIST on [0]
426 : //
427 :
428 :
429 :
430 : assign hist1_raw[1:0] = bht_force_taken_f[1:0] | {bht_vbank1_rd_data_f[1],
431 : bht_vbank0_rd_data_f[1]};
432 :
433 : assign hist0_raw[1:0] = {bht_vbank1_rd_data_f[0],
434 : bht_vbank0_rd_data_f[0]};
435 :
436 :
437 : assign pc4_raw[1:0] = {vwayhit_f[1] & btb_vbank1_rd_data_f[PC4],
438 : vwayhit_f[0] & btb_vbank0_rd_data_f[PC4]};
439 :
440 : assign pret_raw[1:0] = {vwayhit_f[1] & ~btb_vbank1_rd_data_f[CALL] & btb_vbank1_rd_data_f[RET],
441 : vwayhit_f[0] & ~btb_vbank0_rd_data_f[CALL] & btb_vbank0_rd_data_f[RET]};
442 :
443 : // GHR
444 :
445 :
446 : // count the valids with masking based on first taken
447 : assign num_valids[1:0] = countones(bht_valid_f[1:0]);
448 :
449 : // Note that the following property holds
450 : // P: prior ghr, H: history bit of last valid branch in line (could be 1 or 0)
451 : // Num valid branches What new GHR must be
452 : // 2 0H
453 : // 1 PH
454 : // 0 PP
455 :
456 : assign final_h = |(btb_sel_f[1:0] & bht_dir_f[1:0]);
457 :
458 : assign merged_ghr[pt.BHT_GHR_SIZE-1:0] = (
459 : ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h2}} & {fghr[pt.BHT_GHR_SIZE-3:0], 1'b0, final_h}) | // 0H
460 : ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h1}} & {fghr[pt.BHT_GHR_SIZE-2:0], final_h}) | // PH
461 : ({pt.BHT_GHR_SIZE{num_valids[1:0] == 2'h0}} & {fghr[pt.BHT_GHR_SIZE-1:0]}) ); // PP
462 :
463 378616 : logic [pt.BHT_GHR_SIZE-1:0] exu_flush_ghr;
464 : assign exu_flush_ghr[pt.BHT_GHR_SIZE-1:0] = exu_mp_fghr[pt.BHT_GHR_SIZE-1:0];
465 :
466 : assign fghr_ns[pt.BHT_GHR_SIZE-1:0] = ( ({pt.BHT_GHR_SIZE{exu_flush_final_d1}} & exu_flush_ghr[pt.BHT_GHR_SIZE-1:0]) |
467 : ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1}} & merged_ghr[pt.BHT_GHR_SIZE-1:0]) |
468 : ({pt.BHT_GHR_SIZE{~exu_flush_final_d1 & ~(ifc_fetch_req_f & ic_hit_f & ~leak_one_f_d1)}} & fghr[pt.BHT_GHR_SIZE-1:0]));
469 :
470 : rvdffie #(.WIDTH(pt.BHT_GHR_SIZE+3),.OVERRIDE(1)) fetchghr (.*,
471 : .din ({exu_flush_final, exu_mp_way, leak_one_f, fghr_ns[pt.BHT_GHR_SIZE-1:0]}),
472 : .dout({exu_flush_final_d1, exu_mp_way_f, leak_one_f_d1, fghr[pt.BHT_GHR_SIZE-1:0]}));
473 :
474 : assign ifu_bp_fghr_f[pt.BHT_GHR_SIZE-1:0] = fghr[pt.BHT_GHR_SIZE-1:0];
475 :
476 :
477 : assign ifu_bp_way_f[1:0] = way_raw[1:0];
478 : assign ifu_bp_hist1_f[1:0] = hist1_raw[1:0];
479 : assign ifu_bp_hist0_f[1:0] = hist0_raw[1:0];
480 : assign ifu_bp_pc4_f[1:0] = pc4_raw[1:0];
481 :
482 : assign ifu_bp_valid_f[1:0] = vwayhit_f[1:0] & ~{2{dec_tlu_bpred_disable}};
483 : assign ifu_bp_ret_f[1:0] = pret_raw[1:0];
484 :
485 :
486 : // compute target
487 : // Form the fetch group offset based on the btb hit location and the location of the branch within the 4 byte chunk
488 :
489 : // .i 5
490 : // .o 3
491 : // .ilb bht_dir_f[1] bht_dir_f[0] fetch_start_f[1] fetch_start_f[0] btb_rd_pc4_f
492 : // .ob bloc_f[1] bloc_f[0] use_fa_plus
493 : // .type fr
494 : //
495 : //
496 : // ## rotdir[1:0] fs pc4 off fapl
497 : // -1 01 - 01 0
498 : // 10 01 - 10 0
499 : //
500 : // -1 10 - 10 0
501 : // 10 10 0 01 1
502 : // 10 10 1 01 0
503 2583121 : logic [1:0] bloc_f;
504 2358836 : logic use_fa_plus;
505 : assign bloc_f[1] = (bht_dir_f[0] & ~fetch_start_f[0]) | (~bht_dir_f[0]
506 : & fetch_start_f[0]);
507 : assign bloc_f[0] = (bht_dir_f[0] & fetch_start_f[0]) | (~bht_dir_f[0]
508 : & ~fetch_start_f[0]);
509 : assign use_fa_plus = (~bht_dir_f[0] & ~fetch_start_f[0] & ~btb_rd_pc4_f);
510 :
511 :
512 :
513 :
514 : assign btb_fg_crossing_f = fetch_start_f[0] & btb_sel_f[0] & btb_rd_pc4_f;
515 :
516 : assign bp_total_branch_offset_f = bloc_f[1] ^ btb_rd_pc4_f;
517 :
518 340 : logic [31:2] adder_pc_in_f, ifc_fetch_adder_prior;
519 : rvdfflie #(.WIDTH(30), .LEFT(19)) faddrf_ff (.*, .en(ifc_fetch_req_f & ~ifu_bp_hit_taken_f & ic_hit_f), .din(ifc_fetch_addr_f[31:2]), .dout(ifc_fetch_adder_prior[31:2]));
520 :
521 :
522 : assign ifu_bp_poffset_f[11:0] = btb_rd_tgt_f[11:0];
523 :
524 : assign adder_pc_in_f[31:2] = ( ({30{ use_fa_plus}} & fetch_addr_p1_f[31:2]) |
525 : ({30{ btb_fg_crossing_f}} & ifc_fetch_adder_prior[31:2]) |
526 : ({30{~btb_fg_crossing_f & ~use_fa_plus}} & ifc_fetch_addr_f[31:2]));
527 :
528 : rvbradder predtgt_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}),
529 : .offset(btb_rd_tgt_f[11:0]),
530 : .dout(bp_btb_target_adder_f[31:1])
531 : );
532 : // mux in the return stack address here for a predicted return assuming the RS is valid, quiet if no prediction
533 : assign ifu_bp_btb_target_f[31:1] = (({31{btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0] & ifu_bp_hit_taken_f}} & rets_out[0][31:1]) |
534 : ({31{~(btb_rd_ret_f & ~btb_rd_call_f & rets_out[0][0]) & ifu_bp_hit_taken_f}} & bp_btb_target_adder_f[31:1]) );
535 :
536 :
537 : // ----------------------------------------------------------------------
538 : // Return Stack
539 : // ----------------------------------------------------------------------
540 :
541 : rvbradder rs_addr (.pc({adder_pc_in_f[31:2], bp_total_branch_offset_f}),
542 : .offset({11'b0, ~btb_rd_pc4_f}),
543 : .dout(bp_rs_call_target_f[31:1])
544 : );
545 :
546 : assign rs_push = (btb_rd_call_f & ~btb_rd_ret_f & ifu_bp_hit_taken_f);
547 : assign rs_pop = (btb_rd_ret_f & ~btb_rd_call_f & ifu_bp_hit_taken_f);
548 : assign rs_hold = ~rs_push & ~rs_pop;
549 :
550 :
551 :
552 : // Fetch based (bit 0 is a valid)
553 : assign rets_in[0][31:0] = ( ({32{rs_push}} & {bp_rs_call_target_f[31:1], 1'b1}) | // target[31:1], valid
554 : ({32{rs_pop}} & rets_out[1][31:0]) );
555 :
556 : assign rsenable[0] = ~rs_hold;
557 :
558 : for (i=0; i<pt.RET_STACK_SIZE; i++) begin : retstack
559 : // One 32-bit flop per return stack entry; rets_in[0]/rsenable[0] are driven outside this loop, entries 1..N-1 are driven here
560 : // for the last entry in the stack, we don't have a pop position, so it only loads from i-1 on a push. NOTE(review): with pt.RET_STACK_SIZE == 1 this branch would read rets_out[-1] and drive rets_in[0] a second time - presumably the configuration never allows that; confirm
561 : if(i==pt.RET_STACK_SIZE-1) begin
562 : assign rets_in[i][31:0] = rets_out[i-1][31:0];
563 : assign rsenable[i] = rs_push;
564 : end
565 : else if(i>0) begin // middle entries: load from i-1 on a push, from i+1 on a pop
566 : assign rets_in[i][31:0] = ( ({32{rs_push}} & rets_out[i-1][31:0]) |
567 : ({32{rs_pop}} & rets_out[i+1][31:0]) );
568 : assign rsenable[i] = rs_push | rs_pop;
569 : end
570 : rvdffe #(32) rets_ff (.*, .en(rsenable[i]), .din(rets_in[i][31:0]), .dout(rets_out[i][31:0]));
571 :
572 : end : retstack
573 :
574 : // ----------------------------------------------------------------------
575 : // WRITE
576 : // ----------------------------------------------------------------------
577 :
578 :
579 : assign dec_tlu_error_wb = dec_tlu_br0_start_error_wb | dec_tlu_br0_error_wb;
580 :
581 : assign btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
582 :
583 : assign dec_tlu_way_wb = dec_tlu_br0_way_wb;
584 :
585 : assign btb_valid = exu_mp_valid & ~dec_tlu_error_wb;
586 :
587 : assign btb_wr_tag[pt.BTB_BTAG_SIZE-1:0] = exu_mp_btag[pt.BTB_BTAG_SIZE-1:0];
588 :
589 : if(!pt.BTB_FULLYA) begin
590 : // Read-side tag hashes for the fetch address and for fetch_addr_p1_f (fetch address + 4 bytes); the folded variant takes 2*BTB_BTAG_SIZE pc bits above BTB_ADDR_HI, the other takes 3*BTB_BTAG_SIZE
591 : if(pt.BTB_BTAG_FOLD) begin : btbfold
592 : el2_btb_tag_hash_fold #(.pt(pt)) rdtagf (.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]),
593 : .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
594 : el2_btb_tag_hash_fold #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]),
595 : .pc({fetch_addr_p1_f[ pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
596 : end
597 : else begin : btbfold
598 : el2_btb_tag_hash #(.pt(pt)) rdtagf(.hash(fetch_rd_tag_f[pt.BTB_BTAG_SIZE-1:0]),
599 : .pc({ifc_fetch_addr_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
600 : el2_btb_tag_hash #(.pt(pt)) rdtagp1f(.hash(fetch_rd_tag_p1_f[pt.BTB_BTAG_SIZE-1:0]),
601 : .pc({fetch_addr_p1_f[pt.BTB_ADDR_HI+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE+pt.BTB_BTAG_SIZE:pt.BTB_ADDR_HI+1]}));
602 : end
603 : // Way write enables: a mispredict write (exu_mp_valid_write) targets exu_mp_way; a commit error (dec_tlu_error_wb) blocks that write and targets dec_tlu_way_wb instead
604 : assign btb_wr_en_way0 = ( ({{~exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) |
605 : ({{~dec_tlu_way_wb & dec_tlu_error_wb}}));
606 :
607 : assign btb_wr_en_way1 = ( ({{exu_mp_way & exu_mp_valid_write & ~dec_tlu_error_wb}}) |
608 : ({{dec_tlu_way_wb & dec_tlu_error_wb}}));
609 : assign btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] = dec_tlu_error_wb ? btb_error_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] : exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO];
610 :
611 : // Way hits rotated by fetch_start_f (lane 1 comes from the p1 read when ifc_fetch_addr_f[1] is set); eoc_mask gates lane 1 only
612 : assign vwayhit_f[1:0] = ( ({2{fetch_start_f[0]}} & {wayhit_f[1:0]}) |
613 : ({2{fetch_start_f[1]}} & {wayhit_p1_f[0], wayhit_f[1]})) & {eoc_mask, 1'b1};
614 :
615 : end // if (!pt.BTB_FULLYA)
616 :
// BTB entry written on an update. Fields, MSB to LSB:
//   tag, target offset, pc4, boffset, (call | ja), (ret | ja), valid
// i.e. pt.BTB_BTAG_SIZE + pt.BTB_TOFFSET_SIZE + 5 bits in total.
617 : assign btb_wr_data[BTB_DWIDTH-1:0] = {btb_wr_tag[pt.BTB_BTAG_SIZE-1:0], exu_mp_tgt[pt.BTB_TOFFSET_SIZE-1:0], exu_mp_pc4, exu_mp_boffset,
618 : exu_mp_call | exu_mp_ja, exu_mp_ret | exu_mp_ja, btb_valid} ;
619 :
// A mispredict only writes the BTB when it was actually taken and
// exu_mp_pkt.valid is clear.
620 : assign exu_mp_valid_write = exu_mp_valid & exu_mp_ataken & ~exu_mp_pkt.valid;
// BHT write ports: index 0 is the mispredict update, index 2 is the commit (wb) update.
621 263744 : logic [1:0] bht_wr_data0, bht_wr_data2;
622 1652695 : logic [1:0] bht_wr_en0, bht_wr_en2;
623 :
// Bank select for the mispredict update: bit 1 when middle_of_bank, bit 0 otherwise.
// Calls, returns and ja never update the BHT through this port.
624 : assign middle_of_bank = exu_mp_pc4 ^ exu_mp_boffset;
625 : assign bht_wr_en0[1:0] = {2{exu_mp_valid & ~exu_mp_call & ~exu_mp_ret & ~exu_mp_ja}} & {middle_of_bank, ~middle_of_bank};
// Bank select for the commit update, steered by dec_tlu_br0_middle_wb.
626 : assign bht_wr_en2[1:0] = {2{dec_tlu_br0_v_wb}} & {dec_tlu_br0_middle_wb, ~dec_tlu_br0_middle_wb} ;
627 :
628 : // Experiments show this is the best priority scheme for same bank/index writes at the same time.
629 : assign bht_wr_data0[1:0] = exu_mp_hist[1:0]; // lowest priority
630 : assign bht_wr_data2[1:0] = dec_tlu_br0_hist_wb[1:0]; // highest priority
631 :
632 :
633 :
// BHT read/write addresses and the hashed values that drive them.
634 151741 : logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] bht_rd_addr_f, bht_rd_addr_p1_f, bht_wr_addr0, bht_wr_addr2;
635 :
636 151741 : logic [pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] mp_hashed, br0_hashed_wb, bht_rd_addr_hashed_f, bht_rd_addr_hashed_p1_f;
// Each el2_btb_ghr_hash instance combines a BTB index with a global history value
// to form a BHT index (hash function itself is defined outside this section):
//   mpghrhs   : mispredict address + exu_mp_eghr        -> mp_hashed
//   br0ghrhs  : commit address     + exu_i0_br_fghr_wb  -> br0_hashed_wb
//   fghrhs    : fetch read address + fghr               -> bht_rd_addr_hashed_f
//   fghrhs_p1 : fetch "p1" address + fghr               -> bht_rd_addr_hashed_p1_f
637 : el2_btb_ghr_hash #(.pt(pt)) mpghrhs (.hashin(exu_mp_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_mp_eghr[pt.BHT_GHR_SIZE-1:0]), .hash(mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
638 : el2_btb_ghr_hash #(.pt(pt)) br0ghrhs (.hashin(dec_tlu_br0_addr_wb[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(exu_i0_br_fghr_wb[pt.BHT_GHR_SIZE-1:0]), .hash(br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
639 : el2_btb_ghr_hash #(.pt(pt)) fghrhs (.hashin(btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
640 : el2_btb_ghr_hash #(.pt(pt)) fghrhs_p1 (.hashin(btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO]), .ghr(fghr[pt.BHT_GHR_SIZE-1:0]), .hash(bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO]));
641 :
// The hashed values are used directly as the BHT write and read addresses.
642 : assign bht_wr_addr0[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = mp_hashed[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
643 : assign bht_wr_addr2[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = br0_hashed_wb[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
644 : assign bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
645 : assign bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] = bht_rd_addr_hashed_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO];
646 :
647 :
648 : // ----------------------------------------------------------------------
649 : // Structures. Using FLOPS
650 : // ----------------------------------------------------------------------
651 : // BTB
652 : // Entry -> tag[pt.BTB_BTAG_SIZE-1:0], toffset[pt.BTB_TOFFSET_SIZE-1:0], pc4, boffset, call, ret, valid
653 :
// Set-associative BTB storage (generated only when pt.BTB_FULLYA is 0):
// LRU_SIZE entries per way, two ways, implemented as enabled flops, followed by
// a combinational read mux for the fetch address and the "p1" fetch address.
654 : if(!pt.BTB_FULLYA) begin
655 :
656 : for (j=0 ; j<LRU_SIZE ; j++) begin : BTB_FLOPS
// Each entry is BTB_DWIDTH bits wide so the flop width always matches the
// btb_wr_data[BTB_DWIDTH-1:0] slice on din, independent of pt.BTB_TOFFSET_SIZE.
// An entry is written when the write index equals j and its way enable is set.
657 : // Way 0
658 : rvdffe #(BTB_DWIDTH) btb_bank0_way0 (.*,
659 : .en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way0)),
660 : .din (btb_wr_data[BTB_DWIDTH-1:0]),
661 : .dout (btb_bank0_rd_data_way0_out[j]));
662 :
663 : // Way 1
664 : rvdffe #(BTB_DWIDTH) btb_bank0_way1 (.*,
665 : .en(((btb_wr_addr[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == j) & btb_wr_en_way1)),
666 : .din (btb_wr_data[BTB_DWIDTH-1:0]),
667 : .dout (btb_bank0_rd_data_way1_out[j]));
668 :
669 : end
670 :
671 :
// Read mux: all four outputs default to zero, then the entry whose index equals
// the read address is selected for both ways. The first loop serves btb_rd_addr_f,
// the second serves btb_rd_addr_p1_f.
672 339 : always_comb begin : BTB_rd_mux
673 339 : btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0] = '0 ;
674 339 : btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0] = '0 ;
675 339 : btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0] = '0 ;
676 339 : btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0] = '0 ;
677 :
678 339 : for (int j=0; j< LRU_SIZE; j++) begin
// j is cast to the index width before the compare.
679 28265884 : if (btb_rd_addr_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin
680 :
681 28265884 : btb_bank0_rd_data_way0_f[BTB_DWIDTH-1:0] = btb_bank0_rd_data_way0_out[j];
682 28265884 : btb_bank0_rd_data_way1_f[BTB_DWIDTH-1:0] = btb_bank0_rd_data_way1_out[j];
683 :
684 : end
685 : end
686 339 : for (int j=0; j< LRU_SIZE; j++) begin
687 28265884 : if (btb_rd_addr_p1_f[pt.BTB_ADDR_HI:pt.BTB_ADDR_LO] == (pt.BTB_ADDR_HI-pt.BTB_ADDR_LO+1)'(j)) begin
688 :
689 28265884 : btb_bank0_rd_data_way0_p1_f[BTB_DWIDTH-1:0] = btb_bank0_rd_data_way0_out[j];
690 28265884 : btb_bank0_rd_data_way1_p1_f[BTB_DWIDTH-1:0] = btb_bank0_rd_data_way1_out[j];
691 :
692 : end
693 : end
694 : end
695 : end // if (!pt.BTB_FULLYA)
696 :
697 :
698 :
699 :
700 :
// Fully-associative BTB (generated only when pt.BTB_FULLYA is set).
// pt.BTB_SIZE entries are held in flops (btbdata). Every entry is compared against
// the fetch address and against the fetch address with its lower bits incremented
// by one. A "used" bit per entry tracks which entries have been hit or allocated
// and steers replacement.
701 : if(pt.BTB_FULLYA) begin : fa
702 :
703 : logic found1, hit0, hit1;
704 : logic btb_used_reset, write_used;
705 : logic [$clog2(pt.BTB_SIZE)-1:0] btb_fa_wr_addr0, hit0_index, hit1_index;
706 :
707 : logic [pt.BTB_SIZE-1:0] btb_tag_hit, btb_offset_0, btb_offset_1, btb_used_ns, btb_used,
708 : wr0_en, btb_upper_hit;
709 : logic [pt.BTB_SIZE-1:0][BTB_DWIDTH-1:0] btbdata;
710 :
711 : // Fully Associative tag hash uses bits 31:3. Bits 2:1 are the offset bits used for the 4 tag comp banks
712 : // Full tag used to speed up lookup. There is one 31:3 cmp per entry, and 4 2:1 cmps per entry.
713 :
714 : logic [FA_CMP_LOWER-1:1] ifc_fetch_addr_p1_f;
715 :
716 :
// Lower fetch-address bits plus one; the upper bits are reused unchanged below.
717 : assign ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1] = ifc_fetch_addr_f[FA_CMP_LOWER-1:1] + 1'b1;
718 :
// A valid mispredict with exu_mp_pkt.way clear whose tag equals the address being
// fetched (or the p1 address) collides with this fetch; the write data is then
// used in place of the stored entry in the read mux below.
719 : assign fetch_mp_collision_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == ifc_fetch_addr_f[31:1]) &
720 : exu_mp_valid & ifc_fetch_req_f & ~exu_mp_pkt.way);
721 : assign fetch_mp_collision_p1_f = ( (exu_mp_btag[pt.BTB_BTAG_SIZE-1:0] == {ifc_fetch_addr_f[31:FA_CMP_LOWER], ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]}) &
722 : exu_mp_valid & ifc_fetch_req_f & ~exu_mp_pkt.way);
723 :
// Single scan over all entries that produces:
//   - hit0/hit1 and hit0_index/hit1_index : the lowest-index matching entry
//   - btb_vbank0/1_rd_data_f              : data of the matching entry
//   - btb_fa_wr_addr0                     : lowest-index entry whose used bit is clear
724 : always_comb begin
725 : btb_vbank0_rd_data_f = '0;
726 : btb_vbank1_rd_data_f = '0;
727 : // btb_tag_hit = '0;
728 : // btb_upper_hit = '0;
729 : // btb_offset_0 = '0;
730 : // btb_offset_1 = '0;
731 :
732 : found1 = 1'b0;
733 : hit0 = 1'b0;
734 : hit1 = 1'b0;
735 : hit0_index = '0;
736 : hit1_index = '0;
737 : btb_fa_wr_addr0 = '0;
738 :
739 : for(int i=0; i<pt.BTB_SIZE; i++) begin
740 : logic upper_hit, offset_0, offset_1;
741 :
742 : // Break the cmp into chunks for lower area.
743 : // Chunk1: FA 31:6 or 31:5 depending on icache line size
744 : // Chunk2: FA 5:1 or 4:1 depending on icache line size
745 : // btb_upper_hit[i] = (btbdata[i][BTB_DWIDTH_TOP:FA_TAG_END_UPPER] == ifc_fetch_addr_f[31:FA_CMP_LOWER]) & btbdata[i][0] & ~wr0_en[i];
746 : // btb_offset_0[i] = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_f[FA_CMP_LOWER-1:1]) & btb_upper_hit[i];
747 : // btb_offset_1[i] = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]) & btb_upper_hit[i];
748 :
// upper_hit also requires the entry's valid bit (bit 0) and that the entry is not
// being written this cycle (~wr0_en[i]).
749 : upper_hit = (btbdata[i][BTB_DWIDTH_TOP:FA_TAG_END_UPPER] == ifc_fetch_addr_f[31:FA_CMP_LOWER]) & btbdata[i][0] & ~wr0_en[i];
750 : offset_0 = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_f[FA_CMP_LOWER-1:1]) & upper_hit;
751 : offset_1 = (btbdata[i][FA_TAG_START_LOWER:FA_TAG_END_LOWER] == ifc_fetch_addr_p1_f[FA_CMP_LOWER-1:1]) & upper_hit;
752 :
// Only the first (lowest i) match sets the hit flag and index.
753 : if(~hit0) begin
754 : if(offset_0) begin
755 : hit0_index[BTB_FA_INDEX:0] = (BTB_FA_INDEX+1)'(i);
756 : // hit unless we are also writing this entry at the same time
757 : hit0 = 1'b1;
758 : end
759 : end
760 : if(~hit1) begin
761 : if(offset_1) begin
762 : hit1_index[BTB_FA_INDEX:0] = (BTB_FA_INDEX+1)'(i);
763 : hit1 = 1'b1;
764 : end
765 : end
766 :
767 :
// NOTE(review): unlike the index above, the data assignment is not guarded by
// hit0/hit1, so with multiple matches the highest-index entry would drive the
// data. The commented-out onehot0 assertions after this block suggest matches
// are expected to be unique - confirm.
768 : // Mux out the 2 potential branches
769 : if(offset_0)
770 : btb_vbank0_rd_data_f[BTB_DWIDTH-1:0] = fetch_mp_collision_f ? btb_wr_data : btbdata[i];
771 : if(offset_1)
772 : btb_vbank1_rd_data_f[BTB_DWIDTH-1:0] = fetch_mp_collision_p1_f ? btb_wr_data : btbdata[i];
773 :
// The search only runs when a write is pending; if every used bit is set,
// btb_fa_wr_addr0 keeps its default of 0.
774 : // find the first zero from bit zero in the used vector, this is the write address
775 : if(~found1 & ((exu_mp_valid_write & ~exu_mp_pkt.way) | dec_tlu_error_wb)) begin
776 : if(~btb_used[i]) begin
777 : btb_fa_wr_addr0[BTB_FA_INDEX:0] = i[BTB_FA_INDEX:0];
778 : found1 = 1'b1;
779 : end
780 : end
781 : end
782 : end // always_comb begin
783 :
784 : //`ifdef RV_ASSERT_ON
785 : // btbhitonehot0: assert #0 ($onehot0(btb_offset_0));
786 : // btbhitonehot1: assert #0 ($onehot0(btb_offset_1));
787 : //`endif
788 :
// Hit vector; the upper bit is gated by eoc_mask.
789 : assign vwayhit_f[1:0] = {hit1, hit0} & {eoc_mask, 1'b1};
790 :
791 : // way bit is reused as the predicted bit
792 : assign way_raw[1:0] = vwayhit_f[1:0] | {fetch_mp_collision_p1_f, fetch_mp_collision_f};
793 :
794 : for (j=0 ; j<pt.BTB_SIZE ; j++) begin : BTB_FAFLOPS
795 :
// Entry j is written either as the allocation target of a mispredict write with
// exu_mp_pkt.way clear, or when an error update names it via dec_fa_error_index.
796 : assign wr0_en[j] = ((btb_fa_wr_addr0[BTB_FA_INDEX:0] == j) & (exu_mp_valid_write & ~exu_mp_pkt.way)) |
797 : ((dec_fa_error_index == j) & dec_tlu_error_wb);
798 :
799 : rvdffe #(BTB_DWIDTH) btb_fa (.*, .clk(clk),
800 : .en (wr0_en[j]),
801 : .din (btb_wr_data[BTB_DWIDTH-1:0]),
802 : .dout(btbdata[j]));
803 : end // block: BTB_FAFLOPS
804 :
// Index of the hitting entry for each slot, forced to zero when there is no hit.
805 : assign ifu_bp_fa_index_f[1] = hit1 ? hit1_index : '0;
806 : assign ifu_bp_fa_index_f[0] = hit0 ? hit0_index : '0;
807 :
// Used-vector update:
//   - set the bit of every entry hit this cycle and of a newly allocated entry
//   - when all bits are set (btb_used_reset) the old value is dropped, so only the
//     bits set this cycle survive
//   - on an error update (and no reset) keep the old value minus the error entry
//   - otherwise keep the old value
808 : assign btb_used_reset = &btb_used[pt.BTB_SIZE-1:0];
809 : assign btb_used_ns[pt.BTB_SIZE-1:0] = ({pt.BTB_SIZE{vwayhit_f[1]}} & (32'b1 << hit1_index[BTB_FA_INDEX:0])) |
810 : ({pt.BTB_SIZE{vwayhit_f[0]}} & (32'b1 << hit0_index[BTB_FA_INDEX:0])) |
811 : ({pt.BTB_SIZE{exu_mp_valid_write & ~exu_mp_pkt.way & ~dec_tlu_error_wb}} & (32'b1 << btb_fa_wr_addr0[BTB_FA_INDEX:0])) |
812 : ({pt.BTB_SIZE{btb_used_reset}} & {pt.BTB_SIZE{1'b0}}) |
813 : ({pt.BTB_SIZE{~btb_used_reset & dec_tlu_error_wb}} & (btb_used[pt.BTB_SIZE-1:0] & ~(32'b1 << dec_fa_error_index[BTB_FA_INDEX:0]))) |
814 : (~{pt.BTB_SIZE{btb_used_reset | dec_tlu_error_wb}} & btb_used[pt.BTB_SIZE-1:0]);
815 :
// The used vector is only clocked when one of these events can change it.
816 : assign write_used = btb_used_reset | ifu_bp_hit_taken_f | exu_mp_valid_write | dec_tlu_error_wb;
817 :
818 :
819 : rvdffe #(pt.BTB_SIZE) btb_usedf (.*, .clk(clk),
820 : .en (write_used),
821 : .din (btb_used_ns[pt.BTB_SIZE-1:0]),
822 : .dout(btb_used[pt.BTB_SIZE-1:0]));
823 :
824 : end // block: fa
825 :
826 :
827 : //-----------------------------------------------------------------------------
828 : // BHT
829 : // 2 bit Entry -> direction, strength
830 : //
831 : //-----------------------------------------------------------------------------
832 :
833 : // logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0][1:0] bht_bank_wr_data ;
// BHT storage: two banks of pt.BHT_ARRAY_DEPTH two-bit counters held in flops.
// Entries are organised in groups of NUM_BHT_LOOP; each group has its own clock
// enable (and, unless RV_FPGA_OPTIMIZE is defined, its own clock header).
834 : logic [1:0] [pt.BHT_ARRAY_DEPTH-1:0] [1:0] bht_bank_rd_data_out ;
835 11506 : logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clken ;
836 0 : logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0] bht_bank_clk ;
837 : // logic [1:0] [(pt.BHT_ARRAY_DEPTH/NUM_BHT_LOOP)-1:0][NUM_BHT_LOOP-1:0] bht_bank_sel ;
838 :
839 : for ( i=0; i<2; i++) begin : BANKS
// Per-entry write selects for this bank: the bank's write enable shifted left by
// the write address and cast to pt.BHT_ARRAY_DEPTH bits.
// wr0 = mispredict port, wr1 = commit port.
840 : wire[pt.BHT_ARRAY_DEPTH-1:0] wr0, wr1;
841 : assign wr0 = pt.BHT_ARRAY_DEPTH'(bht_wr_en0[i] << bht_wr_addr0);
842 : assign wr1 = pt.BHT_ARRAY_DEPTH'(bht_wr_en2[i] << bht_wr_addr2);
843 : for (genvar k=0 ; k < (pt.BHT_ARRAY_DEPTH)/NUM_BHT_LOOP ; k++) begin : BHT_CLK_GROUP
// Group k is clocked when either write port targets it (upper address bits equal
// k), or unconditionally for that port when BHT_NO_ADDR_MATCH is set.
844 : assign bht_bank_clken[i][k] = (bht_wr_en0[i] & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) |
845 : (bht_wr_en2[i] & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH));
846 : `ifndef RV_FPGA_OPTIMIZE
847 : rvclkhdr bht_bank_grp_cgc ( .en(bht_bank_clken[i][k]), .l1clk(bht_bank_clk[i][k]), .* ); // ifndef RV_FPGA_OPTIMIZE
848 : `endif
849 :
850 : for (j=0 ; j<NUM_BHT_LOOP ; j++) begin : BHT_FLOPS
// Entry NUM_BHT_LOOP*k+j is written when either port selects it.
851 : wire[1:0] wdata;
852 : wire bank_sel = wr1[NUM_BHT_LOOP*k+j] | wr0[NUM_BHT_LOOP*k+j];
853 :
854 : // assign bht_bank_sel[i][k][j] = (bht_wr_en0[i] & (bht_wr_addr0[NUM_BHT_LOOP_INNER_HI :pt.BHT_ADDR_LO] == j) & ((bht_wr_addr0[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) |
855 : // (bht_wr_en2[i] & (bht_wr_addr2[NUM_BHT_LOOP_INNER_HI :pt.BHT_ADDR_LO] == j) & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) ;
856 :
857 : // assign bht_bank_wr_data[i][k][j] = (bht_wr_en2[i] & (bht_wr_addr2[NUM_BHT_LOOP_INNER_HI:pt.BHT_ADDR_LO] == j) & ((bht_wr_addr2[pt.BHT_ADDR_HI: NUM_BHT_LOOP_OUTER_LO]==k) | BHT_NO_ADDR_MATCH)) ? bht_wr_data2[1:0] :
858 : // bht_wr_data0[1:0] ;
// The commit port wins when both ports select the same entry.
859 : assign wdata = wr1[NUM_BHT_LOOP*k+j] ? bht_wr_data2[1:0] :bht_wr_data0;
860 :
861 :
862 :
// The read-data index uses the same NUM_BHT_LOOP*k+j mapping as the write selects
// above, so storage and write decode stay aligned for any group size.
863 : rvdffs_fpga #(2) bht_bank (.*,
864 : .clk (bht_bank_clk[i][k]),
865 : .en (bank_sel),
866 : .rawclk (clk),
867 : .clken (bank_sel),
868 : .din (wdata),
869 : .dout (bht_bank_rd_data_out[i][(NUM_BHT_LOOP*k)+j]));
870 :
871 : end // block: BHT_FLOPS
872 : end // block: BHT_CLK_GROUP
873 : end // block: BANKS
874 :
// BHT read mux. Outputs default to zero; the entry whose index equals the read
// address is then selected:
//   bht_rd_addr_f    -> bank 0 and bank 1 data
//   bht_rd_addr_p1_f -> bank 0 data only
875 339 : always_comb begin : BHT_rd_mux
876 339 : bht_bank0_rd_data_f[1:0] = '0 ;
877 339 : bht_bank1_rd_data_f[1:0] = '0 ;
878 339 : bht_bank0_rd_data_p1_f[1:0] = '0 ;
879 339 : for (int j=0; j< pt.BHT_ARRAY_DEPTH; j++) begin
// j is cast to the BHT address width before each compare.
880 28265884 : if (bht_rd_addr_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] == (pt.BHT_ADDR_HI-pt.BHT_ADDR_LO+1)'(j)) begin
881 28265884 : bht_bank0_rd_data_f[1:0] = bht_bank_rd_data_out[0][j];
882 28265884 : bht_bank1_rd_data_f[1:0] = bht_bank_rd_data_out[1][j];
883 : end
884 28265884 : if (bht_rd_addr_p1_f[pt.BHT_ADDR_HI:pt.BHT_ADDR_LO] == (pt.BHT_ADDR_HI-pt.BHT_ADDR_LO+1)'(j)) begin
885 28265884 : bht_bank0_rd_data_p1_f[1:0] = bht_bank_rd_data_out[0][j];
886 : end
887 : end
888 : end // block: BHT_rd_mux
889 :
890 :
// countones: returns the number of set bits in a 2-bit vector.
//   valid  : 2-bit input
//   return : 2-bit count (0, 1 or 2)
891 339 : function [1:0] countones;
892 : input [1:0] valid;
893 :
894 339 : begin
895 :
// Each bit is zero-extended to two bits before the add so the sum cannot overflow.
896 339 : countones[1:0] = {1'b0, valid[1]} +
897 339 : {1'b0, valid[0]};
898 : end
899 : endfunction
900 : endmodule // el2_ifu_bp_ctl
901 :
|