Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : // Copyright 2020 Western Digital Corporation or its affiliates.
3 : //
4 : // Licensed under the Apache License, Version 2.0 (the "License");
5 : // you may not use this file except in compliance with the License.
6 : // You may obtain a copy of the License at
7 : //
8 : // http://www.apache.org/licenses/LICENSE-2.0
9 : //
10 : // Unless required by applicable law or agreed to in writing, software
11 : // distributed under the License is distributed on an "AS IS" BASIS,
12 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 : // See the License for the specific language governing permissions and
14 : // limitations under the License.
15 :
16 :
17 : module el2_exu_mul_ctl // Multiply unit that also hosts the optional bit-manipulation ops; operands and results are registered when mul_p.valid is set
18 : import el2_pkg::*;
19 : #(
20 : `include "el2_param.vh"
21 : )
22 : (
23 69840792 : input logic clk, // Top level clock
24 339 : input logic rst_l, // Reset (never referenced by name in this module; presumably reaches the rvdffe instances through .* - TODO confirm)
25 : // Excluding scan_mode from coverage as its usage is determined by the integrator of the VeeR core.
26 : /*verilator coverage_off*/
27 : input logic scan_mode, // Scan mode (never referenced by name in this module; presumably reaches the rvdffe instances through .* - TODO confirm)
28 : /*verilator coverage_on*/
29 :
30 0 : input el2_mul_pkt_t mul_p, // {Valid, RS1 signed operand, RS2 signed operand, Select low 32-bits of result} plus one select per bit-manipulation operation
31 :
32 20188 : input logic [31:0] rs1_in, // A operand
33 22747 : input logic [31:0] rs2_in, // B operand
34 :
35 :
36 17528 : output logic [31:0] result_x // Result: upper or lower 32 bits of the product, ORed with the registered bit-manipulation result
37 : );
38 :
39 :
40 251048 : logic mul_x_enable; // Load enable of the two operand flops (driven by mul_p.valid)
41 251048 : logic bit_x_enable; // Load enable of the bit-manipulation result flop (driven by mul_p.valid)
42 16868 : logic signed [32:0] rs1_ext_in; // rs1_in widened to 33 bits; bit 32 is rs1_in[31] when mul_p.rs1_sign is set, otherwise 0
43 12631 : logic signed [32:0] rs2_ext_in; // rs2_in widened to 33 bits; bit 32 is rs2_in[31] when mul_p.rs2_sign is set, otherwise 0
44 34802 : logic [65:0] prod_x; // Product of the two registered 33-bit operands
45 16705 : logic low_x; // Registered mul_p.low: 1 selects prod_x[31:0], 0 selects prod_x[63:32]
46 :
47 :
48 :
49 : // *** Start - BitManip ***
50 :
51 2770 : logic bitmanip_sel_d; // Set when any bit-manipulation select below is active
52 1428 : logic bitmanip_sel_x; // Registered copy of bitmanip_sel_d; blocks the multiplier result in the final mux
53 242 : logic [31:0] bitmanip_d; // Result of the selected bit-manipulation op (0 when none is selected)
54 224 : logic [31:0] bitmanip_x; // Registered copy of bitmanip_d
55 :
56 :
57 :
58 : // ZBE (selects are tied to 0 unless pt.BITMANIP_ZBE == 1)
59 0 : logic ap_bcompress;
60 0 : logic ap_bdecompress;
61 :
62 : // ZBC (selects are tied to 0 unless pt.BITMANIP_ZBC == 1)
63 852 : logic ap_clmul;
64 888 : logic ap_clmulh;
65 1030 : logic ap_clmulr;
66 :
67 : // ZBP (selects are tied to 0 unless pt.BITMANIP_ZBP == 1)
68 0 : logic ap_grev;
69 0 : logic ap_gorc;
70 0 : logic ap_shfl;
71 0 : logic ap_unshfl;
72 0 : logic ap_xperm_n;
73 0 : logic ap_xperm_b;
74 0 : logic ap_xperm_h;
75 :
76 : // ZBR (selects are tied to 0 unless pt.BITMANIP_ZBR == 1)
77 0 : logic ap_crc32_b;
78 0 : logic ap_crc32_h;
79 0 : logic ap_crc32_w;
80 0 : logic ap_crc32c_b;
81 0 : logic ap_crc32c_h;
82 0 : logic ap_crc32c_w;
83 :
84 : // ZBF (select is tied to 0 unless pt.BITMANIP_ZBF == 1)
85 0 : logic ap_bfp;
86 :
87 :
88 : if (pt.BITMANIP_ZBE == 1) // ZBE configured in: forward the packet's bcompress/bdecompress selects
89 : begin
90 : assign ap_bcompress = mul_p.bcompress;
91 : assign ap_bdecompress = mul_p.bdecompress;
92 : end
93 : else // ZBE configured out: hold the selects at 0 so these results never reach bitmanip_d
94 : begin
95 : assign ap_bcompress = 1'b0;
96 : assign ap_bdecompress = 1'b0;
97 : end
98 :
99 : if (pt.BITMANIP_ZBC == 1) // ZBC configured in: forward the packet's carry-less multiply selects
100 : begin
101 : assign ap_clmul = mul_p.clmul;
102 : assign ap_clmulh = mul_p.clmulh;
103 : assign ap_clmulr = mul_p.clmulr;
104 : end
105 : else // ZBC configured out: hold the selects at 0 so these results never reach bitmanip_d
106 : begin
107 : assign ap_clmul = 1'b0;
108 : assign ap_clmulh = 1'b0;
109 : assign ap_clmulr = 1'b0;
110 : end
111 :
112 : if (pt.BITMANIP_ZBP == 1) // ZBP configured in: forward the packet's grev/gorc/shfl/unshfl/xperm selects
113 : begin
114 : assign ap_grev = mul_p.grev;
115 : assign ap_gorc = mul_p.gorc;
116 : assign ap_shfl = mul_p.shfl;
117 : assign ap_unshfl = mul_p.unshfl;
118 : assign ap_xperm_n = mul_p.xperm_n;
119 : assign ap_xperm_b = mul_p.xperm_b;
120 : assign ap_xperm_h = mul_p.xperm_h;
121 : end
122 : else // ZBP configured out: hold the selects at 0 so these results never reach bitmanip_d
123 : begin
124 : assign ap_grev = 1'b0;
125 : assign ap_gorc = 1'b0;
126 : assign ap_shfl = 1'b0;
127 : assign ap_unshfl = 1'b0;
128 : assign ap_xperm_n = 1'b0;
129 : assign ap_xperm_b = 1'b0;
130 : assign ap_xperm_h = 1'b0;
131 : end
132 :
133 : if (pt.BITMANIP_ZBR == 1) // ZBR configured in: forward the packet's CRC32 / CRC32C selects
134 : begin
135 : assign ap_crc32_b = mul_p.crc32_b;
136 : assign ap_crc32_h = mul_p.crc32_h;
137 : assign ap_crc32_w = mul_p.crc32_w;
138 : assign ap_crc32c_b = mul_p.crc32c_b;
139 : assign ap_crc32c_h = mul_p.crc32c_h;
140 : assign ap_crc32c_w = mul_p.crc32c_w;
141 : end
142 : else // ZBR configured out: hold the selects at 0 so these results never reach bitmanip_d
143 : begin
144 : assign ap_crc32_b = 1'b0;
145 : assign ap_crc32_h = 1'b0;
146 : assign ap_crc32_w = 1'b0;
147 : assign ap_crc32c_b = 1'b0;
148 : assign ap_crc32c_h = 1'b0;
149 : assign ap_crc32c_w = 1'b0;
150 : end
151 :
152 : if (pt.BITMANIP_ZBF == 1) // ZBF configured in: forward the packet's bit-field-place select
153 : begin
154 : assign ap_bfp = mul_p.bfp;
155 : end
156 : else // ZBF configured out: hold the select at 0 so the bfp result never reaches bitmanip_d
157 : begin
158 : assign ap_bfp = 1'b0;
159 : end
160 :
161 :
162 : // *** End - BitManip ***
163 :
164 :
165 :
166 : assign mul_x_enable = mul_p.valid; // Operand flops load only when a valid packet is presented
167 : assign bit_x_enable = mul_p.valid; // Same condition for the bit-manipulation result flop
168 :
169 : assign rs1_ext_in[32] = mul_p.rs1_sign & rs1_in[31]; // Extra MSB: copy of the sign bit for a signed operand, 0 for an unsigned one
170 : assign rs2_ext_in[32] = mul_p.rs2_sign & rs2_in[31]; // With this 33rd bit a single signed multiply serves every signed/unsigned combination
171 :
172 : assign rs1_ext_in[31:0] = rs1_in[31:0];
173 : assign rs2_ext_in[31:0] = rs2_in[31:0];
174 :
175 :
176 :
177 : // --------------------------- Multiply ----------------------------------
178 :
179 :
180 12410 : logic signed [32:0] rs1_x; // Registered rs1_ext_in
181 11050 : logic signed [32:0] rs2_x; // Registered rs2_ext_in
182 :
183 : rvdffe #(34) i_a_x_ff (.*, .clk(clk), .din({mul_p.low,rs1_ext_in[32:0]}), .dout({low_x,rs1_x[32:0]}), .en(mul_x_enable)); // Captures mul_p.low together with operand A; rvdffe is defined elsewhere (presumably an enabled flop - TODO confirm)
184 : rvdffe #(33) i_b_x_ff (.*, .clk(clk), .din( rs2_ext_in[32:0] ), .dout( rs2_x[32:0] ), .en(mul_x_enable)); // Captures operand B under the same enable
185 :
186 :
187 : assign prod_x[65:0] = rs1_x * rs2_x; // Both operands are declared signed, so this is a signed 33x33 multiply evaluated at the 66-bit width of prod_x
188 :
189 :
190 :
191 :
192 : // * * * * * * * * * * * * * * * * * * BitManip : BCOMPRESS, BDECOMPRESS * * * * * * * * * * * * *
193 :
194 :
195 : // *** BCOMPRESS == "gather" ***
196 :
197 1362 : logic [31:0] bcompress_d; // Gather result: rs1_in bits picked by the rs2_in mask, packed from bit 0 upward
198 41861 : logic bcompress_test_bit_d; // Mask bit examined in the current loop iteration
199 : integer bcompress_i, bcompress_j; // _i walks the source/mask bits, _j is the next free bit of the result
200 :
201 :
202 340 : always_comb
203 340 : begin
204 :
205 340 : bcompress_j = 0; // Result is filled starting at bit 0
206 340 : bcompress_test_bit_d = 1'b0; // Default value; overwritten in every loop iteration
207 340 : bcompress_d[31:0] = 32'b0; // Result bits that are never written stay 0
208 :
209 340 : for (bcompress_i=0; bcompress_i<32; bcompress_i++)
210 10880 : begin
211 10880 : bcompress_test_bit_d = rs2_in[bcompress_i];
212 938946400 : if (bcompress_test_bit_d) // Mask bit set: keep this rs1_in bit
213 1873760 : begin
214 1873760 : bcompress_d[bcompress_j] = rs1_in[bcompress_i]; // Source position follows the mask, destination position is packed
215 1873760 : bcompress_j = bcompress_j + 1;
216 : end // IF bcompress_test_bit
217 : end // FOR bcompress_i
218 : end // ALWAYS_COMB
219 :
220 :
221 :
222 : // *** BDECOMPRESS == "scatter" ***
223 :
224 7191 : logic [31:0] bdecompress_d; // Scatter result: low-order rs1_in bits placed at the positions set in the rs2_in mask
225 41861 : logic bdecompress_test_bit_d; // Mask bit examined in the current loop iteration
226 : integer bdecompress_i, bdecompress_j; // _i walks the mask/result bits, _j is the next rs1_in bit to consume
227 :
228 :
229 340 : always_comb
230 340 : begin
231 :
232 340 : bdecompress_j = 0; // Source bits are consumed starting at bit 0
233 340 : bdecompress_test_bit_d = 1'b0; // Default value; overwritten in every loop iteration
234 340 : bdecompress_d[31:0] = 32'b0; // Positions whose mask bit is 0 stay 0
235 :
236 340 : for (bdecompress_i=0; bdecompress_i<32; bdecompress_i++)
237 10880 : begin
238 10880 : bdecompress_test_bit_d = rs2_in[bdecompress_i];
239 938946400 : if (bdecompress_test_bit_d) // Mask bit set: deposit the next source bit here
240 1873760 : begin
241 1873760 : bdecompress_d[bdecompress_i] = rs1_in[bdecompress_j]; // Destination position follows the mask, source position is packed
242 1873760 : bdecompress_j = bdecompress_j + 1;
243 : end // IF bdecompress_test_bit
244 : end // FOR bdecompress_i
245 : end // ALWAYS_COMB
246 :
247 :
248 :
249 :
250 : // * * * * * * * * * * * * * * * * * * BitManip : CLMUL, CLMULH, CLMULR * * * * * * * * * * * * *
251 :
252 57611 : logic [62:0] clmul_raw_d; // 63-bit carry-less product of rs1_in and rs2_in (32 + 31 bits is the widest any term can reach)
253 :
254 :
255 : assign clmul_raw_d[62:0] = ( {63{rs2_in[00]}} & {31'b0,rs1_in[31:0] } ) ^ // Term i is rs1_in shifted left by i, gated by rs2_in[i]; the terms are combined with XOR instead of addition
256 : ( {63{rs2_in[01]}} & {30'b0,rs1_in[31:0], 1'b0} ) ^
257 : ( {63{rs2_in[02]}} & {29'b0,rs1_in[31:0], 2'b0} ) ^
258 : ( {63{rs2_in[03]}} & {28'b0,rs1_in[31:0], 3'b0} ) ^
259 : ( {63{rs2_in[04]}} & {27'b0,rs1_in[31:0], 4'b0} ) ^
260 : ( {63{rs2_in[05]}} & {26'b0,rs1_in[31:0], 5'b0} ) ^
261 : ( {63{rs2_in[06]}} & {25'b0,rs1_in[31:0], 6'b0} ) ^
262 : ( {63{rs2_in[07]}} & {24'b0,rs1_in[31:0], 7'b0} ) ^
263 : ( {63{rs2_in[08]}} & {23'b0,rs1_in[31:0], 8'b0} ) ^
264 : ( {63{rs2_in[09]}} & {22'b0,rs1_in[31:0], 9'b0} ) ^
265 : ( {63{rs2_in[10]}} & {21'b0,rs1_in[31:0],10'b0} ) ^
266 : ( {63{rs2_in[11]}} & {20'b0,rs1_in[31:0],11'b0} ) ^
267 : ( {63{rs2_in[12]}} & {19'b0,rs1_in[31:0],12'b0} ) ^
268 : ( {63{rs2_in[13]}} & {18'b0,rs1_in[31:0],13'b0} ) ^
269 : ( {63{rs2_in[14]}} & {17'b0,rs1_in[31:0],14'b0} ) ^
270 : ( {63{rs2_in[15]}} & {16'b0,rs1_in[31:0],15'b0} ) ^
271 : ( {63{rs2_in[16]}} & {15'b0,rs1_in[31:0],16'b0} ) ^
272 : ( {63{rs2_in[17]}} & {14'b0,rs1_in[31:0],17'b0} ) ^
273 : ( {63{rs2_in[18]}} & {13'b0,rs1_in[31:0],18'b0} ) ^
274 : ( {63{rs2_in[19]}} & {12'b0,rs1_in[31:0],19'b0} ) ^
275 : ( {63{rs2_in[20]}} & {11'b0,rs1_in[31:0],20'b0} ) ^
276 : ( {63{rs2_in[21]}} & {10'b0,rs1_in[31:0],21'b0} ) ^
277 : ( {63{rs2_in[22]}} & { 9'b0,rs1_in[31:0],22'b0} ) ^
278 : ( {63{rs2_in[23]}} & { 8'b0,rs1_in[31:0],23'b0} ) ^
279 : ( {63{rs2_in[24]}} & { 7'b0,rs1_in[31:0],24'b0} ) ^
280 : ( {63{rs2_in[25]}} & { 6'b0,rs1_in[31:0],25'b0} ) ^
281 : ( {63{rs2_in[26]}} & { 5'b0,rs1_in[31:0],26'b0} ) ^
282 : ( {63{rs2_in[27]}} & { 4'b0,rs1_in[31:0],27'b0} ) ^
283 : ( {63{rs2_in[28]}} & { 3'b0,rs1_in[31:0],28'b0} ) ^
284 : ( {63{rs2_in[29]}} & { 2'b0,rs1_in[31:0],29'b0} ) ^
285 : ( {63{rs2_in[30]}} & { 1'b0,rs1_in[31:0],30'b0} ) ^
286 : ( {63{rs2_in[31]}} & { rs1_in[31:0],31'b0} );
287 :
288 :
289 :
290 :
291 : // * * * * * * * * * * * * * * * * * * BitManip : GREV * * * * * * * * * * * * * * * * * *
292 :
293 : // uint32_t grev32(uint32_t rs1, uint32_t rs2)
294 : // {
295 : // uint32_t x = rs1;
296 : // int shamt = rs2 & 31;
297 : //
298 : // if (shamt & 1) x = ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1);
299 : // if (shamt & 2) x = ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2);
300 : // if (shamt & 4) x = ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4);
301 : // if (shamt & 8) x = ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8);
302 : // if (shamt & 16) x = ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
303 : //
304 : // return x;
305 : // }
306 :
307 :
308 29061 : logic [31:0] grev1_d; // After stage 1: adjacent bits swapped when rs2_in[0]
309 26044 : logic [31:0] grev2_d; // After stage 2: adjacent bit pairs swapped when rs2_in[1]
310 24671 : logic [31:0] grev4_d; // After stage 4: adjacent nibbles swapped when rs2_in[2]
311 46661 : logic [31:0] grev8_d; // After stage 8: adjacent bytes swapped when rs2_in[3]
312 35104 : logic [31:0] grev_d; // Final result: half-words swapped when rs2_in[4]
313 :
314 :
315 : assign grev1_d[31:0] = (rs2_in[0]) ? {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25], // Each stage either applies its swap or passes the previous stage through unchanged
316 : rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
317 : rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
318 : rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} : rs1_in[31:0];
319 :
320 : assign grev2_d[31:0] = (rs2_in[1]) ? {grev1_d[29:28],grev1_d[31:30],grev1_d[25:24],grev1_d[27:26],
321 : grev1_d[21:20],grev1_d[23:22],grev1_d[17:16],grev1_d[19:18],
322 : grev1_d[13:12],grev1_d[15:14],grev1_d[09:08],grev1_d[11:10],
323 : grev1_d[05:04],grev1_d[07:06],grev1_d[01:00],grev1_d[03:02]} : grev1_d[31:0];
324 :
325 : assign grev4_d[31:0] = (rs2_in[2]) ? {grev2_d[27:24],grev2_d[31:28],grev2_d[19:16],grev2_d[23:20],
326 : grev2_d[11:08],grev2_d[15:12],grev2_d[03:00],grev2_d[07:04]} : grev2_d[31:0];
327 :
328 : assign grev8_d[31:0] = (rs2_in[3]) ? {grev4_d[23:16],grev4_d[31:24],grev4_d[07:00],grev4_d[15:08]} : grev4_d[31:0];
329 :
330 : assign grev_d[31:0] = (rs2_in[4]) ? {grev8_d[15:00],grev8_d[31:16]} : grev8_d[31:0];
331 :
332 :
333 :
334 :
335 : // * * * * * * * * * * * * * * * * * * BitManip : GORC * * * * * * * * * * * * * * * * * *
336 :
337 : // uint32_t gorc32(uint32_t rs1, uint32_t rs2)
338 : // {
339 : // uint32_t x = rs1;
340 : // int shamt = rs2 & 31;
341 : //
342 : // if (shamt & 1) x |= ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1);
343 : // if (shamt & 2) x |= ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2);
344 : // if (shamt & 4) x |= ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4);
345 : // if (shamt & 8) x |= ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8);
346 : // if (shamt & 16) x |= ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
347 : //
348 : // return x;
349 : // }
350 :
351 :
352 31102 : logic [31:0] gorc1_d; // After stage 1: each bit ORed with its neighbour when rs2_in[0]
353 32894 : logic [31:0] gorc2_d; // After stage 2: each bit pair ORed with its neighbouring pair when rs2_in[1]
354 35793 : logic [31:0] gorc4_d; // After stage 4: each nibble ORed with its neighbouring nibble when rs2_in[2]
355 80845 : logic [31:0] gorc8_d; // After stage 8: each byte ORed with its neighbouring byte when rs2_in[3]
356 52895 : logic [31:0] gorc_d; // Final result: each half-word ORed with the other when rs2_in[4]
357 :
358 :
359 : assign gorc1_d[31:0] = ( {32{rs2_in[0]}} & {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25], // Same swap network as grev, but the swapped value is ORed onto the stage input instead of replacing it
360 : rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
361 : rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
362 : rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} ) | rs1_in[31:0];
363 :
364 : assign gorc2_d[31:0] = ( {32{rs2_in[1]}} & {gorc1_d[29:28],gorc1_d[31:30],gorc1_d[25:24],gorc1_d[27:26],
365 : gorc1_d[21:20],gorc1_d[23:22],gorc1_d[17:16],gorc1_d[19:18],
366 : gorc1_d[13:12],gorc1_d[15:14],gorc1_d[09:08],gorc1_d[11:10],
367 : gorc1_d[05:04],gorc1_d[07:06],gorc1_d[01:00],gorc1_d[03:02]} ) | gorc1_d[31:0];
368 :
369 : assign gorc4_d[31:0] = ( {32{rs2_in[2]}} & {gorc2_d[27:24],gorc2_d[31:28],gorc2_d[19:16],gorc2_d[23:20],
370 : gorc2_d[11:08],gorc2_d[15:12],gorc2_d[03:00],gorc2_d[07:04]} ) | gorc2_d[31:0];
371 :
372 : assign gorc8_d[31:0] = ( {32{rs2_in[3]}} & {gorc4_d[23:16],gorc4_d[31:24],gorc4_d[07:00],gorc4_d[15:08]} ) | gorc4_d[31:0];
373 :
374 : assign gorc_d[31:0] = ( {32{rs2_in[4]}} & {gorc8_d[15:00],gorc8_d[31:16]} ) | gorc8_d[31:0];
375 :
376 :
377 :
378 :
379 : // * * * * * * * * * * * * * * * * * * BitManip : SHFL, UNSHFL * * * * * * * * * * * * * * * * * *
380 :
381 : // uint32_t shuffle32_stage (uint32_t src, uint32_t maskL, uint32_t maskR, int N)
382 : // {
383 : // uint32_t x = src & ~(maskL | maskR);
384 : // x |= ((src << N) & maskL) | ((src >> N) & maskR);
385 : // return x;
386 : // }
387 : //
388 : //
389 : //
390 : // uint32_t shfl32(uint32_t rs1, uint32_t rs2)
391 : // {
392 : // uint32_t x = rs1;
393 : // int shamt = rs2 & 15
394 : //
395 : // if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
396 : // if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
397 : // if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
398 : // if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
399 : //
400 : // return x;
401 : // }
402 :
403 :
404 20634 : logic [31:0] shfl8_d; // After stage 8: the two middle bytes of the word swapped when rs2_in[3]
405 37767 : logic [31:0] shfl4_d; // After stage 4: the two middle nibbles of each half-word swapped when rs2_in[2]
406 37878 : logic [31:0] shfl2_d; // After stage 2: the two middle bit pairs of each byte swapped when rs2_in[1]
407 35804 : logic [31:0] shfl_d; // Final result: the two middle bits of each nibble swapped when rs2_in[0]
408 :
409 :
410 :
411 : assign shfl8_d[31:0] = (rs2_in[3]) ? {rs1_in[31:24],rs1_in[15:08],rs1_in[23:16],rs1_in[07:00]} : rs1_in[31:0]; // Stages run from the widest swap (8) down to the narrowest (1)
412 :
413 : assign shfl4_d[31:0] = (rs2_in[2]) ? {shfl8_d[31:28],shfl8_d[23:20],shfl8_d[27:24],shfl8_d[19:16],
414 : shfl8_d[15:12],shfl8_d[07:04],shfl8_d[11:08],shfl8_d[03:00]} : shfl8_d[31:0];
415 :
416 : assign shfl2_d[31:0] = (rs2_in[1]) ? {shfl4_d[31:30],shfl4_d[27:26],shfl4_d[29:28],shfl4_d[25:24],
417 : shfl4_d[23:22],shfl4_d[19:18],shfl4_d[21:20],shfl4_d[17:16],
418 : shfl4_d[15:14],shfl4_d[11:10],shfl4_d[13:12],shfl4_d[09:08],
419 : shfl4_d[07:06],shfl4_d[03:02],shfl4_d[05:04],shfl4_d[01:00]} : shfl4_d[31:0];
420 :
421 : assign shfl_d[31:0] = (rs2_in[0]) ? {shfl2_d[31],shfl2_d[29],shfl2_d[30],shfl2_d[28],shfl2_d[27],shfl2_d[25],shfl2_d[26],shfl2_d[24],
422 : shfl2_d[23],shfl2_d[21],shfl2_d[22],shfl2_d[20],shfl2_d[19],shfl2_d[17],shfl2_d[18],shfl2_d[16],
423 : shfl2_d[15],shfl2_d[13],shfl2_d[14],shfl2_d[12],shfl2_d[11],shfl2_d[09],shfl2_d[10],shfl2_d[08],
424 : shfl2_d[07],shfl2_d[05],shfl2_d[06],shfl2_d[04],shfl2_d[03],shfl2_d[01],shfl2_d[02],shfl2_d[00]} : shfl2_d[31:0];
425 :
426 :
427 :
428 :
429 : // uint32_t unshfl32(uint32_t rs1, uint32_t rs2)
430 : // {
431 : // uint32_t x = rs1;
432 : // int shamt = rs2 & 15
433 : //
434 : // if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
435 : // if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
436 : // if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
437 : // if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
438 : //
439 : // return x;
440 : // }
441 :
442 :
443 20262 : logic [31:0] unshfl1_d; // After stage 1: the two middle bits of each nibble swapped when rs2_in[0]
444 20450 : logic [31:0] unshfl2_d; // After stage 2: the two middle bit pairs of each byte swapped when rs2_in[1]
445 39731 : logic [31:0] unshfl4_d; // After stage 4: the two middle nibbles of each half-word swapped when rs2_in[2]
446 32020 : logic [31:0] unshfl_d; // Final result: the two middle bytes of the word swapped when rs2_in[3]
447 :
448 :
449 : assign unshfl1_d[31:0] = (rs2_in[0]) ? {rs1_in[31],rs1_in[29],rs1_in[30],rs1_in[28],rs1_in[27],rs1_in[25],rs1_in[26],rs1_in[24], // Same four swaps as shfl, applied in the opposite order (narrowest first)
450 : rs1_in[23],rs1_in[21],rs1_in[22],rs1_in[20],rs1_in[19],rs1_in[17],rs1_in[18],rs1_in[16],
451 : rs1_in[15],rs1_in[13],rs1_in[14],rs1_in[12],rs1_in[11],rs1_in[09],rs1_in[10],rs1_in[08],
452 : rs1_in[07],rs1_in[05],rs1_in[06],rs1_in[04],rs1_in[03],rs1_in[01],rs1_in[02],rs1_in[00]} : rs1_in[31:0];
453 :
454 : assign unshfl2_d[31:0] = (rs2_in[1]) ? {unshfl1_d[31:30],unshfl1_d[27:26],unshfl1_d[29:28],unshfl1_d[25:24],
455 : unshfl1_d[23:22],unshfl1_d[19:18],unshfl1_d[21:20],unshfl1_d[17:16],
456 : unshfl1_d[15:14],unshfl1_d[11:10],unshfl1_d[13:12],unshfl1_d[09:08],
457 : unshfl1_d[07:06],unshfl1_d[03:02],unshfl1_d[05:04],unshfl1_d[01:00]} : unshfl1_d[31:0];
458 :
459 : assign unshfl4_d[31:0] = (rs2_in[2]) ? {unshfl2_d[31:28],unshfl2_d[23:20],unshfl2_d[27:24],unshfl2_d[19:16],
460 : unshfl2_d[15:12],unshfl2_d[07:04],unshfl2_d[11:08],unshfl2_d[03:00]} : unshfl2_d[31:0];
461 :
462 : assign unshfl_d[31:0] = (rs2_in[3]) ? {unshfl4_d[31:24],unshfl4_d[15:08],unshfl4_d[23:16],unshfl4_d[07:00]} : unshfl4_d[31:0];
463 :
464 :
465 :
466 :
467 : // * * * * * * * * * * * * * * * * * * BitManip : XPERM * * * * * * * * * * * * * * * * *
468 :
469 : //
470 : // These instructions operate on nibbles/bytes/half-words/words.
471 : // rs1 is a vector of data words and rs2 is a vector of indices into rs1.
472 : // The result of the instruction is the vector rs2 with each element replaced by the corresponding data word from rs1,
473 : // or zero when the index in rs2 is out of bounds.
474 : //
475 : // uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2)
476 : // {
477 : // uint_xlen_t r = 0;
478 : // uint_xlen_t sz = 1LL << sz_log2;
479 : // uint_xlen_t mask = (1LL << sz) - 1;
480 : // for (int i = 0; i < XLEN; i += sz)
481 : // { uint_xlen_t pos = ((rs2 >> i) & mask) << sz_log2;
482 : // if (pos < XLEN)
483 : // r |= ((rs1 >> pos) & mask) << i;
484 : // }
485 : // return r;
486 : // }
487 : //
488 : // uint_xlen_t xperm_n (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 2); }
489 : // uint_xlen_t xperm_b (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 3); }
490 : // uint_xlen_t xperm_h (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 4); }
491 : // uint_xlen_t xperm_w (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 5); } Not part of RV32
492 : //
493 : // The xperm.[nbhw] instructions can be implemented with an XLEN/4-lane nibble-wide crossbar switch.
494 :
495 : // *** XPERM_B ***
496 :
497 : // XLEN = 32
498 : // SZ_LOG2 = 3
499 : // SZ = 4'd8;
500 : // MASK = ( 1 << 8 ) - 1
501 : // = 8'hFF
502 :
503 : // integer xperm_b_i;
504 : // logic [31:0] xperm_b_r;
505 : // logic [3:0] xperm_b_sz;
506 : // logic [7:0] xperm_b_mask;
507 : // logic [31:0] xperm_b_pos;
508 : //
509 : //
510 : // assign xperm_b_sz[3:0] = 4'd8;
511 : // assign xperm_b_mask[7:0] = 8'hff;
512 : //
513 : // always_comb
514 : // begin
515 : // xperm_b_r[31:0] = 32'b0;
516 : //
517 : // for (xperm_b_i=0; xperm_b_i<32; xperm_b_i = xperm_b_i + xperm_b_sz) // This code did not work...
518 : // begin
519 : // xperm_b_pos[31:0] = ( (rs2_in[31:0] >> xperm_b_i) & {24'h0,xperm_b_mask[7:0]} ) << 3;
520 : // if (xperm_b_pos[31:0] < 32'd32)
521 : // xperm_b_r[31:0] = xperm_b_r[31:0] | ( ((rs1_in[31:0] >> xperm_b_pos[4:0]) & {24'h0,xperm_b_mask[7:0]}) << xperm_b_i );
522 : // end
523 : // end
524 :
525 20236 : logic [31:0] xperm_n; // Nibble permute: each rs2_in nibble is an index selecting one of the 8 nibbles of rs1_in
526 59290 : logic [31:0] xperm_b; // Byte permute: each rs2_in byte is an index selecting one of the 4 bytes of rs1_in
527 7230 : logic [31:0] xperm_h; // Half-word permute: each rs2_in half-word is an index selecting one of the 2 half-words of rs1_in
528 :
529 : assign xperm_n[03:00] = { 4{ ~rs2_in[03] }} & 4'( (rs1_in[31:0] >> {rs2_in[02:00],2'b0}) & 4'hf ); // This is an 8:1 mux with qualified selects: index bits [2:0] pick the nibble, a set index bit 3 (index out of range) forces 0
530 : assign xperm_n[07:04] = { 4{ ~rs2_in[07] }} & 4'( (rs1_in[31:0] >> {rs2_in[06:04],2'b0}) & 4'hf );
531 : assign xperm_n[11:08] = { 4{ ~rs2_in[11] }} & 4'( (rs1_in[31:0] >> {rs2_in[10:08],2'b0}) & 4'hf );
532 : assign xperm_n[15:12] = { 4{ ~rs2_in[15] }} & 4'( (rs1_in[31:0] >> {rs2_in[14:12],2'b0}) & 4'hf );
533 : assign xperm_n[19:16] = { 4{ ~rs2_in[19] }} & 4'( (rs1_in[31:0] >> {rs2_in[18:16],2'b0}) & 4'hf );
534 : assign xperm_n[23:20] = { 4{ ~rs2_in[23] }} & 4'( (rs1_in[31:0] >> {rs2_in[22:20],2'b0}) & 4'hf );
535 : assign xperm_n[27:24] = { 4{ ~rs2_in[27] }} & 4'( (rs1_in[31:0] >> {rs2_in[26:24],2'b0}) & 4'hf );
536 : assign xperm_n[31:28] = { 4{ ~rs2_in[31] }} & 4'( (rs1_in[31:0] >> {rs2_in[30:28],2'b0}) & 4'hf );
537 :
538 : assign xperm_b[07:00] = { 8{ ~(| rs2_in[07:02]) }} & 8'( (rs1_in[31:0] >> {rs2_in[01:00],3'b0}) & 8'hff ); // This is a 4:1 mux with qualified selects: index bits [1:0] pick the byte, any set bit in index bits [7:2] forces 0
539 : assign xperm_b[15:08] = { 8{ ~(| rs2_in[15:10]) }} & 8'( (rs1_in[31:0] >> {rs2_in[09:08],3'b0}) & 8'hff );
540 : assign xperm_b[23:16] = { 8{ ~(| rs2_in[23:18]) }} & 8'( (rs1_in[31:0] >> {rs2_in[17:16],3'b0}) & 8'hff );
541 : assign xperm_b[31:24] = { 8{ ~(| rs2_in[31:26]) }} & 8'( (rs1_in[31:0] >> {rs2_in[25:24],3'b0}) & 8'hff );
542 :
543 : assign xperm_h[15:00] = {16{ ~(| rs2_in[15:01]) }} & 16'( (rs1_in[31:0] >> {rs2_in[00] ,4'b0}) & 16'hffff ); // This is a 2:1 mux with qualified selects: index bit 0 picks the half-word, any set bit in index bits [15:1] forces 0
544 : assign xperm_h[31:16] = {16{ ~(| rs2_in[31:17]) }} & 16'( (rs1_in[31:0] >> {rs2_in[16] ,4'b0}) & 16'hffff );
545 :
546 :
547 :
548 :
549 : // * * * * * * * * * * * * * * * * * * BitManip : CRC32, CRC32c * * * * * * * * * * * * * * * * *
550 :
551 : // *** computed from https://crccalc.com ***
552 : //
553 : // "a" is 8'h61 = 8'b0110_0001 (8'h61 ^ 8'hff = 8'h9e)
554 : //
555 : // Input must first be XORed with 32'hffff_ffff
556 : //
557 : //
558 : // CRC32
559 : //
560 : // Input Output Input Output
561 : // ----- -------- -------- --------
562 : // "a" e8b7be43 ffffff9e 174841bc
563 : // "aa" 078a19d7 ffff9e9e f875e628
564 : // "aaaa" ad98e545 9e9e9e9e 5267a1ba
565 : //
566 : //
567 : //
568 : // CRC32c
569 : //
570 : // Input Output Input Output
571 : // ----- -------- -------- --------
572 : // "a" c1d04330 ffffff9e 3e2fbccf
573 : // "aa" f1f2dac2 ffff9e9e 0e0d253d
574 : // "aaaa" 6a52eeb0 9e9e9e9e 95ad114f
575 :
576 :
577 0 : logic crc32_all; // Set when any of the six CRC32 / CRC32C selects is active
578 340 : logic [31:0] crc32_poly_rev; // Constant polynomial used by the crc32_* loops
579 340 : logic [31:0] crc32c_poly_rev; // Constant polynomial used by the crc32c_* loops
580 : integer crc32_bi, crc32_hi, crc32_wi, crc32c_bi, crc32c_hi, crc32c_wi; // Loop counters, one per CRC always_comb block
581 47786 : logic [31:0] crc32_bd, crc32_hd, crc32_wd, crc32c_bd, crc32c_hd, crc32c_wd; // CRC results after 8 (_b), 16 (_h) and 32 (_w) shift steps
582 :
583 :
584 : assign crc32_all = ap_crc32_b | ap_crc32_h | ap_crc32_w | ap_crc32c_b | ap_crc32c_h | ap_crc32c_w;
585 :
586 : assign crc32_poly_rev[31:0] = 32'hEDB88320; // bit reverse of 32'h04C11DB7
587 : assign crc32c_poly_rev[31:0] = 32'h82F63B78; // bit reverse of 32'h1EDC6F41
588 :
589 :
590 340 : always_comb // crc32.b: 8 shift steps over rs1_in
591 340 : begin
592 340 : crc32_bd[31:0] = rs1_in[31:0]; // Starting state is the operand itself
593 :
594 340 : for (crc32_bi=0; crc32_bi<8; crc32_bi++)
595 2720 : begin
596 2720 : crc32_bd[31:0] = (crc32_bd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_bd[0]}}); // Shift right by one; XOR in the polynomial when the bit shifted out was 1
597 : end // FOR crc32_bi
598 : end // ALWAYS_COMB
599 :
600 :
601 340 : always_comb // crc32.h: 16 shift steps over rs1_in
602 340 : begin
603 340 : crc32_hd[31:0] = rs1_in[31:0]; // Starting state is the operand itself
604 :
605 340 : for (crc32_hi=0; crc32_hi<16; crc32_hi++)
606 5440 : begin
607 5440 : crc32_hd[31:0] = (crc32_hd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_hd[0]}}); // Shift right by one; XOR in the polynomial when the bit shifted out was 1
608 : end // FOR crc32_hi
609 : end // ALWAYS_COMB
610 :
611 :
612 340 : always_comb // crc32.w: 32 shift steps over rs1_in
613 340 : begin
614 340 : crc32_wd[31:0] = rs1_in[31:0]; // Starting state is the operand itself
615 :
616 340 : for (crc32_wi=0; crc32_wi<32; crc32_wi++)
617 10880 : begin
618 10880 : crc32_wd[31:0] = (crc32_wd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_wd[0]}}); // Shift right by one; XOR in the polynomial when the bit shifted out was 1
619 : end // FOR crc32_wi
620 : end // ALWAYS_COMB
621 :
622 :
623 :
624 :
625 340 : always_comb // crc32c.b: 8 shift steps over rs1_in with the CRC32C polynomial
626 340 : begin
627 340 : crc32c_bd[31:0] = rs1_in[31:0]; // Starting state is the operand itself
628 :
629 340 : for (crc32c_bi=0; crc32c_bi<8; crc32c_bi++)
630 2720 : begin
631 2720 : crc32c_bd[31:0] = (crc32c_bd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_bd[0]}}); // Shift right by one; XOR in the polynomial when the bit shifted out was 1
632 : end // FOR crc32c_bi
633 : end // ALWAYS_COMB
634 :
635 :
636 340 : always_comb // crc32c.h: 16 shift steps over rs1_in with the CRC32C polynomial
637 340 : begin
638 340 : crc32c_hd[31:0] = rs1_in[31:0]; // Starting state is the operand itself
639 :
640 340 : for (crc32c_hi=0; crc32c_hi<16; crc32c_hi++)
641 5440 : begin
642 5440 : crc32c_hd[31:0] = (crc32c_hd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_hd[0]}}); // Shift right by one; XOR in the polynomial when the bit shifted out was 1
643 : end // FOR crc32c_hi
644 : end // ALWAYS_COMB
645 :
646 :
647 340 : always_comb // crc32c.w: 32 shift steps over rs1_in with the CRC32C polynomial
648 340 : begin
649 340 : crc32c_wd[31:0] = rs1_in[31:0]; // Starting state is the operand itself
650 :
651 340 : for (crc32c_wi=0; crc32c_wi<32; crc32c_wi++)
652 10880 : begin
653 10880 : crc32c_wd[31:0] = (crc32c_wd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_wd[0]}}); // Shift right by one; XOR in the polynomial when the bit shifted out was 1
654 : end // FOR crc32c_wi
655 : end // ALWAYS_COMB
656 :
657 :
658 :
659 :
660 :
661 : // * * * * * * * * * * * * * * * * * * BitManip : BFP * * * * * * * * * * * * * * * * * *
662 :
663 :
664 : // uint_xlen_t bfp(uint_xlen_t rs1, uint_xlen_t rs2)
665 : // {
666 : // uint_xlen_t cfg = rs2 >> (XLEN/2);
667 : // if ((cfg >> 30) == 2) cfg = cfg >> 16;
668 : // int len = (cfg >> 8) & (XLEN/2-1);
669 : // int off = cfg & (XLEN-1);
670 : // len = len ? len : XLEN/2;
671 : // uint_xlen_t mask = slo(0, len) << off;
672 : // uint_xlen_t data = rs2 << off;
673 : // return (data & mask) | (rs1 & ~mask);
674 :
675 :
676 27363 : logic [4:0] bfp_len; // Field length, 1..16 (a LEN field of 0 is encoded as 16 through bit 4)
677 27751 : logic [4:0] bfp_off; // Bit position at which the field is placed
678 276 : logic [31:0] bfp_len_mask_; // 0s in the low bfp_len bits, 1s above
679 340 : logic [31:0] bfp_off_mask_; // 0s in the low bfp_off bits, 1s above
680 25762 : logic [15:0] bfp_preshift_data; // Low bfp_len bits of rs2_in[15:0], remaining bits cleared
681 4976 : logic [31:0] bfp_shift_data; // Field data moved up to bit position bfp_off
682 10408 : logic [31:0] bfp_shift_mask; // 1 for every rs1_in bit that is kept, 0 across the placed field
683 19594 : logic [31:0] bfp_result_d; // rs1_in with the field replaced
684 :
685 :
686 : assign bfp_len[3:0] = rs2_in[27:24]; // LEN field of the control half of rs2_in
687 : assign bfp_len[4] = (bfp_len[3:0] == 4'b0); // If LEN field is zero, then LEN=16
688 : assign bfp_off[4:0] = rs2_in[20:16]; // OFF field of the control half of rs2_in
689 :
690 : assign bfp_len_mask_[31:0] = 32'hffff_ffff << bfp_len[4:0];
691 : assign bfp_off_mask_[31:0] = 32'hffff_ffff << bfp_off[4:0];
692 : assign bfp_preshift_data[15:0]= rs2_in[15:0] & ~bfp_len_mask_[15:0]; // Trim the data to the field length before shifting
693 :
694 : assign bfp_shift_data[31:0] = {16'b0,bfp_preshift_data[15:0]} << bfp_off[4:0]; // Bits shifted past bit 31 are dropped
695 : assign bfp_shift_mask[31:0] = (bfp_len_mask_[31:0] << bfp_off[4:0]) | ~bfp_off_mask_[31:0]; // First term keeps the bits above the field, second term keeps the bits below it
696 :
697 : assign bfp_result_d[31:0] = bfp_shift_data[31:0] | (rs1_in[31:0] & bfp_shift_mask[31:0]); // Clear the field in rs1_in, then OR in the new field data
698 :
699 :
700 :
701 :
702 : // * * * * * * * * * * * * * * * * * * BitManip : Common logic * * * * * * * * * * * * * * * * * *
703 :
704 :
705 : assign bitmanip_sel_d = ap_bcompress | ap_bdecompress | ap_clmul | ap_clmulh | ap_clmulr | ap_grev | ap_gorc | ap_shfl | ap_unshfl | crc32_all | ap_bfp | ap_xperm_n | ap_xperm_b | ap_xperm_h; // Any bit-manipulation select active
706 :
707 : assign bitmanip_d[31:0] = ( {32{ap_bcompress}} & bcompress_d[31:0] ) | // AND-OR mux: every candidate is gated by its own select, so the output is 0 when no select is active
708 : ( {32{ap_bdecompress}} & bdecompress_d[31:0] ) |
709 : ( {32{ap_clmul}} & clmul_raw_d[31:0] ) | // clmul : low 32 bits of the carry-less product
710 : ( {32{ap_clmulh}} & {1'b0,clmul_raw_d[62:32]} ) | // clmulh: upper 31 bits, zero-extended to 32
711 : ( {32{ap_clmulr}} & clmul_raw_d[62:31] ) | // clmulr: bits 62:31 of the carry-less product
712 : ( {32{ap_grev}} & grev_d[31:0] ) |
713 : ( {32{ap_gorc}} & gorc_d[31:0] ) |
714 : ( {32{ap_shfl}} & shfl_d[31:0] ) |
715 : ( {32{ap_unshfl}} & unshfl_d[31:0] ) |
716 : ( {32{ap_crc32_b}} & crc32_bd[31:0] ) |
717 : ( {32{ap_crc32_h}} & crc32_hd[31:0] ) |
718 : ( {32{ap_crc32_w}} & crc32_wd[31:0] ) |
719 : ( {32{ap_crc32c_b}} & crc32c_bd[31:0] ) |
720 : ( {32{ap_crc32c_h}} & crc32c_hd[31:0] ) |
721 : ( {32{ap_crc32c_w}} & crc32c_wd[31:0] ) |
722 : ( {32{ap_bfp}} & bfp_result_d[31:0] ) |
723 : ( {32{ap_xperm_n}} & xperm_n[31:0] ) |
724 : ( {32{ap_xperm_b}} & xperm_b[31:0] ) |
725 : ( {32{ap_xperm_h}} & xperm_h[31:0] );
726 :
727 :
728 :
729 : rvdffe #(33) i_bitmanip_ff (.*, .clk(clk), .din({bitmanip_sel_d,bitmanip_d[31:0]}), .dout({bitmanip_sel_x,bitmanip_x[31:0]}), .en(bit_x_enable)); // Captures the select and the result under bit_x_enable; rvdffe is defined elsewhere (presumably an enabled flop - TODO confirm)
730 :
731 :
732 :
733 :
734 : assign result_x[31:0] = ( {32{~bitmanip_sel_x & ~low_x}} & prod_x[63:32] ) | // Multiply with the high half requested
735 : ( {32{~bitmanip_sel_x & low_x}} & prod_x[31:0] ) | // Multiply with the low half requested
736 : bitmanip_x[31:0]; // ORed in without a qualifier: bitmanip_d is 0 when no bit-manipulation select is active, so the registered value does not disturb a plain multiply
737 :
738 :
739 :
740 : endmodule // el2_exu_mul_ctl
|