Line data Source code
1 : // SPDX-License-Identifier: Apache-2.0
2 : // Copyright 2020 Western Digital Corporation or its affiliates.
3 : //
4 : // Licensed under the Apache License, Version 2.0 (the "License");
5 : // you may not use this file except in compliance with the License.
6 : // You may obtain a copy of the License at
7 : //
8 : // http://www.apache.org/licenses/LICENSE-2.0
9 : //
10 : // Unless required by applicable law or agreed to in writing, software
11 : // distributed under the License is distributed on an "AS IS" BASIS,
12 : // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 : // See the License for the specific language governing permissions and
14 : // limitations under the License.
15 :
16 :
17 : module el2_exu_mul_ctl // Multiplier plus bit-manipulation unit; the only output is the 32-bit result_x
18 : import el2_pkg::*;
19 : #(
20 : `include "el2_param.vh"
21 : )
22 : (
23 61843973 : input logic clk, // Top level clock
24 317 : input logic rst_l, // Reset (not named in this module's own logic; presumably reaches the rvdffe instances through their .* connections)
25 0 : input logic scan_mode, // Scan mode (likewise not named in this module's own logic; presumably used via the .* connections)
26 :
27 0 : input el2_mul_pkt_t mul_p, // {Valid, RS1 signed operand, RS2 signed operand, Select low 32-bits of result}; the per-operation bit-manip select fields (bcompress ... xperm_h) are read from it as well
28 :
29 20188 : input logic [31:0] rs1_in, // A operand
30 22747 : input logic [31:0] rs2_in, // B operand
31 :
32 :
33 17528 : output logic [31:0] result_x // Result: upper or lower product word, ORed with the bit-manip value held in bitmanip_x
34 : );
35 :
36 :
37 251048 : logic mul_x_enable; // Drives .en of the operand flops i_a_x_ff / i_b_x_ff (assigned from mul_p.valid)
38 251048 : logic bit_x_enable; // Drives .en of the bit-manip flop i_bitmanip_ff (assigned from mul_p.valid)
39 16868 : logic signed [32:0] rs1_ext_in; // rs1_in with an extra bit 32 = mul_p.rs1_sign & rs1_in[31]
40 12631 : logic signed [32:0] rs2_ext_in; // rs2_in with an extra bit 32 = mul_p.rs2_sign & rs2_in[31]
41 34802 : logic [65:0] prod_x; // rs1_x * rs2_x
42 16705 : logic low_x; // mul_p.low after i_a_x_ff; picks prod_x[31:0] instead of prod_x[63:32] for result_x
43 :
44 :
45 :
46 : // *** Start - BitManip ***
47 :
48 2770 : logic bitmanip_sel_d; // OR of every ap_* select: some bit-manip operation is requested
49 1428 : logic bitmanip_sel_x; // bitmanip_sel_d after i_bitmanip_ff; masks the product out of result_x
50 242 : logic [31:0] bitmanip_d; // AND-OR mux of all bit-manip results (all-zero when no select is set)
51 224 : logic [31:0] bitmanip_x; // bitmanip_d after i_bitmanip_ff
52 :
53 :
54 :
55 : // ZBE (selects are tied to 0 unless pt.BITMANIP_ZBE == 1)
56 0 : logic ap_bcompress;
57 0 : logic ap_bdecompress;
58 :
59 : // ZBC (selects are tied to 0 unless pt.BITMANIP_ZBC == 1)
60 852 : logic ap_clmul;
61 888 : logic ap_clmulh;
62 1030 : logic ap_clmulr;
63 :
64 : // ZBP (selects are tied to 0 unless pt.BITMANIP_ZBP == 1)
65 0 : logic ap_grev;
66 0 : logic ap_gorc;
67 0 : logic ap_shfl;
68 0 : logic ap_unshfl;
69 0 : logic ap_xperm_n;
70 0 : logic ap_xperm_b;
71 0 : logic ap_xperm_h;
72 :
73 : // ZBR (selects are tied to 0 unless pt.BITMANIP_ZBR == 1)
74 0 : logic ap_crc32_b;
75 0 : logic ap_crc32_h;
76 0 : logic ap_crc32_w;
77 0 : logic ap_crc32c_b;
78 0 : logic ap_crc32c_h;
79 0 : logic ap_crc32c_w;
80 :
81 : // ZBF (select is tied to 0 unless pt.BITMANIP_ZBF == 1)
82 0 : logic ap_bfp;
83 :
84 :
85 : if (pt.BITMANIP_ZBE == 1) // Module-scope if, so only one branch is elaborated; pt is not declared in this file (presumably supplied by el2_param.vh)
86 : begin
87 : assign ap_bcompress = mul_p.bcompress;
88 : assign ap_bdecompress = mul_p.bdecompress;
89 : end
90 : else // ZBE not configured: tie the selects low so these results never reach bitmanip_d
91 : begin
92 : assign ap_bcompress = 1'b0;
93 : assign ap_bdecompress = 1'b0;
94 : end
95 :
96 : if (pt.BITMANIP_ZBC == 1) // Carry-less multiply selects
97 : begin
98 : assign ap_clmul = mul_p.clmul;
99 : assign ap_clmulh = mul_p.clmulh;
100 : assign ap_clmulr = mul_p.clmulr;
101 : end
102 : else // ZBC not configured: selects tied low
103 : begin
104 : assign ap_clmul = 1'b0;
105 : assign ap_clmulh = 1'b0;
106 : assign ap_clmulr = 1'b0;
107 : end
108 :
109 : if (pt.BITMANIP_ZBP == 1) // Permutation selects (grev, gorc, shfl/unshfl, xperm)
110 : begin
111 : assign ap_grev = mul_p.grev;
112 : assign ap_gorc = mul_p.gorc;
113 : assign ap_shfl = mul_p.shfl;
114 : assign ap_unshfl = mul_p.unshfl;
115 : assign ap_xperm_n = mul_p.xperm_n;
116 : assign ap_xperm_b = mul_p.xperm_b;
117 : assign ap_xperm_h = mul_p.xperm_h;
118 : end
119 : else // ZBP not configured: selects tied low
120 : begin
121 : assign ap_grev = 1'b0;
122 : assign ap_gorc = 1'b0;
123 : assign ap_shfl = 1'b0;
124 : assign ap_unshfl = 1'b0;
125 : assign ap_xperm_n = 1'b0;
126 : assign ap_xperm_b = 1'b0;
127 : assign ap_xperm_h = 1'b0;
128 : end
129 :
130 : if (pt.BITMANIP_ZBR == 1) // CRC32 / CRC32c selects
131 : begin
132 : assign ap_crc32_b = mul_p.crc32_b;
133 : assign ap_crc32_h = mul_p.crc32_h;
134 : assign ap_crc32_w = mul_p.crc32_w;
135 : assign ap_crc32c_b = mul_p.crc32c_b;
136 : assign ap_crc32c_h = mul_p.crc32c_h;
137 : assign ap_crc32c_w = mul_p.crc32c_w;
138 : end
139 : else // ZBR not configured: selects tied low
140 : begin
141 : assign ap_crc32_b = 1'b0;
142 : assign ap_crc32_h = 1'b0;
143 : assign ap_crc32_w = 1'b0;
144 : assign ap_crc32c_b = 1'b0;
145 : assign ap_crc32c_h = 1'b0;
146 : assign ap_crc32c_w = 1'b0;
147 : end
148 :
149 : if (pt.BITMANIP_ZBF == 1) // Bit-field place select
150 : begin
151 : assign ap_bfp = mul_p.bfp;
152 : end
153 : else // ZBF not configured: select tied low
154 : begin
155 : assign ap_bfp = 1'b0;
156 : end
157 :
158 :
159 : // *** End - BitManip ***
160 :
161 :
162 :
163 : assign mul_x_enable = mul_p.valid; // The two enables are identical; both come straight from mul_p.valid
164 : assign bit_x_enable = mul_p.valid;
165 :
166 : assign rs1_ext_in[32] = mul_p.rs1_sign & rs1_in[31]; // Bit 32 copies the MSB only when the operand is flagged signed; otherwise it is 0
167 : assign rs2_ext_in[32] = mul_p.rs2_sign & rs2_in[31];
168 :
169 : assign rs1_ext_in[31:0] = rs1_in[31:0]; // Low 32 bits pass through unchanged
170 : assign rs2_ext_in[31:0] = rs2_in[31:0];
171 :
172 :
173 :
174 : // --------------------------- Multiply ----------------------------------
175 :
176 :
177 12410 : logic signed [32:0] rs1_x; // rs1_ext_in as output by i_a_x_ff
178 11050 : logic signed [32:0] rs2_x; // rs2_ext_in as output by i_b_x_ff
179 :
180 : rvdffe #(34) i_a_x_ff (.*, .clk(clk), .din({mul_p.low,rs1_ext_in[32:0]}), .dout({low_x,rs1_x[32:0]}), .en(mul_x_enable)); // 34 bits = mul_p.low + 33-bit extended rs1; rvdffe is defined outside this file (presumably a flop with enable)
181 : rvdffe #(33) i_b_x_ff (.*, .clk(clk), .din( rs2_ext_in[32:0] ), .dout( rs2_x[32:0] ), .en(mul_x_enable));
182 :
183 :
184 : assign prod_x[65:0] = rs1_x * rs2_x; // Both operands are declared signed, so this is a signed 33x33 multiply; bit 32 of each is 0 unless its mul_p.rs*_sign was set, which lets one multiplier serve signed and unsigned 32-bit inputs
185 :
186 :
187 :
188 :
189 : // * * * * * * * * * * * * * * * * * * BitManip : BCOMPRESS, BDECOMPRESS * * * * * * * * * * * * *
190 :
191 :
192 : // *** BCOMPRESS == "gather" ***
193 :
194 1362 : logic [31:0] bcompress_d; // rs1_in bits picked by the rs2_in mask, packed toward bit 0
195 41861 : logic bcompress_test_bit_d; // Mask bit (rs2_in[i]) under test in the current loop iteration
196 : integer bcompress_i, bcompress_j; // i = source/mask bit index, j = next free result bit
197 :
198 :
199 318 : always_comb
200 318 : begin
201 :
202 318 : bcompress_j = 0;
203 318 : bcompress_test_bit_d = 1'b0;
204 318 : bcompress_d[31:0] = 32'b0; // Result bits that the loop never writes stay 0
205 :
206 318 : for (bcompress_i=0; bcompress_i<32; bcompress_i++)
207 10176 : begin
208 10176 : bcompress_test_bit_d = rs2_in[bcompress_i];
209 438942656 : if (bcompress_test_bit_d)
210 715744 : begin
211 715744 : bcompress_d[bcompress_j] = rs1_in[bcompress_i]; // Selected source bit goes to the next packed position
212 715744 : bcompress_j = bcompress_j + 1;
213 : end // IF bcompress_test_bit
214 : end // FOR bcompress_i
215 : end // ALWAYS_COMB
216 :
217 :
218 :
219 : // *** BDECOMPRESS == "scatter" ***
220 :
221 7191 : logic [31:0] bdecompress_d; // Low-order rs1_in bits spread out to the positions where rs2_in is 1
222 41861 : logic bdecompress_test_bit_d; // Mask bit (rs2_in[i]) under test in the current loop iteration
223 : integer bdecompress_i, bdecompress_j; // i = mask/destination bit index, j = next unread rs1_in bit
224 :
225 :
226 318 : always_comb
227 318 : begin
228 :
229 318 : bdecompress_j = 0;
230 318 : bdecompress_test_bit_d = 1'b0;
231 318 : bdecompress_d[31:0] = 32'b0; // Positions whose mask bit is 0 stay 0
232 :
233 318 : for (bdecompress_i=0; bdecompress_i<32; bdecompress_i++)
234 10176 : begin
235 10176 : bdecompress_test_bit_d = rs2_in[bdecompress_i];
236 438942656 : if (bdecompress_test_bit_d)
237 715744 : begin
238 715744 : bdecompress_d[bdecompress_i] = rs1_in[bdecompress_j]; // Next packed source bit lands on the masked position
239 715744 : bdecompress_j = bdecompress_j + 1;
240 : end // IF bdecompress_test_bit
241 : end // FOR bdecompress_i
242 : end // ALWAYS_COMB
243 :
244 :
245 :
246 :
247 : // * * * * * * * * * * * * * * * * * * BitManip : CLMUL, CLMULH, CLMULR * * * * * * * * * * * * *
248 :
249 57611 : logic [62:0] clmul_raw_d; // Full 63-bit carry-less product of rs1_in and rs2_in (partial products combined with XOR instead of add)
250 :
251 :
252 : assign clmul_raw_d[62:0] = ( {63{rs2_in[00]}} & {31'b0,rs1_in[31:0] } ) ^ // Term k is rs1_in shifted left by k, included only when rs2_in[k] is 1
253 : ( {63{rs2_in[01]}} & {30'b0,rs1_in[31:0], 1'b0} ) ^
254 : ( {63{rs2_in[02]}} & {29'b0,rs1_in[31:0], 2'b0} ) ^
255 : ( {63{rs2_in[03]}} & {28'b0,rs1_in[31:0], 3'b0} ) ^
256 : ( {63{rs2_in[04]}} & {27'b0,rs1_in[31:0], 4'b0} ) ^
257 : ( {63{rs2_in[05]}} & {26'b0,rs1_in[31:0], 5'b0} ) ^
258 : ( {63{rs2_in[06]}} & {25'b0,rs1_in[31:0], 6'b0} ) ^
259 : ( {63{rs2_in[07]}} & {24'b0,rs1_in[31:0], 7'b0} ) ^
260 : ( {63{rs2_in[08]}} & {23'b0,rs1_in[31:0], 8'b0} ) ^
261 : ( {63{rs2_in[09]}} & {22'b0,rs1_in[31:0], 9'b0} ) ^
262 : ( {63{rs2_in[10]}} & {21'b0,rs1_in[31:0],10'b0} ) ^
263 : ( {63{rs2_in[11]}} & {20'b0,rs1_in[31:0],11'b0} ) ^
264 : ( {63{rs2_in[12]}} & {19'b0,rs1_in[31:0],12'b0} ) ^
265 : ( {63{rs2_in[13]}} & {18'b0,rs1_in[31:0],13'b0} ) ^
266 : ( {63{rs2_in[14]}} & {17'b0,rs1_in[31:0],14'b0} ) ^
267 : ( {63{rs2_in[15]}} & {16'b0,rs1_in[31:0],15'b0} ) ^
268 : ( {63{rs2_in[16]}} & {15'b0,rs1_in[31:0],16'b0} ) ^
269 : ( {63{rs2_in[17]}} & {14'b0,rs1_in[31:0],17'b0} ) ^
270 : ( {63{rs2_in[18]}} & {13'b0,rs1_in[31:0],18'b0} ) ^
271 : ( {63{rs2_in[19]}} & {12'b0,rs1_in[31:0],19'b0} ) ^
272 : ( {63{rs2_in[20]}} & {11'b0,rs1_in[31:0],20'b0} ) ^
273 : ( {63{rs2_in[21]}} & {10'b0,rs1_in[31:0],21'b0} ) ^
274 : ( {63{rs2_in[22]}} & { 9'b0,rs1_in[31:0],22'b0} ) ^
275 : ( {63{rs2_in[23]}} & { 8'b0,rs1_in[31:0],23'b0} ) ^
276 : ( {63{rs2_in[24]}} & { 7'b0,rs1_in[31:0],24'b0} ) ^
277 : ( {63{rs2_in[25]}} & { 6'b0,rs1_in[31:0],25'b0} ) ^
278 : ( {63{rs2_in[26]}} & { 5'b0,rs1_in[31:0],26'b0} ) ^
279 : ( {63{rs2_in[27]}} & { 4'b0,rs1_in[31:0],27'b0} ) ^
280 : ( {63{rs2_in[28]}} & { 3'b0,rs1_in[31:0],28'b0} ) ^
281 : ( {63{rs2_in[29]}} & { 2'b0,rs1_in[31:0],29'b0} ) ^
282 : ( {63{rs2_in[30]}} & { 1'b0,rs1_in[31:0],30'b0} ) ^
283 : ( {63{rs2_in[31]}} & { rs1_in[31:0],31'b0} );
284 :
285 :
286 :
287 :
288 : // * * * * * * * * * * * * * * * * * * BitManip : GREV * * * * * * * * * * * * * * * * * *
289 :
290 : // uint32_t grev32(uint32_t rs1, uint32_t rs2)
291 : // {
292 : // uint32_t x = rs1;
293 : // int shamt = rs2 & 31;
294 : //
295 : // if (shamt & 1) x = ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1);
296 : // if (shamt & 2) x = ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2);
297 : // if (shamt & 4) x = ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4);
298 : // if (shamt & 8) x = ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8);
299 : // if (shamt & 16) x = ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
300 : //
301 : // return x;
302 : // }
303 :
304 :
305 29061 : logic [31:0] grev1_d; // rs1_in with adjacent bits swapped when rs2_in[0] is set
306 26044 : logic [31:0] grev2_d; // grev1_d with adjacent 2-bit pairs swapped when rs2_in[1] is set
307 24671 : logic [31:0] grev4_d; // grev2_d with adjacent nibbles swapped when rs2_in[2] is set
308 46661 : logic [31:0] grev8_d; // grev4_d with adjacent bytes swapped when rs2_in[3] is set
309 35104 : logic [31:0] grev_d; // grev8_d with the two half-words swapped when rs2_in[4] is set; this is the GREV result
310 :
311 :
312 : assign grev1_d[31:0] = (rs2_in[0]) ? {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
313 : rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
314 : rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
315 : rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} : rs1_in[31:0];
316 :
317 : assign grev2_d[31:0] = (rs2_in[1]) ? {grev1_d[29:28],grev1_d[31:30],grev1_d[25:24],grev1_d[27:26],
318 : grev1_d[21:20],grev1_d[23:22],grev1_d[17:16],grev1_d[19:18],
319 : grev1_d[13:12],grev1_d[15:14],grev1_d[09:08],grev1_d[11:10],
320 : grev1_d[05:04],grev1_d[07:06],grev1_d[01:00],grev1_d[03:02]} : grev1_d[31:0];
321 :
322 : assign grev4_d[31:0] = (rs2_in[2]) ? {grev2_d[27:24],grev2_d[31:28],grev2_d[19:16],grev2_d[23:20],
323 : grev2_d[11:08],grev2_d[15:12],grev2_d[03:00],grev2_d[07:04]} : grev2_d[31:0];
324 :
325 : assign grev8_d[31:0] = (rs2_in[3]) ? {grev4_d[23:16],grev4_d[31:24],grev4_d[07:00],grev4_d[15:08]} : grev4_d[31:0];
326 :
327 : assign grev_d[31:0] = (rs2_in[4]) ? {grev8_d[15:00],grev8_d[31:16]} : grev8_d[31:0];
328 :
329 :
330 :
331 :
332 : // * * * * * * * * * * * * * * * * * * BitManip : GORC * * * * * * * * * * * * * * * * * *
333 :
334 : // uint32_t gorc32(uint32_t rs1, uint32_t rs2)
335 : // {
336 : // uint32_t x = rs1;
337 : // int shamt = rs2 & 31;
338 : //
339 : // if (shamt & 1) x |= ( (x & 0x55555555) << 1) | ( (x & 0xAAAAAAAA) >> 1);
340 : // if (shamt & 2) x |= ( (x & 0x33333333) << 2) | ( (x & 0xCCCCCCCC) >> 2);
341 : // if (shamt & 4) x |= ( (x & 0x0F0F0F0F) << 4) | ( (x & 0xF0F0F0F0) >> 4);
342 : // if (shamt & 8) x |= ( (x & 0x00FF00FF) << 8) | ( (x & 0xFF00FF00) >> 8);
343 : // if (shamt & 16) x |= ( (x & 0x0000FFFF) << 16) | ( (x & 0xFFFF0000) >> 16);
344 : //
345 : // return x;
346 : // }
347 :
348 :
349 31102 : logic [31:0] gorc1_d; // rs1_in ORed with its adjacent-bit-swapped copy when rs2_in[0] is set (unchanged otherwise)
350 32894 : logic [31:0] gorc2_d; // gorc1_d ORed with its 2-bit-pair-swapped copy when rs2_in[1] is set
351 35793 : logic [31:0] gorc4_d; // gorc2_d ORed with its nibble-swapped copy when rs2_in[2] is set
352 80845 : logic [31:0] gorc8_d; // gorc4_d ORed with its byte-swapped copy when rs2_in[3] is set
353 52895 : logic [31:0] gorc_d; // gorc8_d ORed with its half-word-swapped copy when rs2_in[4] is set; this is the GORC result
354 :
355 :
356 : assign gorc1_d[31:0] = ( {32{rs2_in[0]}} & {rs1_in[30],rs1_in[31],rs1_in[28],rs1_in[29],rs1_in[26],rs1_in[27],rs1_in[24],rs1_in[25],
357 : rs1_in[22],rs1_in[23],rs1_in[20],rs1_in[21],rs1_in[18],rs1_in[19],rs1_in[16],rs1_in[17],
358 : rs1_in[14],rs1_in[15],rs1_in[12],rs1_in[13],rs1_in[10],rs1_in[11],rs1_in[08],rs1_in[09],
359 : rs1_in[06],rs1_in[07],rs1_in[04],rs1_in[05],rs1_in[02],rs1_in[03],rs1_in[00],rs1_in[01]} ) | rs1_in[31:0];
360 :
361 : assign gorc2_d[31:0] = ( {32{rs2_in[1]}} & {gorc1_d[29:28],gorc1_d[31:30],gorc1_d[25:24],gorc1_d[27:26],
362 : gorc1_d[21:20],gorc1_d[23:22],gorc1_d[17:16],gorc1_d[19:18],
363 : gorc1_d[13:12],gorc1_d[15:14],gorc1_d[09:08],gorc1_d[11:10],
364 : gorc1_d[05:04],gorc1_d[07:06],gorc1_d[01:00],gorc1_d[03:02]} ) | gorc1_d[31:0];
365 :
366 : assign gorc4_d[31:0] = ( {32{rs2_in[2]}} & {gorc2_d[27:24],gorc2_d[31:28],gorc2_d[19:16],gorc2_d[23:20],
367 : gorc2_d[11:08],gorc2_d[15:12],gorc2_d[03:00],gorc2_d[07:04]} ) | gorc2_d[31:0];
368 :
369 : assign gorc8_d[31:0] = ( {32{rs2_in[3]}} & {gorc4_d[23:16],gorc4_d[31:24],gorc4_d[07:00],gorc4_d[15:08]} ) | gorc4_d[31:0];
370 :
371 : assign gorc_d[31:0] = ( {32{rs2_in[4]}} & {gorc8_d[15:00],gorc8_d[31:16]} ) | gorc8_d[31:0];
372 :
373 :
374 :
375 :
376 : // * * * * * * * * * * * * * * * * * * BitManip : SHFL, UNSHFL * * * * * * * * * * * * * * * * * *
377 :
378 : // uint32_t shuffle32_stage (uint32_t src, uint32_t maskL, uint32_t maskR, int N)
379 : // {
380 : // uint32_t x = src & ~(maskL | maskR);
381 : // x |= ((src << N) & maskL) | ((src >> N) & maskR);
382 : // return x;
383 : // }
384 : //
385 : //
386 : //
387 : // uint32_t shfl32(uint32_t rs1, uint32_t rs2)
388 : // {
389 : // uint32_t x = rs1;
390 : // int shamt = rs2 & 15;
391 : //
392 : // if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
393 : // if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
394 : // if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
395 : // if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
396 : //
397 : // return x;
398 : // }
399 :
400 :
401 20634 : logic [31:0] shfl8_d; // rs1_in with the two middle bytes exchanged when rs2_in[3] is set
402 37767 : logic [31:0] shfl4_d; // shfl8_d with the two middle nibbles of each half-word exchanged when rs2_in[2] is set
403 37878 : logic [31:0] shfl2_d; // shfl4_d with the two middle bit-pairs of each byte exchanged when rs2_in[1] is set
404 35804 : logic [31:0] shfl_d; // shfl2_d with the two middle bits of each nibble exchanged when rs2_in[0] is set; this is the SHFL result
405 :
406 :
407 :
408 : assign shfl8_d[31:0] = (rs2_in[3]) ? {rs1_in[31:24],rs1_in[15:08],rs1_in[23:16],rs1_in[07:00]} : rs1_in[31:0];
409 :
410 : assign shfl4_d[31:0] = (rs2_in[2]) ? {shfl8_d[31:28],shfl8_d[23:20],shfl8_d[27:24],shfl8_d[19:16],
411 : shfl8_d[15:12],shfl8_d[07:04],shfl8_d[11:08],shfl8_d[03:00]} : shfl8_d[31:0];
412 :
413 : assign shfl2_d[31:0] = (rs2_in[1]) ? {shfl4_d[31:30],shfl4_d[27:26],shfl4_d[29:28],shfl4_d[25:24],
414 : shfl4_d[23:22],shfl4_d[19:18],shfl4_d[21:20],shfl4_d[17:16],
415 : shfl4_d[15:14],shfl4_d[11:10],shfl4_d[13:12],shfl4_d[09:08],
416 : shfl4_d[07:06],shfl4_d[03:02],shfl4_d[05:04],shfl4_d[01:00]} : shfl4_d[31:0];
417 :
418 : assign shfl_d[31:0] = (rs2_in[0]) ? {shfl2_d[31],shfl2_d[29],shfl2_d[30],shfl2_d[28],shfl2_d[27],shfl2_d[25],shfl2_d[26],shfl2_d[24],
419 : shfl2_d[23],shfl2_d[21],shfl2_d[22],shfl2_d[20],shfl2_d[19],shfl2_d[17],shfl2_d[18],shfl2_d[16],
420 : shfl2_d[15],shfl2_d[13],shfl2_d[14],shfl2_d[12],shfl2_d[11],shfl2_d[09],shfl2_d[10],shfl2_d[08],
421 : shfl2_d[07],shfl2_d[05],shfl2_d[06],shfl2_d[04],shfl2_d[03],shfl2_d[01],shfl2_d[02],shfl2_d[00]} : shfl2_d[31:0];
422 :
423 :
424 :
425 :
426 : // uint32_t unshfl32(uint32_t rs1, uint32_t rs2)
427 : // {
428 : // uint32_t x = rs1;
429 : // int shamt = rs2 & 15;
430 : //
431 : // if (shamt & 1) x = shuffle32_stage(x, 0x44444444, 0x22222222, 1);
432 : // if (shamt & 2) x = shuffle32_stage(x, 0x30303030, 0xc0c0c0c0, 2);
433 : // if (shamt & 4) x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
434 : // if (shamt & 8) x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
435 : //
436 : // return x;
437 : // }
438 :
439 :
440 20262 : logic [31:0] unshfl1_d; // rs1_in with the two middle bits of each nibble exchanged when rs2_in[0] is set
441 20450 : logic [31:0] unshfl2_d; // unshfl1_d with the two middle bit-pairs of each byte exchanged when rs2_in[1] is set
442 39731 : logic [31:0] unshfl4_d; // unshfl2_d with the two middle nibbles of each half-word exchanged when rs2_in[2] is set
443 32020 : logic [31:0] unshfl_d; // unshfl4_d with the two middle bytes exchanged when rs2_in[3] is set; same stages as SHFL applied in the opposite order
444 :
445 :
446 : assign unshfl1_d[31:0] = (rs2_in[0]) ? {rs1_in[31],rs1_in[29],rs1_in[30],rs1_in[28],rs1_in[27],rs1_in[25],rs1_in[26],rs1_in[24],
447 : rs1_in[23],rs1_in[21],rs1_in[22],rs1_in[20],rs1_in[19],rs1_in[17],rs1_in[18],rs1_in[16],
448 : rs1_in[15],rs1_in[13],rs1_in[14],rs1_in[12],rs1_in[11],rs1_in[09],rs1_in[10],rs1_in[08],
449 : rs1_in[07],rs1_in[05],rs1_in[06],rs1_in[04],rs1_in[03],rs1_in[01],rs1_in[02],rs1_in[00]} : rs1_in[31:0];
450 :
451 : assign unshfl2_d[31:0] = (rs2_in[1]) ? {unshfl1_d[31:30],unshfl1_d[27:26],unshfl1_d[29:28],unshfl1_d[25:24],
452 : unshfl1_d[23:22],unshfl1_d[19:18],unshfl1_d[21:20],unshfl1_d[17:16],
453 : unshfl1_d[15:14],unshfl1_d[11:10],unshfl1_d[13:12],unshfl1_d[09:08],
454 : unshfl1_d[07:06],unshfl1_d[03:02],unshfl1_d[05:04],unshfl1_d[01:00]} : unshfl1_d[31:0];
455 :
456 : assign unshfl4_d[31:0] = (rs2_in[2]) ? {unshfl2_d[31:28],unshfl2_d[23:20],unshfl2_d[27:24],unshfl2_d[19:16],
457 : unshfl2_d[15:12],unshfl2_d[07:04],unshfl2_d[11:08],unshfl2_d[03:00]} : unshfl2_d[31:0];
458 :
459 : assign unshfl_d[31:0] = (rs2_in[3]) ? {unshfl4_d[31:24],unshfl4_d[15:08],unshfl4_d[23:16],unshfl4_d[07:00]} : unshfl4_d[31:0];
460 :
461 :
462 :
463 :
464 : // * * * * * * * * * * * * * * * * * * BitManip : XPERM * * * * * * * * * * * * * * * * *
465 :
466 : //
467 : // These instructions operate on nibbles/bytes/half-words/words.
468 : // rs1 is a vector of data words and rs2 is a vector of indices into rs1.
469 : // The result of the instruction is the vector rs2 with each element replaced by the corresponding data word from rs1,
470 : // or zero when the index in rs2 is out of bounds.
471 : //
472 : // uint_xlen_t xperm(uint_xlen_t rs1, uint_xlen_t rs2, int sz_log2)
473 : // {
474 : // uint_xlen_t r = 0;
475 : // uint_xlen_t sz = 1LL << sz_log2;
476 : // uint_xlen_t mask = (1LL << sz) - 1;
477 : // for (int i = 0; i < XLEN; i += sz)
478 : // { uint_xlen_t pos = ((rs2 >> i) & mask) << sz_log2;
479 : // if (pos < XLEN)
480 : // r |= ((rs1 >> pos) & mask) << i;
481 : // }
482 : // return r;
483 : // }
484 : //
485 : // uint_xlen_t xperm_n (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 2); }
486 : // uint_xlen_t xperm_b (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 3); }
487 : // uint_xlen_t xperm_h (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 4); }
488 : // uint_xlen_t xperm_w (uint_xlen_t rs1, uint_xlen_t rs2) { return xperm(rs1, rs2, 5); } Not part of RV32
489 : //
490 : // The xperm.[nbhw] instructions can be implemented with an XLEN/4-lane nibble-wide crossbarswitch.
491 :
492 : // *** XPERM_B ***
493 :
494 : // XLEN = 32
495 : // SZ_LOG2 = 3
496 : // SZ = 4'd8;
497 : // MASK = ( 1 << 8 ) - 1
498 : // = 8'hFF
499 :
500 : // integer xperm_b_i;
501 : // logic [31:0] xperm_b_r;
502 : // logic [3:0] xperm_b_sz;
503 : // logic [7:0] xperm_b_mask;
504 : // logic [31:0] xperm_b_pos;
505 : //
506 : //
507 : // assign xperm_b_sz[3:0] = 4'd8;
508 : // assign xperm_b_mask[7:0] = 8'hff;
509 : //
510 : // always_comb
511 : // begin
512 : // xperm_b_r[31:0] = 32'b0;
513 : //
514 : // for (xperm_b_i=0; xperm_b_i<32; xperm_b_i = xperm_b_i + xperm_b_sz) // This code did not work...
515 : // begin
516 : // xperm_b_pos[31:0] = ( (rs2_in[31:0] >> xperm_b_i) & {24'h0,xperm_b_mask[7:0]} ) << 3;
517 : // if (xperm_b_pos[31:0] < 32'd32)
518 : // xperm_b_r[31:0] = xperm_b_r[31:0] | ( ((rs1_in[31:0] >> xperm_b_pos[4:0]) & {24'h0,xperm_b_mask[7:0]}) << xperm_b_i );
519 : // end
520 : // end
521 :
522 20236 : logic [31:0] xperm_n; // Nibble lanes: each rs2_in nibble indexes a nibble of rs1_in; lane is 0 when the index's bit 3 is set (index > 7)
523 59290 : logic [31:0] xperm_b; // Byte lanes: each rs2_in byte indexes a byte of rs1_in; lane is 0 when any of the index's upper 6 bits is set (index > 3)
524 7230 : logic [31:0] xperm_h; // Half-word lanes: each rs2_in half-word indexes a half-word of rs1_in; lane is 0 when any of the index's upper 15 bits is set (index > 1)
525 :
526 : assign xperm_n[03:00] = { 4{ ~rs2_in[03] }} & 4'( (rs1_in[31:0] >> {rs2_in[02:00],2'b0}) & 4'hf ); // This is a 8:1 mux with qualified selects
527 : assign xperm_n[07:04] = { 4{ ~rs2_in[07] }} & 4'( (rs1_in[31:0] >> {rs2_in[06:04],2'b0}) & 4'hf );
528 : assign xperm_n[11:08] = { 4{ ~rs2_in[11] }} & 4'( (rs1_in[31:0] >> {rs2_in[10:08],2'b0}) & 4'hf );
529 : assign xperm_n[15:12] = { 4{ ~rs2_in[15] }} & 4'( (rs1_in[31:0] >> {rs2_in[14:12],2'b0}) & 4'hf );
530 : assign xperm_n[19:16] = { 4{ ~rs2_in[19] }} & 4'( (rs1_in[31:0] >> {rs2_in[18:16],2'b0}) & 4'hf );
531 : assign xperm_n[23:20] = { 4{ ~rs2_in[23] }} & 4'( (rs1_in[31:0] >> {rs2_in[22:20],2'b0}) & 4'hf );
532 : assign xperm_n[27:24] = { 4{ ~rs2_in[27] }} & 4'( (rs1_in[31:0] >> {rs2_in[26:24],2'b0}) & 4'hf );
533 : assign xperm_n[31:28] = { 4{ ~rs2_in[31] }} & 4'( (rs1_in[31:0] >> {rs2_in[30:28],2'b0}) & 4'hf );
534 :
535 : assign xperm_b[07:00] = { 8{ ~(| rs2_in[07:02]) }} & 8'( (rs1_in[31:0] >> {rs2_in[01:00],3'b0}) & 8'hff ); // This is a 4:1 mux with qualified selects
536 : assign xperm_b[15:08] = { 8{ ~(| rs2_in[15:10]) }} & 8'( (rs1_in[31:0] >> {rs2_in[09:08],3'b0}) & 8'hff );
537 : assign xperm_b[23:16] = { 8{ ~(| rs2_in[23:18]) }} & 8'( (rs1_in[31:0] >> {rs2_in[17:16],3'b0}) & 8'hff );
538 : assign xperm_b[31:24] = { 8{ ~(| rs2_in[31:26]) }} & 8'( (rs1_in[31:0] >> {rs2_in[25:24],3'b0}) & 8'hff );
539 :
540 : assign xperm_h[15:00] = {16{ ~(| rs2_in[15:01]) }} & 16'( (rs1_in[31:0] >> {rs2_in[00] ,4'b0}) & 16'hffff ); // This is a 2:1 mux with qualified selects
541 : assign xperm_h[31:16] = {16{ ~(| rs2_in[31:17]) }} & 16'( (rs1_in[31:0] >> {rs2_in[16] ,4'b0}) & 16'hffff );
542 :
543 :
544 :
545 :
546 : // * * * * * * * * * * * * * * * * * * BitManip : CRC32, CRC32c * * * * * * * * * * * * * * * * *
547 :
548 : // *** computed from https://crccalc.com ***
549 : //
550 : // "a" is 8'h61 = 8'b0110_0001 (8'h61 ^ 8'hff = 8'h9e)
551 : //
552 : // Input must first be XORed with 32'hffff_ffff
553 : //
554 : //
555 : // CRC32
556 : //
557 : // Input Output Input Output
558 : // ----- -------- -------- --------
559 : // "a" e8b7be43 ffffff9e 174841bc
560 : // "aa" 078a19d7 ffff9e9e f875e628
561 : // "aaaa" ad98e545 9e9e9e9e 5267a1ba
562 : //
563 : //
564 : //
565 : // CRC32c
566 : //
567 : // Input Output Input Output
568 : // ----- -------- -------- --------
569 : // "a" c1d04330 ffffff9e 3e2fbccf
570 : // "aa" f1f2dac2 ffff9e9e 0e0d253d
571 : // "aaaa" 6a52eeb0 9e9e9e9e 95ad114f
572 :
573 :
574 0 : logic crc32_all; // OR of the six CRC selects
575 318 : logic [31:0] crc32_poly_rev; // Constant 32'hEDB88320 used by the crc32_* loops
576 318 : logic [31:0] crc32c_poly_rev; // Constant 32'h82F63B78 used by the crc32c_* loops
577 : integer crc32_bi, crc32_hi, crc32_wi, crc32c_bi, crc32c_hi, crc32c_wi; // Loop counters, one per always_comb below
578 47786 : logic [31:0] crc32_bd, crc32_hd, crc32_wd, crc32c_bd, crc32c_hd, crc32c_wd; // Results after 8 (b), 16 (h) or 32 (w) shift/XOR steps on rs1_in
579 :
580 :
581 : assign crc32_all = ap_crc32_b | ap_crc32_h | ap_crc32_w | ap_crc32c_b | ap_crc32c_h | ap_crc32c_w;
582 :
583 : assign crc32_poly_rev[31:0] = 32'hEDB88320; // bit reverse of 32'h04C11DB7
584 : assign crc32c_poly_rev[31:0] = 32'h82F63B78; // bit reverse of 32'h1EDC6F41
585 :
586 :
587 318 : always_comb // CRC32, byte: 8 steps
588 318 : begin
589 318 : crc32_bd[31:0] = rs1_in[31:0];
590 :
591 318 : for (crc32_bi=0; crc32_bi<8; crc32_bi++)
592 2544 : begin
593 2544 : crc32_bd[31:0] = (crc32_bd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_bd[0]}}); // Shift right one bit; XOR in the polynomial when the bit shifted out was 1
594 : end // FOR crc32_bi
595 : end // ALWAYS_COMB
596 :
597 :
598 318 : always_comb // CRC32, half-word: 16 steps
599 318 : begin
600 318 : crc32_hd[31:0] = rs1_in[31:0];
601 :
602 318 : for (crc32_hi=0; crc32_hi<16; crc32_hi++)
603 5088 : begin
604 5088 : crc32_hd[31:0] = (crc32_hd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_hd[0]}});
605 : end // FOR crc32_hi
606 : end // ALWAYS_COMB
607 :
608 :
609 318 : always_comb // CRC32, word: 32 steps
610 318 : begin
611 318 : crc32_wd[31:0] = rs1_in[31:0];
612 :
613 318 : for (crc32_wi=0; crc32_wi<32; crc32_wi++)
614 10176 : begin
615 10176 : crc32_wd[31:0] = (crc32_wd[31:0] >> 1) ^ (crc32_poly_rev[31:0] & {32{crc32_wd[0]}});
616 : end // FOR crc32_wi
617 : end // ALWAYS_COMB
618 :
619 :
620 :
621 :
622 318 : always_comb // CRC32c, byte: 8 steps (same loop, crc32c_poly_rev constant)
623 318 : begin
624 318 : crc32c_bd[31:0] = rs1_in[31:0];
625 :
626 318 : for (crc32c_bi=0; crc32c_bi<8; crc32c_bi++)
627 2544 : begin
628 2544 : crc32c_bd[31:0] = (crc32c_bd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_bd[0]}});
629 : end // FOR crc32c_bi
630 : end // ALWAYS_COMB
631 :
632 :
633 318 : always_comb // CRC32c, half-word: 16 steps
634 318 : begin
635 318 : crc32c_hd[31:0] = rs1_in[31:0];
636 :
637 318 : for (crc32c_hi=0; crc32c_hi<16; crc32c_hi++)
638 5088 : begin
639 5088 : crc32c_hd[31:0] = (crc32c_hd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_hd[0]}});
640 : end // FOR crc32c_hi
641 : end // ALWAYS_COMB
642 :
643 :
644 318 : always_comb // CRC32c, word: 32 steps
645 318 : begin
646 318 : crc32c_wd[31:0] = rs1_in[31:0];
647 :
648 318 : for (crc32c_wi=0; crc32c_wi<32; crc32c_wi++)
649 10176 : begin
650 10176 : crc32c_wd[31:0] = (crc32c_wd[31:0] >> 1) ^ (crc32c_poly_rev[31:0] & {32{crc32c_wd[0]}});
651 : end // FOR crc32c_wi
652 : end // ALWAYS_COMB
653 :
654 :
655 :
656 :
657 :
658 : // * * * * * * * * * * * * * * * * * * BitManip : BFP * * * * * * * * * * * * * * * * * *
659 :
660 :
661 : // uint_xlen_t bfp(uint_xlen_t rs1, uint_xlen_t rs2)
662 : // {
663 : // uint_xlen_t cfg = rs2 >> (XLEN/2);
664 : // if ((cfg >> 30) == 2) cfg = cfg >> 16;
665 : // int len = (cfg >> 8) & (XLEN/2-1);
666 : // int off = cfg & (XLEN-1);
667 : // len = len ? len : XLEN/2;
668 : // uint_xlen_t mask = slo(0, len) << off;
669 : // uint_xlen_t data = rs2 << off;
670 : // return (data & mask) | (rs1 & ~mask);
671 : // }
672 :
673 27341 : logic [4:0] bfp_len; // Field length: rs2_in[27:24], with bit 4 set (value 16) when that field is 0
674 27751 : logic [4:0] bfp_off; // Field offset: rs2_in[20:16]
675 276 : logic [31:0] bfp_len_mask_; // All-ones shifted left by bfp_len: 0 in the low bfp_len bits, 1 above
676 318 : logic [31:0] bfp_off_mask_; // All-ones shifted left by bfp_off: 0 in the low bfp_off bits, 1 above
677 25762 : logic [15:0] bfp_preshift_data; // rs2_in[15:0] with everything at or above bit bfp_len cleared
678 4976 : logic [31:0] bfp_shift_data; // bfp_preshift_data moved up to bit position bfp_off
679 10408 : logic [31:0] bfp_shift_mask; // Keep-mask for rs1_in: 0 over the bfp_len-bit field starting at bfp_off, 1 elsewhere
680 19594 : logic [31:0] bfp_result_d; // rs1_in with that field replaced by the shifted data
681 :
682 :
683 : assign bfp_len[3:0] = rs2_in[27:24];
684 : assign bfp_len[4] = (bfp_len[3:0] == 4'b0); // If LEN field is zero, then LEN=16
685 : assign bfp_off[4:0] = rs2_in[20:16];
686 :
687 : assign bfp_len_mask_[31:0] = 32'hffff_ffff << bfp_len[4:0];
688 : assign bfp_off_mask_[31:0] = 32'hffff_ffff << bfp_off[4:0];
689 : assign bfp_preshift_data[15:0]= rs2_in[15:0] & ~bfp_len_mask_[15:0];
690 :
691 : assign bfp_shift_data[31:0] = {16'b0,bfp_preshift_data[15:0]} << bfp_off[4:0];
692 : assign bfp_shift_mask[31:0] = (bfp_len_mask_[31:0] << bfp_off[4:0]) | ~bfp_off_mask_[31:0]; // Ones above the field OR ones below the field
693 :
694 : assign bfp_result_d[31:0] = bfp_shift_data[31:0] | (rs1_in[31:0] & bfp_shift_mask[31:0]);
695 :
696 :
697 :
698 :
699 : // * * * * * * * * * * * * * * * * * * BitManip : Common logic * * * * * * * * * * * * * * * * * *
700 :
701 :
702 : assign bitmanip_sel_d = ap_bcompress | ap_bdecompress | ap_clmul | ap_clmulh | ap_clmulr | ap_grev | ap_gorc | ap_shfl | ap_unshfl | crc32_all | ap_bfp | ap_xperm_n | ap_xperm_b | ap_xperm_h; // Set when any bit-manip operation is selected
703 :
704 : assign bitmanip_d[31:0] = ( {32{ap_bcompress}} & bcompress_d[31:0] ) | // AND-OR mux: each result is gated by its own select (assumes at most one select is set at a time - TODO confirm against the decoder)
705 : ( {32{ap_bdecompress}} & bdecompress_d[31:0] ) |
706 : ( {32{ap_clmul}} & clmul_raw_d[31:0] ) | // clmul : low 32 bits of the carry-less product
707 : ( {32{ap_clmulh}} & {1'b0,clmul_raw_d[62:32]} ) | // clmulh : bits 62:32, zero-extended to 32 bits
708 : ( {32{ap_clmulr}} & clmul_raw_d[62:31] ) | // clmulr : bits 62:31
709 : ( {32{ap_grev}} & grev_d[31:0] ) |
710 : ( {32{ap_gorc}} & gorc_d[31:0] ) |
711 : ( {32{ap_shfl}} & shfl_d[31:0] ) |
712 : ( {32{ap_unshfl}} & unshfl_d[31:0] ) |
713 : ( {32{ap_crc32_b}} & crc32_bd[31:0] ) |
714 : ( {32{ap_crc32_h}} & crc32_hd[31:0] ) |
715 : ( {32{ap_crc32_w}} & crc32_wd[31:0] ) |
716 : ( {32{ap_crc32c_b}} & crc32c_bd[31:0] ) |
717 : ( {32{ap_crc32c_h}} & crc32c_hd[31:0] ) |
718 : ( {32{ap_crc32c_w}} & crc32c_wd[31:0] ) |
719 : ( {32{ap_bfp}} & bfp_result_d[31:0] ) |
720 : ( {32{ap_xperm_n}} & xperm_n[31:0] ) |
721 : ( {32{ap_xperm_b}} & xperm_b[31:0] ) |
722 : ( {32{ap_xperm_h}} & xperm_h[31:0] );
723 :
724 :
725 :
726 : rvdffe #(33) i_bitmanip_ff (.*, .clk(clk), .din({bitmanip_sel_d,bitmanip_d[31:0]}), .dout({bitmanip_sel_x,bitmanip_x[31:0]}), .en(bit_x_enable)); // 33 bits = select flag + 32-bit bit-manip value, kept together in one instance
727 :
728 :
729 :
730 :
731 : assign result_x[31:0] = ( {32{~bitmanip_sel_x & ~low_x}} & prod_x[63:32] ) | // Upper product word when no bit-manip op and low_x is 0
732 : ( {32{~bitmanip_sel_x & low_x}} & prod_x[31:0] ) | // Lower product word when no bit-manip op and low_x is 1
733 : bitmanip_x[31:0]; // Not gated here: bitmanip_d is all-zero whenever bitmanip_sel_d is 0, and both pass through the same rvdffe instance
734 :
735 :
736 :
737 : endmodule // el2_exu_mul_ctl
|